So I have a task where for a pdf, I need to go through its page, and create an area at the top or bottom which is empty. Essentially for each page, I'm retrieving the image, creating a new pillow image to reduce its height, remove the old image and add the new one.
However I'm noticing that my image quality has deteriorated during this process.
Any ideas what could be the issue? (This is the original code, the updated code is further below)
import fitz # PyMuPDF
from PIL import Image
from io import BytesIO
def flatten_images_in_pdf(byte_array, placementType, verticalPosition, text_height):
# Open the PDF from the byte array
pdf_document = fitz.open(stream=byte_array, filetype="pdf")
# loop through pages
for page_number in range(len(pdf_document)):
# Get the page
page = pdf_document[page_number]
page_pixmap = page.get_pixmap()
# Determine the color mode based on the number of components
if page_pixmap.n == 1:
color_mode = 'L' # Grayscale or black and white
elif page_pixmap.n == 3:
color_mode = 'RGB' # Color
else:
color_mode = 'CMYK' # CMYK or other color modes
page_pil_image = Image.frombytes(color_mode, [int(page_pixmap.width), int(page_pixmap.height)], page.get_pixmap().samples)
# Get the dimensions (width and height) of the image in pixels
width_pixels, height_pixels = page_pil_image.size
# Get the dimensions of the media box in points
media_box = page.mediabox
width_points = media_box[2]
height_points = media_box[3]
# Calculate the DPI
dpi_x = width_pixels / (width_points / 72) # 72 points = 1 inch
dpi_y = height_pixels / (height_points / 72)
page_edge_offset = 0.5
if (placementType.lower() == "margin"):
# The margin will be the page edge offset and the height of the stamp
margin_height = (page_edge_offset * dpi_y) + text_height;
new_height = int(page_pixmap.height - margin_height);
# Check for invalid new height
if (new_height<= 0):
raise Exception ("New height for page is less than 0")
# Create a new blank image with the adjusted height
new_img = Image.new(color_mode, (page_pixmap.width, page_pixmap.height), white)
# Determine the position to paste the old image onto the new one
if verticalPosition == "top":
# if the text position is top, the image needs to start from the bottom
position = (0, 0)
else:
# if the text position is bottom, the image needs to start from the top
position = (0, new_height - height_pixels)
# Paste the old image onto the new one
new_img.paste(page_pil_image, position)
# Convert the modified Pillow image to a bytes-like object (e.g., PNG format)
image_bytes = BytesIO()
new_img.save(image_bytes, format="GIF",dpi=(dpi_x,dpi_y))
image_bytes.seek(0)
images_in_page = page.get_images()
for image in images_in_page:
image_xref = image[0] # the xref is the first property.
page.delete_image(image_xref)
page.insert_image(rect=page.rect, stream = image_bytes)
# Create an in-memory byte stream
output_stream = BytesIO()
# Save the modified PDF to the byte stream
pdf_document.save(output_stream)
pdf_document.close()
Updated code to remove determining the color scale manually by saving the pixmap to ppm and opening it up with pil image.open:
# Open the PDF from the byte array
pdf_document = fitz.open(stream=byte_array, filetype="pdf")
# loop through pages
for page_number in range(len(pdf_document)):
# Get the page
page = pdf_document[page_number]
bate_stamp = bate_stamps[page_number]
page_pixmap = page.get_pixmap()
page_pil_image = Image.open(BytesIO(page_pixmap.tobytes("ppm")))
# Get the dimensions of the media box in points
width_points = page.mediabox[2]
height_points = page.mediabox[3]
# Calculate the DPI
dpi_x = page_pil_image.size[0] / (width_points / 72) # 72 points = 1 inch
dpi_y = page_pil_image.size[1] / (height_points / 72)
page_edge_offset = 0.5
if (placementType.lower() == "margin"):
# The margin will be the page edge offset and the height of the stamp
margin_height = (page_edge_offset * dpi_y) + text_height;
new_height = int(page_pixmap.height - margin_height);
# Check for invalid new height
if (new_height<= 0):
raise Exception ("New height for page when setting the bate stam as margin is less than 0")
# Create a new blank image with the adjusted height
new_img = Image.new(page_pil_image.mode, (page_pixmap.width, page_pixmap.height), white)
# Determine the position to paste the old image onto the new one
if verticalPosition == "top":
# if the batestamp position is top, the image needs to start from the bottom
position = (0, 0)
else:
# if the batestamp position is bottom, the image needs to start from the top
position = (0, new_height - page_pil_image.size[1])
# Paste the old image onto the new one
new_img.paste(page_pil_image, position)
# Convert the modified Pillow image to a bytes-like object (e.g., PNG format)
image_bytes = BytesIO()
new_img.save(image_bytes, format="GIF",dpi=(dpi_x,dpi_y))
image_bytes.seek(0)
#https://pymupdf.readthedocs.io/en/latest/document.html#Document.get_page_images returns
images_in_page = page.get_images()
for image in images_in_page:
image_xref = image[0] # the xref is the first property.
page.delete_image(image_xref)
page.insert_image(rect=page.rect, stream = image_bytes)
# Create an in-memory byte stream
output_stream = BytesIO()
# Save the modified PDF to the byte stream
pdf_document.save(output_stream)
pdf_document.close()