PyMuPDF, inserting same image but quality is reduced

236 views Asked by At

So I have a task where for a pdf, I need to go through its page, and create an area at the top or bottom which is empty. Essentially for each page, I'm retrieving the image, creating a new pillow image to reduce its height, remove the old image and add the new one.

However I'm noticing that my image quality has deteriorated during this process.

Any ideas what could be the issue? (This is the original code, the updated code is further below)

import fitz  # PyMuPDF
from PIL import Image
from io import BytesIO

def flatten_images_in_pdf(byte_array, placementType, verticalPosition, text_height):
# Open the PDF from the byte array
pdf_document = fitz.open(stream=byte_array, filetype="pdf")

# loop through pages
for page_number in range(len(pdf_document)):
    # Get the page
    page = pdf_document[page_number]

    page_pixmap = page.get_pixmap()

    # Determine the color mode based on the number of components
    if page_pixmap.n == 1:
        color_mode = 'L'  # Grayscale or black and white
    elif page_pixmap.n == 3:
        color_mode = 'RGB'  # Color
    else:
        color_mode = 'CMYK'  # CMYK or other color modes

    page_pil_image = Image.frombytes(color_mode, [int(page_pixmap.width), int(page_pixmap.height)], page.get_pixmap().samples)

    # Get the dimensions (width and height) of the image in pixels
    width_pixels, height_pixels = page_pil_image.size

    # Get the dimensions of the media box in points
    media_box = page.mediabox
    width_points = media_box[2]
    height_points = media_box[3]

    # Calculate the DPI
    dpi_x = width_pixels / (width_points / 72)  # 72 points = 1 inch
    dpi_y = height_pixels / (height_points / 72)

    page_edge_offset = 0.5

    if (placementType.lower() == "margin"):
        # The margin will be the page edge offset and the height of the stamp
        margin_height = (page_edge_offset * dpi_y) + text_height;
        new_height = int(page_pixmap.height - margin_height);

        # Check for invalid new height
        if (new_height<= 0):
            raise Exception ("New height for page is less than 0")

        # Create a new blank image with the adjusted height
        new_img = Image.new(color_mode, (page_pixmap.width, page_pixmap.height), white)

        # Determine the position to paste the old image onto the new one
        if verticalPosition == "top":
            # if the text position is top, the image needs to start from the bottom
            position = (0, 0)
        else:
            # if the text position is bottom, the image needs to start from the top
            position = (0, new_height - height_pixels)

        # Paste the old image onto the new one
        new_img.paste(page_pil_image, position)

        # Convert the modified Pillow image to a bytes-like object (e.g., PNG format)
        image_bytes = BytesIO()
        new_img.save(image_bytes, format="GIF",dpi=(dpi_x,dpi_y))
        image_bytes.seek(0)

        images_in_page = page.get_images()

        for image in images_in_page:
            image_xref = image[0]  # the xref is the first property.
            page.delete_image(image_xref)

        page.insert_image(rect=page.rect, stream = image_bytes)

# Create an in-memory byte stream
output_stream = BytesIO()

# Save the modified PDF to the byte stream
pdf_document.save(output_stream)
pdf_document.close()

Updated code to remove determining the color scale manually by saving the pixmap to ppm and opening it up with pil image.open:

# Open the PDF from the byte array
pdf_document = fitz.open(stream=byte_array, filetype="pdf")

# loop through pages
for page_number in range(len(pdf_document)):
    # Get the page
    page = pdf_document[page_number]
    bate_stamp = bate_stamps[page_number]

    page_pixmap = page.get_pixmap()

    page_pil_image = Image.open(BytesIO(page_pixmap.tobytes("ppm")))

    # Get the dimensions of the media box in points
    width_points = page.mediabox[2]
    height_points = page.mediabox[3]

    # Calculate the DPI
    dpi_x = page_pil_image.size[0] / (width_points / 72)  # 72 points = 1 inch
    dpi_y = page_pil_image.size[1] / (height_points / 72)

    page_edge_offset = 0.5

    if (placementType.lower() == "margin"):
        # The margin will be the page edge offset and the height of the stamp
        margin_height = (page_edge_offset * dpi_y) + text_height;
        new_height = int(page_pixmap.height - margin_height);

        # Check for invalid new height
        if (new_height<= 0):
            raise Exception ("New height for page when setting the bate stam as margin is less than 0")
    
        # Create a new blank image with the adjusted height
        new_img = Image.new(page_pil_image.mode, (page_pixmap.width, page_pixmap.height), white)

        # Determine the position to paste the old image onto the new one
        if verticalPosition == "top":
            # if the batestamp position is top, the image needs to start from the bottom
            position = (0, 0)
        else:
            # if the batestamp position is bottom, the image needs to start from the top
            position = (0, new_height - page_pil_image.size[1])

        # Paste the old image onto the new one
        new_img.paste(page_pil_image, position)

        # Convert the modified Pillow image to a bytes-like object (e.g., PNG format)
        image_bytes = BytesIO()
        new_img.save(image_bytes, format="GIF",dpi=(dpi_x,dpi_y))
        image_bytes.seek(0)

        #https://pymupdf.readthedocs.io/en/latest/document.html#Document.get_page_images returns 
        images_in_page = page.get_images()

        for image in images_in_page:
            image_xref = image[0]  # the xref is the first property.
            page.delete_image(image_xref)

        page.insert_image(rect=page.rect, stream = image_bytes)
    

    
# Create an in-memory byte stream
output_stream = BytesIO()

# Save the modified PDF to the byte stream
pdf_document.save(output_stream)
pdf_document.close()
0

There are 0 answers