PDF saves with only one page/image out of hundreds of images but the size is equal to all the pages/images

Question

PDF saves with only one page/image out of hundreds of images but the size is equal to all the pages/images

66 views Asked by Rafique Suchwani At 19 February 2024 at 08:39

The code block sets up the PDF with only one random image but with the size of all the images, it should add all the images in the PDF while setting up the page and height with margins.

Hi,

I'm lost at what I'm doing wrong here, I want to take all the jpg files and put them in the PDF while setting up the PDF page dimension per the image dimensions and adding about 20px as margin on all sides. What it does is it takes the first image dimensions alright, sets up the PDF page width and height in accordance but doesn't add the margins, and while iterating through all the images it only puts one random image in the PDF but sets up the size of the PDF equals to all the images' size.

Here is the code block:

import os
import logging
from PIL import Image
from reportlab.lib.pagesizes import letter, portrait
from reportlab.pdfgen import canvas

# Setup logging to a file
logging.basicConfig(filename="pdf_script_log.txt", level=logging.DEBUG, format="%(asctime)s %(levelname)s %(message)s")

def convert_images_to_jpg(source_dir, temp_dir, quality, scaling_factor, user_prefers_sharpness):
    """
    Converts images in the source directory to JPG format with specified parameters.

    Args:
        source_dir (str): Absolute path to the directory containing image files.
        temp_dir (str): Absolute path to the temporary directory for storing converted images.
        quality (int): Quality parameter for image compression (0-100).
        scaling_factor (float): Scaling factor for resizing images (0.0-1.0).
        user_prefers_sharpness (bool): Whether sharper downscaling is preferred (True) or not (False).

    Returns:
        list: A list of paths to the converted JPG images.
    """
    # Check if there are already converted images in the temporary directory
    existing_jpg_files = [f for f in os.listdir(temp_dir) if f.endswith('.jpg')]
    if existing_jpg_files:
        logging.info("Found existing converted images in the temporary directory. Skipping conversion.")
        return [os.path.join(temp_dir, f) for f in existing_jpg_files]

    jpg_images = []
    for filename in os.listdir(source_dir):
        if os.path.isfile(os.path.join(source_dir, filename)):
            try:
                img = Image.open(os.path.join(source_dir, filename))
                # Convert to RGB for better compression
                img = img.convert("RGB")

                # Apply scaling factor
                new_width = int(img.width * scaling_factor)
                new_height = int(img.height * scaling_factor)
                img.thumbnail((new_width, new_height), Image.Resampling.LANCZOS if user_prefers_sharpness else Image.Resampling.BICUBIC)

                # Save as JPG with specified quality
                jpg_filename = os.path.join(temp_dir, os.path.splitext(filename)[0] + ".jpg")
                img.save(jpg_filename, quality=quality)
                jpg_images.append(jpg_filename)
            except Exception as e:
                logging.error(f"Error converting image '{filename}' to JPG format: {e}")
    return jpg_images

def create_pdf(output_dir, desired_name, title, author, keywords, single_page=False, margin=20):
    """
    Creates a PDF document from a series of JPG image files.

    Args:
        output_dir (str): Output directory for the created PDF.
        desired_name (str): Desired name for the PDF (without extension).
        title (str): Title of the PDF.
        author (str): Author of the PDF.
        keywords (list): Keywords associated with the PDF.
        single_page (bool, optional): Whether to have each image on a single page. Defaults to False.
        margin (int, optional): Margin size in pixels for each image on the page. Defaults to 20.
    """

    logging.info("Starting PDF creation...")

    jpg_files = [f for f in os.listdir(temp_dir) if f.endswith('.jpg')]
    if not jpg_files:
        logging.error("No JPG image files found for PDF creation.")
        return

    jpg_files.sort(key=lambda x: int(''.join(filter(str.isdigit, os.path.splitext(x)[0]))))

    if single_page:
        # Use dimensions of the first image
        first_image_path = os.path.join(temp_dir, jpg_files[0])
        first_image = Image.open(first_image_path)
        page_width = first_image.width - 2 * margin
        page_height = first_image.height - 2 * margin

        # Use `portrait` to get the base page size (without margin adjustment)
        page_size = portrait((page_width, page_height))

    else:
        # Use the predefined 'letter' page size
        page_size = portrait(letter)

    try:
        pdf_path = os.path.join(output_dir, desired_name + ".pdf")
        c = canvas.Canvas(pdf_path, pagesize=page_size)

        # Add Metadata to the PDF
        c.setTitle(title)
        c.setAuthor(author)
        c.setKeywords(keywords)

        # Iterate through all JPG images and draw them
        for i, jpg_file in enumerate(jpg_files):
            if jpg_file.endswith(".jpg"):
                image_path = os.path.join(temp_dir, jpg_file)
                try:
                    img = Image.open(image_path)

                    # Calculate image position with margin
                    if single_page:
                        x = margin
                        y = margin
                    else:
                        max_width = page_size[0] - 2 * margin
                        max_height = page_size[1] - 2 * margin
                        x = margin + (i % columns) * (max_width + margin)  # Adjust for multi-column layout
                        y = margin + (i // columns) * (max_height + margin)  # Adjust for multi-column layout

                    # Draw the image
                    c.drawImage(image_path, x, y, width=img.width, height=img.height)

                    if not single_page:
                        c.showPage()  # Add page break for multi-page PDFs

                except IOError as e:
                    logging.error(f"Error reading image: {image_path} - {e}")

        # Save the PDF and log success message
        c.save()
        logging.info(f"PDF created successfully at: {pdf_path}")

    except Exception as e:
        logging.error(f"Error during PDF creation: {e}")

    # Clean up temporary directory (optional, consider manual review)
    for file in os.listdir(temp_dir):
        file_path = os.path.join(temp_dir, file)
        os.remove(file_path)
    logging.info("Temporary files cleaned up.")


# Main execution
try:
    # Define variables for customization
    source_dir = ""
    output_dir = ""
    desired_name = ""
    title = ""
    author = ""
    keywords = ["KW1", "KW2", "KW3"]
    single_page = True
    quality = 60
    scaling_factor = 0.7
    user_prefers_sharpness = True

    # Temporary directory for storing converted images
    temp_dir = os.path.join(output_dir, 'temp')

    # Create temporary directory if it doesn't exist
    os.makedirs(temp_dir, exist_ok=True)

    # Convert images to JPG format
    jpg_images = convert_images_to_jpg(source_dir, temp_dir, quality, scaling_factor, user_prefers_sharpness)

    # Create PDF from converted images
    create_pdf(output_dir, desired_name, title, author, keywords, single_page)

    # Close logging and exit
    logging.shutdown()
    logging.info("Script execution completed.")

except Exception as e:
    logging.error(f"An error occurred during script execution: {e}")
    logging.shutdown()

Can someone please point me out what I'm doing wrong here?

The Modules/Libraries being used are Pillow (PIL), Reportlab (pagesize, pdfgen), Logging.

EDIT01: A complete workable script is now included for reference.

EDIT02: @KJ's suggestion somewhat worked for me and I got the script to work, now I can have all the images as separate pages in the PDF with the margins.

Original Q&A

TechQA.

PDF saves with only one page/image out of hundreds of images but the size is equal to all the pages/images

There are 0 answers

Related Questions in PYTHON

Related Questions in PDF

Related Questions in PYTHON-IMAGING-LIBRARY

Related Questions in PDF-GENERATION

Related Questions in REPORTLAB

Popular Questions

Trending Questions