Some PDF attachments do not open when added using PyPDF4. How to fix this?

106 views Asked by At

I am using PyPDF4 to add file attachments to a base PDF. The final PDF file opens and has all attachment PDFs attached but some of them do not open when double-clicked.

Could you advise on why this happens and what the workaround is? The code is below. Files 1.pdf through 9.pdf all open from the attachments list, but 11.pdf does not, even though the same file opens normally from the folder.

#----addAttachment function of PyPDF4 is modified to add instead of replacing---#
import os
import PyPDF4
from PyPDF4 import PdfFileReader, PdfFileWriter
from PyPDF4.generic import DecodedStreamObject, NameObject, DictionaryObject, createStringObject, ArrayObject

def _name_tree_sort_key(key):
    """Return a bytes value used to order one key of a PDF name tree.

    Byte-string keys are used as-is. Text keys are encoded as Latin-1 when
    possible and as UTF-16BE with a byte-order mark otherwise.
    NOTE(review): this approximates the bytes the writer will emit for the
    string; it is exact for plain ASCII file names -- confirm for others.
    """
    if hasattr(key, "getObject"):
        # Resolve an indirect reference to the actual string object.
        key = key.getObject()
    if isinstance(key, bytes):
        return bytes(key)
    text = str(key)
    try:
        return text.encode("latin-1")
    except UnicodeEncodeError:
        return b"\xfe\xff" + text.encode("utf-16-be")


def appendAttachment(myPdfFileWriterObj, fname, fdata):
    """Embed ``fdata`` in the writer's document as an attachment named ``fname``.

    Unlike a replace-style attach, existing attachments are kept. The
    ``/EmbeddedFiles`` name array is kept sorted by key after every insert:
    the PDF specification requires name-tree keys to be in lexical order, and
    a viewer that looks a name up by binary search cannot find an out-of-order
    entry (e.g. '11.pdf' appended after '9.pdf'), so the attachment is listed
    but does not open.

    Only a flat name tree (a ``/Names`` array directly under
    ``/EmbeddedFiles``) is handled; a tree split into ``/Kids`` is not.

    Args:
        myPdfFileWriterObj: the PdfFileWriter whose catalog is updated in place.
        fname: attachment name, used both as the name-tree key and as ``/F``.
        fdata: raw content of the file to embed.

    Raises:
        ValueError: if the existing name array has an odd number of items and
            therefore cannot be split into key/value pairs.
    """
    # The embedded file stream itself.
    file_entry = DecodedStreamObject()
    file_entry.setData(fdata)
    file_entry.update({NameObject("/Type"): NameObject("/EmbeddedFile")})

    # The Filespec entry pointing at the stream.
    efEntry = DictionaryObject()
    efEntry.update({NameObject("/F"): file_entry})

    filespec = DictionaryObject()
    filespec.update({
        NameObject("/Type"): NameObject("/Filespec"),
        NameObject("/F"): createStringObject(fname),
        NameObject("/EF"): efEntry,
    })

    # Create whichever level of /Names -> /EmbeddedFiles -> /Names is missing,
    # so a catalog that has /Names for another purpose does not raise KeyError.
    root = myPdfFileWriterObj._root_object
    if "/Names" not in root:
        root.update({NameObject("/Names"): DictionaryObject()})
    names_dictionary = root["/Names"]
    if "/EmbeddedFiles" not in names_dictionary:
        names_dictionary.update({NameObject("/EmbeddedFiles"): DictionaryObject()})
    embedded_files = names_dictionary["/EmbeddedFiles"]
    if "/Names" not in embedded_files:
        embedded_files.update({NameObject("/Names"): ArrayObject()})
    entries = embedded_files["/Names"]

    if len(entries) % 2:
        raise ValueError("Malformed /EmbeddedFiles name array: odd number of items")

    # The array alternates key, value, key, value, ... Re-sort all pairs so
    # that an array left unsorted by earlier appends is repaired as well.
    # list.sort is stable, so entries with equal names keep insertion order.
    pairs = [(entries[i], entries[i + 1]) for i in range(0, len(entries), 2)]
    pairs.append((createStringObject(fname), filespec))
    pairs.sort(key=lambda pair: _name_tree_sort_key(pair[0]))
    entries[:] = [item for pair in pairs for item in pair]

# Base document and the files to attach to it.
pdf_file = 'Contents.pdf'
attachment_file = ['1.pdf','2.pdf','3.pdf','4.pdf','5.pdf','6.pdf','7.pdf', '8.pdf', '9.pdf', '11.pdf']


# Keep the source PDF open until the output has been written (the writer may
# still need to read page data from it), then let the context manager close
# it instead of leaking the file handle.
with open(pdf_file, 'rb') as source_stream:
    pdf_reader = PyPDF4.PdfFileReader(source_stream)
    pdf_writer = PyPDF4.PdfFileWriter()

    # Only the first page of the base document is copied to the output.
    pdf_writer.addPage(pdf_reader.getPage(0))
    for attachment_name in attachment_file:
        # The with-block closes the file; no explicit close() is needed.
        with open(attachment_name, 'rb') as f:
            appendAttachment(pdf_writer, attachment_name, f.read())

    # Write the result once, after every attachment has been added, rather
    # than rewriting the output file on each loop iteration.
    with open('CalBook.pdf', 'wb') as f:
        pdf_writer.write(f)
0

There are 0 answers