How to highlight empty cells of a table in a PDF that contains tables

90 views Asked by At
from PyPDF2 import PdfReader, PdfWriter
import pdfrw
from google.colab import files
from google.colab import patches

def highlight_empty_cells_in_pdf(table, pdf_path, output_pdf_path):
    """Highlight the empty cells of ``table`` on the first page of a PDF.

    For every cell whose content is ``None`` or whitespace-only, a yellow
    highlight annotation is added over the rectangle of the existing page
    annotation whose index equals the cell's column index.  The first page,
    with the new highlights, is written to ``output_pdf_path``.

    NOTE(review): mapping a column index onto ``/Annots[column]`` is kept
    from the original code; it presumably assumes the page carries one
    annotation per table column -- confirm against the input PDF.  Every
    row reuses the same per-column rectangle.

    Args:
        table: Iterable of rows; each row is an iterable of cell values
            (strings or ``None``).
        pdf_path: Path of the PDF to read.
        output_pdf_path: Path the annotated single-page PDF is written to.

    Raises:
        IndexError: If an empty cell's column index has no matching
            annotation on the page.
    """
    # Local import: these PyPDF2 object types are only needed in here.
    from PyPDF2.generic import (
        ArrayObject,
        DictionaryObject,
        FloatObject,
        NameObject,
        NumberObject,
    )

    pdf_reader = PdfReader(pdf_path)
    pdf_writer = PdfWriter()

    page = pdf_reader.pages[0]

    # PageObject has no get_annotations(); annotations are stored in the page
    # dictionary under "/Annots", usually as indirect references.  Take a
    # snapshot list so indices stay stable while highlights are appended.
    annotations = []
    if '/Annots' in page:
        annotations = [ref.get_object() for ref in page['/Annots']]

    # The page must belong to the writer before annotations can be added.
    pdf_writer.add_page(page)

    for row in table:
        for column, content in enumerate(row):
            # Table extractors commonly yield None for empty cells.
            if content is not None and str(content).strip() != '':
                continue

            if column >= len(annotations):
                raise IndexError(
                    f'no annotation found for column {column}; '
                    f'the page has {len(annotations)} annotation(s)'
                )

            x0, y0, x1, y1 = (float(i) for i in annotations[column]['/Rect'])
            # Normalise so the rectangle is valid whatever the corner order.
            left, right = min(x0, x1), max(x0, x1)
            bottom, top = min(y0, y1), max(y0, y1)

            highlight = DictionaryObject({
                NameObject('/Type'): NameObject('/Annot'),
                NameObject('/Subtype'): NameObject('/Highlight'),
                NameObject('/Rect'): ArrayObject(
                    [FloatObject(v) for v in (left, bottom, right, top)]
                ),
                # Corner order most viewers expect for text markup:
                # top-left, top-right, bottom-left, bottom-right.
                NameObject('/QuadPoints'): ArrayObject(
                    [FloatObject(v) for v in (
                        left, top, right, top, left, bottom, right, bottom,
                    )]
                ),
                # Yellow highlight colour (RGB).
                NameObject('/C'): ArrayObject(
                    [FloatObject(1), FloatObject(1), FloatObject(0)]
                ),
                # Annotation flag 4 = Print, so the highlight is printed too.
                NameObject('/F'): NumberObject(4),
            })
            pdf_writer.add_annotation(0, highlight)

    with open(output_pdf_path, 'wb') as output_pdf:
        pdf_writer.write(output_pdf)

# Usage example

pdf_path = '/table_sample.pdf'
output_pdf_path = '/12007468.pdf'

# NOTE(review): extract_table_from_pdf is not defined in the visible code;
# it must be defined or imported before this line runs.
table = extract_table_from_pdf(pdf_path)
highlight_empty_cells_in_pdf(table, pdf_path, output_pdf_path)

# Hand the highlighted PDF to the browser so it can be viewed and printed.
# google.colab.patches does not provide a PdfFileViewer, so the file is
# downloaded through the already-imported google.colab.files helper instead.
files.download(output_pdf_path)

ERROR:

AttributeError                            
Traceback (most recent call last)
<ipython-input-10-a90360d8311f> in <cell line: 24>()
22
23 table = extract_table_from_pdf(pdf_path)
24 highlight_empty_cells_in_pdf(table, pdf_path, output_pdf_path)
25
26 # Display and print the PDF file

<ipython-input-10-a90360d8311f> in highlight_empty_cells_in_pdf(table, pdf_path, output_pdf_path)
4
5     page = pdf_reader.pages[0]
6     annotations = page.get_annotations()
7
8     for row in table:

AttributeError: 'PageObject' object has no attribute 'get_annotations'
0

There are 0 answers