from PyPDF2 import PdfReader, PdfWriter
import pdfrw
from google.colab import files
from google.colab import patches
def highlight_empty_cells_in_pdf(table, pdf_path, output_pdf_path):
pdf_reader = PdfReader(pdf_path)
pdf_writer = PdfWriter()
page = pdf_reader.pages[0]
annotations = page.get_annotations()
for row in table:
for column, content in enumerate(row):
if content.strip() == '':
rect = [float(i) for i in annotations[column]['/Rect']]
rect[3], rect[1] = rect[1], rect[3] # Swap y coordinates
page.add_highlight_annot(rect)
pdf_writer.add_page(page)
with open(output_pdf_path, 'wb') as output_pdf:
pdf_writer.write(output_pdf)
# Usage example
pdf_path = '/table_sample.pdf'
output_pdf_path = '/12007468.pdf'
table = extract_table_from_pdf(pdf_path)
highlight_empty_cells_in_pdf(table, pdf_path, output_pdf_path)
# Display and print the PDF file
pdf_viewer = patches.PdfFileViewer(output_pdf_path)
pdf_viewer.show()
ERROR:
AttributeError
Traceback (most recent call last)
ipython-input-10-a90360d8311f\> in \<cell line: 24\>()
22
23 table = extract_table_from_pdf(pdf_path)
24 highlight_empty_cells_in_pdf(table, pdf_path, output_pdf_path)
25
26 # Display and print the PDF file
ipython-input-10-a90360d8311f\> in highlight_empty_cells_in_pdf(table, pdf_path, output_pdf_path)
4
5 page = pdf_reader.pages\[0\]
6 annotations = page.get_annotations()
7
8 for row in table:
AttributeError: 'PageObject' object has no attribute 'get_annotations'