I'm trying to extract all the tables from my PDF and convert them into a structured table in a .csv file, but all I get is a CSV file with a single column.
I need to write Python code that extracts all the data from this PDF and saves it as a structured table in CSV format.
My code:
import tabula
import pandas as pd

# Script: read every table from a PDF with tabula, stack them into one
# DataFrame, label the 13 columns and write the result to a CSV file.

# Path of the PDF whose tables are extracted.
pdf_path = "./meuPdf.pdf"

# Names assigned to the 13 columns of the combined table.
# NOTE(review): "RN\r(alteração)" contains a carriage return inside the
# header text -- confirm this is intended before relying on the CSV header.
cols = [
    "PROCEDIMENTO",
    "RN\r(alteração)",
    "VIGÊNCIA",
    "OD",
    "AMB",
    "HCO",
    "HSO",
    "REF",
    "PAC",
    "DUT",
    "SUBGRUPO",
    "GRUPO",
    "CAPÍTULO",
]

# Extract the tables from all pages; {"header": None} is handed to pandas
# through tabula's pandas_options argument.
tables = tabula.read_pdf(
    pdf_path,
    pages="all",
    pandas_options={"header": None},
)

# Keep only the first 13 columns of every extracted table.
tables = [frame.iloc[:, :13] for frame in tables]

# Stack all tables vertically with a fresh 0..n-1 row index.
df_final = pd.concat(tables, ignore_index=True)

# The header list below only fits a 13-column frame, so fail early otherwise.
if len(df_final.columns) != 13:
    raise ValueError("O número de colunas não é 13")

df_final.columns = cols

# Write the comma-separated, UTF-8 encoded result without the row index.
df_final.to_csv(
    "converted.csv",
    sep=",",
    index=False,
    encoding="utf-8",
)

print(df_final)
