#unable to find the last column of my pdf

1 messages · Page 1 of 1 (latest)

knotty rapids
#
import tabula
import pandas as pd

# Table boundaries on the page, handed to tabula's ``area`` option below.
# NOTE(review): tabula-py documents these as PDF points measured from the
# top-left corner of the page -- confirm the values against the real page size.
top = 150
left = 60
right = 700
bottom = 900

# Read the PDF and extract tables
dfs = tabula.read_pdf(
    "/Users/admin/Desktop/raw /Oil Lines/South/CL40/IP Data/2021/12P49SSP_FR.pdf",
    # Only page 84 is read for now
    pages='84',
    guess=False,
    lattice=True,
    # tabula expects the order (top, left, bottom, right). Passing
    # (top, left, right, bottom) made `right` act as the bottom edge and
    # `bottom` act as the right edge, so increasing `right` never widened the
    # extraction area and the last column stayed outside of it.
    area=(top, left, bottom, right)
)

# Check if tables were extracted
if not dfs:
    print("No tables found in the specified area.")
else:
    # Combine tables into a single DataFrame
    combined_df = pd.concat(dfs)

    # Full header of the tally table, including the last column that used to
    # be cut off by the mis-ordered extraction area.
    column_names = [
        'log distance [m]', 'feature type', 'feature identification',
        'comment', 'cl. id', 'joint number', 'weld type', 'length comp. [m]',
        'ID [mm]', 't [mm]', 'bend Y/N', 'to u/s weld [m]', 'clock pos.',
        'surf. loc.', 'avg d [%]', 'd[%]', 'l [mm]', 'w [mm]', 'dim. class',
        'ERF B31G', 'PB weldnr.', 'location class', 'DIST_2014 [m]',
    ]

    found_columns = combined_df.shape[1]
    if found_columns == len(column_names):
        combined_df.columns = column_names
    elif found_columns == len(column_names) - 1:
        # Keep the previous behaviour working if the last column is still
        # outside the area, but say so instead of hiding it.
        print(
            "Warning: last column 'DIST_2014 [m]' was not extracted; "
            "widen the 'right' boundary."
        )
        combined_df.columns = column_names[:-1]
    else:
        # Fail with a message that names both counts rather than the generic
        # pandas length-mismatch error.
        raise ValueError(
            f"Extracted {found_columns} columns but expected "
            f"{len(column_names)}; check the area boundaries."
        )

    # Save the combined table as a CSV file
    output_path = "/Users/admin/Desktop/Stages/Stage1/Oil Lines/South/CL40/PIPE_TALLY_2021.csv"
    combined_df.to_csv(output_path, index=False)

    print("2021 Combined table saved as CSV")

I keep changing the 'right' variable to a huge number, but I still can't get the last column of my PDF file, and I'm not sure why. When I edited the column names, I kept getting an error saying that it is only able to find 22 elements, which is why I commented out the last column name, but I don't really understand why it is unable to find it.

round tangle
#

While we do like to help everyone, this is Pycord, a Discord API Wrapper. If you don't know what that means, you are probably asking in the wrong place. You can try asking in the Python Discord Server: (.gg/python)