I am using below code to make entries of INDIAN passport we have in possession, here is the code, the xpath is correct but the file is printing error, Data extraction failed. Check XPath expressions.
also need to change html_content = "<html>...</html>" # Replace with actual HTML content
import webbrowser
import pandas as pd
from lxml import html
def get_user_input():
return input("Enter the file number (or 'end' to exit): ")
def generate_url(file_number):
base_url = "https://portal2.passportindia.gov.in/AppOnlineProject/statusTracker/trackStatusForFileNoNew?fileNo="
return base_url + file_number
def scrape_data(html_content):
tree = html.fromstring(html_content)
# Example XPath expressions (replace with actual ones)
extracted_file_number = tree.xpath("/html/body/table/tbody/tr/td/table/tbody/tr[1]/td/div/table/tbody/tr[6]/td/table/tbody/tr/td[2]/form/div/table/tbody/tr[1]/td/table/tbody/tr[1]/td[2]")
extracted_dob = tree.xpath("/html/body/table/tbody/tr/td/table/tbody/tr[1]/td/div/table/tbody/tr[6]/td/table/tbody/tr/td[2]/form/div/table/tbody/tr[1]/td/table/tbody/tr[2]/td[2]")
extracted_given_name = tree.xpath("/html/body/table/tbody/tr/td/table/tbody/tr[1]/td/div/table/tbody/tr[6]/td/table/tbody/tr/td[2]/form/div/table/tbody/tr[1]/td/table/tbody/tr[3]/td[2]")
extracted_surname = tree.xpath("/html/body/table/tbody/tr/td/table/tbody/tr[1]/td/div/table/tbody/tr[6]/td/table/tbody/tr/td[2]/form/div/table/tbody/tr[1]/td/table/tbody/tr[4]/td[2]")
extracted_received_date = tree.xpath("/html/body/table/tbody/tr/td/table/tbody/tr[1]/td/div/table/tbody/tr[6]/td/table/tbody/tr/td[2]/form/div/table/tbody/tr[1]/td/table/tbody/tr[6]/td[2]")
# Check if any data was extracted
if extracted_file_number:
extracted_data = {
"File Number": extracted_file_number[0],
"Date of Birth": extracted_dob[0],
"Given Name": extracted_given_name[0],
"Surname": extracted_surname[0],
"Application Received Date": extracted_received_date[0],
}
return extracted_data
else:
print("Data extraction failed. Check XPath expressions.")
return None
def store_data(data):
df = pd.DataFrame(data, index=[0])
with pd.ExcelWriter("E:\\passport_data.xlsx", mode="a", engine="openpyxl") as writer:
df.to_excel(writer, index=False, header=False)
def main():
while True:
file_number = get_user_input()
if file_number.lower() == "end":
print("Exiting the program.")
break
url = generate_url(file_number)
webbrowser.open(url)
# Placeholder for actual HTML content (retrieve from the opened URL)
html_content = "<html>...</html>" # Replace with actual HTML content
extracted_data = scrape_data(html_content)
if extracted_data:
store_data(extracted_data)
print("Data saved to passport_data.xlsx")
else:
print("Data extraction failed. Please check the website or try another file number.")
if __name__ == "__main__":
main()
I want the python file to create excel data when entering file number of the passport, it will go to the passport seva link and extract data from the site, after that it will store the same in excel file in table format.