How to read an XML file and convert it to a CSV file using an Azure Function? The XML file name and the new CSV file name should also be parameterized

213 views Asked by At

I want to use a Python Azure Function to convert an XML file that resides in my Azure Blob Storage container to CSV. The problem is that I need to pass the XML file name and the CSV file name as query parameters when I test/run the function. How can I do it?

I wrote the following Python code, but it returns a 500 error:

from io import StringIO
import logging
from azure.storage.blob import BlobServiceClient 
import azure.functions as func
import pandas as pd
import requests

# Module-level script: every statement below runs once, when this file is
# imported, and not once per HTTP request handled by main().
constrin = "conn-str"

connection_string = constrin
blob_service_client = BlobServiceClient.from_connection_string(connection_string)
container_client = blob_service_client.get_container_client(r"func-data-01")
# NOTE(review): `requests` is the HTTP client library imported above, not the
# incoming request. It has no `params` attribute, so this line raises
# AttributeError while the module is being imported -- the likely cause of the
# 500. The query string is only reachable as `req.params` inside main().
fileName = requests.params.get('fileName')
# Despite its name, File_Name holds the blob client returned by
# get_blob_client(), not a string.
File_Name = container_client.get_blob_client(fileName)


# Download the whole blob and decode its bytes as UTF-8 text; the right codec
# depends on how the blob was encoded.
blob = File_Name.download_blob().readall().decode("utf-8")

# Parse the downloaded text as CSV into a DataFrame.
# NOTE(review): read_csv expects CSV text; if the blob is XML, as the question
# intends, a different parser is needed -- confirm.
data = pd.read_csv(StringIO(blob))

# Interpolates the blob client object itself, so the output is that object's
# string form rather than the plain `fileName` value.
print(f"file is found an the name is: {File_Name}")


def main(req: func.HttpRequest) -> func.HttpResponse:
    """Greet the caller by the ``name`` found in the query string or JSON body.

    The query string is checked first. The JSON body is only consulted when
    the query string yields no non-empty ``name``. With a name, the response
    is a personalized greeting; without one, it is a generic success message
    sent with an explicit 200 status.
    """
    logging.info('Python HTTP trigger function processed a request.')

    name = req.params.get('name')
    if not name:
        try:
            payload = req.get_json()
        except ValueError:
            # get_json() could not produce a body: continue without a name.
            payload = {}
        name = payload.get('name')

    # Guard clause: no usable name from either source.
    if not name:
        return func.HttpResponse(
            "This HTTP-triggered function executed successfully. Pass a name in the query string or the request body for a personalized response.",
            status_code=200,
        )

    return func.HttpResponse(f"Hello, {name}. This HTTP triggered function executed successfully.")

Final-Output:

enter image description here

Query-param:

enter image description here

1

There is 1 answer

0
Pravallika KV On

I have reproduced your requirement in my environment.

I was able to read an XML file stored in a Storage container and convert it to a CSV file using a Python Azure Function.

  • Created a sample XML file and uploaded it to the Storage container.

enter image description here

My Python Azure function code:

This function downloads the XML file to a local file, converts it to CSV, and then uploads the CSV to the Azure Storage container.

enter image description here

__init__.py function:

import logging
import azure.functions as func
import xml.etree.ElementTree as ET
from azure.storage.blob import BlobServiceClient
import csv

def main(req: func.HttpRequest) -> func.HttpResponse:
    """Convert an XML blob to a CSV blob inside the same storage container.

    Query parameters:
        xml_file_name: name of the source XML blob to download.
        csv_file_name: name of the CSV blob to create.

    Each direct child of the XML root becomes one CSV row built from its
    ``title``, ``author``, ``genre``, ``price``, ``publish_date`` and
    ``description`` sub-elements. A child missing any of them is skipped and
    reported in the log.

    Returns:
        A 400 response when either query parameter is missing or empty,
        otherwise a success message once the CSV has been uploaded.
    """
    # Function-scope imports keep this block self-contained.
    import os
    import tempfile

    logging.info('Python HTTP trigger function processed a request.')

    connection_string = "<Storage_Connection_String>"
    container_name = 'test'

    # Query parameters
    xml_file_name = req.params.get('xml_file_name')
    csv_file_name = req.params.get('csv_file_name')
    if not xml_file_name or not csv_file_name:
        # Fail fast with a client error instead of handing None to the SDK.
        return func.HttpResponse(
            "Pass both 'xml_file_name' and 'csv_file_name' in the query string.",
            status_code=400,
        )

    # Create a BlobServiceClient object
    blob_service_client = BlobServiceClient.from_connection_string(connection_string)

    # Sub-elements read from every record, in CSV column order.
    field_tags = ["title", "author", "genre", "price", "publish_date", "description"]

    # Use a per-invocation scratch directory: fixed file names in the working
    # directory collide between concurrent invocations, and the working
    # directory is not guaranteed to be writable.
    with tempfile.TemporaryDirectory() as work_dir:
        local_xml_path = os.path.join(work_dir, "local_xml_file.xml")
        local_csv_path = os.path.join(work_dir, "local_csv_file.csv")

        # Download the XML blob to a local file
        blob_client = blob_service_client.get_blob_client(container=container_name, blob=xml_file_name)
        with open(local_xml_path, "wb") as my_blob:
            my_blob.write(blob_client.download_blob().readall())

        # Parse the XML file and write the CSV file
        root = ET.parse(local_xml_path).getroot()
        # Explicit UTF-8 so non-ASCII text does not depend on the host locale.
        with open(local_csv_path, "w", newline="", encoding="utf-8") as my_csv:
            writer = csv.writer(my_csv)
            # Write the header row
            writer.writerow(["Title", "Author", "Genre", "Price", "PublishDate", "Description"])
            # Write the data rows
            for child in root:
                cells = [child.find(tag) for tag in field_tags]
                if all(cell is not None for cell in cells):
                    writer.writerow([cell.text for cell in cells])
                else:
                    # This branch belongs to the `if`, so it fires once per
                    # incomplete record. Bound to the `for` loop it would
                    # fire after every run, complete or not.
                    logging.warning("Error: Missing element in XML file")

        # Upload the CSV file to Azure Blob Storage.
        # NOTE: upload_blob() does not overwrite by default, so the upload
        # fails if a blob named csv_file_name already exists.
        with open(local_csv_path, "rb") as my_csv:
            blob_client = blob_service_client.get_blob_client(container=container_name, blob=csv_file_name)
            blob_client.upload_blob(my_csv)

    return func.HttpResponse("XML file converted to CSV file  successfully.")

enter image description here

enter image description here

enter image description here

Portal:

enter image description here