How do you download a file from Azure Data Lake when you know the type of the file but not the name?

1.9k views Asked by At

I can run the following to download the file "some/path/known_name.json"

def download_file():
    """Download "some/path/known_name.json" and save it locally as 'my_file.json'.

    The remote file is read fully into memory and then written to the
    current working directory. Any exception raised along the way is
    caught and printed rather than propagated.
    """
    try:
        file_system_client = FileSystemClient.from_connection_string(...)

        full_file_location = "some/path/known_name.json"
        target_file_client = file_system_client.get_file_client(full_file_location)

        download = target_file_client.download_file()
        downloaded_bytes = download.readall()
        # The context manager closes the local file even if write() raises.
        with open('my_file.json', 'wb') as local_file:
            local_file.write(downloaded_bytes)

    except Exception as e:
        print(e)

My question is: how do I download a file from some other path when its name is unknown but its type is known, e.g. "different/path/xxx.json"?

1

There is 1 answer

0
Joseph  Xu On BEST ANSWER

You can list the blobs in the container and then filter the json files by the blob.name.

Here are the blobs in my test container:

enter image description here

Here is my python code:

import os, uuid
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient

try:
    # Read the storage account connection string from the environment.
    connect_str = os.getenv('AZURE_STORAGE_CONNECTION_STRING')

    # Create the BlobServiceClient object which will be used to get a container client
    blob_service_client = BlobServiceClient.from_connection_string(connect_str)

    # Name of the container to search
    container_name = "test"

    # Get a client for that container
    container_client = blob_service_client.get_container_client(container_name)

    # List the blobs in the container and download every one whose name ends in ".json"
    local_path = "./data"
    blob_list = container_client.list_blobs()
    for blob in blob_list:
        # Match on the suffix: a substring test would also pick up
        # names such as "x.json.bak" or "x.jsonl".
        if not blob.name.endswith('.json'):
            continue

        local_file_name = blob.name
        blob_client = blob_service_client.get_blob_client(container=container_name, blob=local_file_name)
        download_file_path = os.path.join(local_path, local_file_name)

        # Blob names may contain "/" (virtual directories) and local_path may
        # not exist yet, so create the target directory before opening the file.
        os.makedirs(os.path.dirname(download_file_path), exist_ok=True)
        print("\nDownloading blob to \n\t" + local_path)

        with open(download_file_path, "wb") as download_file:
            download_file.write(blob_client.download_blob().readall())
        print("\t" + blob.name)

except Exception as ex:
    print('Exception:')
    print(ex)

When I run the code, it downloads data.json and data2.json.
enter image description here