Reading multiple shapefiles with geopandas from a zip file in memory

81 views Asked by At

I have a zipfile containing two shapefiles that is hosted on a storage. Requirement it to read the shapefiles from the zipfile without downloading locally. The code below returns an error "`/vsimem/155d82d191b646f496de7ff3ef8283e7' not recognized as a supported file format." Not sure what more should I do to get this working.

import io
import json
import geopandas as gpd
import azure.functions as func
import requests 
from zipfile import ZipFile

# util functions
is_shape = lambda string: string.endswith('shp')

def main(req: func.HttpRequest) -> func.HttpResponse:
    try:
        # Data.zip has two shapfiles. DAMSELFISH_distributions.shp and DAMSELFISH_distributions2.shp)
        zipfile_url = "https://github.com/delatitude/spatialtestdata/raw/8c4dea03f4e325aefa523854d44a7084b6316f6e/Data.zip"
    
        # Source : https://stackoverflow.com/questions/72533355/reading-shapefiles-inside-nested-zip-archives
        gdfs = []
        zipfile_url_response = requests.get(zipfile_url)
        with ZipFile(io.BytesIO(zipfile_url_response.content)) as main_zfile:
            for file_name in main_zfile.namelist():
                if is_shape(file_name):
                    print("*** " + file_name)
                    with main_zfile.open(file_name, "r") as zipped_shp:
                        # for gpd.read_file() file position must be changed back to 0
                        zipped_shp.seek(0)
                        gdfs.append(gpd.read_file(zipped_shp))
                        rows, cols = gdfs[-1].shape
                        print(f'GeoDataFrame: {rows} rows, {cols} columns\n')      
                                          

        # head of first gdf
        #print(gdfs[0].head())
                    
        return func.HttpResponse(json.dumps({ "Status" : "success" }),status_code=200, mimetype="application/json")
    except Exception as e:
        return func.HttpResponse(f"Error: {str(e)}", status_code=500)
1

There are 1 answers

0
Pieter On

geopandas.read_file uses GDAL under the hood, and GDAL supports reading remote zip files natively via its virtual_file_systems system.

Is there a specific reason why you don't use that, as it seems to work on those files?

import geopandas as gpd

def main():
    # Data.zip has two shapefiles. DAMSELFISH_distributions.shp and DAMSELFISH_distributions2.shp)
    zipfile_url = "/vsizip/vsicurl/https://github.com/delatitude/spatialtestdata/raw/8c4dea03f4e325aefa523854d44a7084b6316f6e/Data.zip"
    file1_url = f"{zipfile_url}/DAMSELFISH_distributions.shp"
    file2_url = f"{zipfile_url}/DAMSELFISH_distributions2.shp"

    # Source : https://stackoverflow.com/questions/72533355/reading-shapefiles-inside-nested-zip-archives
    gdfs = []
    gdfs.append(gpd.read_file(file1_url))
    gdfs.append(gpd.read_file(file2_url))
    rows, cols = gdfs[-1].shape
    print(f"GeoDataFrame: {rows} rows, {cols} columns\n")

main()