I have been banging my head against the wall and neither I or chatgpt can figure out why my file won't save.
import pygbif
import os
import numpy as np
import pandas as pd
my_dir = "C:/myfiles/flies_and_moths"
os.chdir(my_dir)
cwd = os.getcwd()
new_folder = "occurrence_files"
extract_to_directory = os.path.join(cwd, new_folder)
for csv_file in os.listdir(extract_to_directory):
try:
full_path = os.path.join(extract_to_directory, csv_file)
# Read the CSV file into a DataFrame
df = pd.read_csv(full_path, sep='\t', low_memory= False)
taxon_key2 = df['taxonKey'].iloc[0]
species_name2 = species.name_lookup(taxonKey=taxon_key2)
# Modify DataFrame using .loc
df.loc[:, "catalogNumber"] = df["catalogNumber"].astype(str)
df.loc[:, "CATALOG_NO"] = df["institutionCode"] + '' + df["catalogNumber"]
conditions = [
df['establishmentMeans'].isin(['INTRODUCED', 'INVASIVE', 'MANAGED', 'NATURALISED']),
df['establishmentMeans'] == 'NATIVE',
df['establishmentMeans'] == 'UNCERTAIN'
]
values = ['3', '1', '5']
df.loc[:, "ORIGIN"] = np.select(conditions, values, default = "1")
keeps = ['decimalLatitude', 'decimalLongitude', 'basisOfRecord','year','CATALOG_NO', 'ORIGIN']
# Select only the specified columns
df2 = df.loc[:, keeps]
df2["SPATIAL_REF"] ="WGS84"
df2["YEAR"] = "2024"
df2["CITATION"] = "GBIF 2024"
df2["PRESENCE"] = "1"
df2["BINOMIAL"] = f"{species_name2}"
df2["SEASONAL"] = "1"
df2["COMPILER"] = f"{assessors_name}"
# Rename columns
df2.rename(columns={"decimalLatitude": "DEC_LAT","decimalLongitude": "DEC_LONG","basisOfRecord": "BasisOfRec","year": "EVENT_YEAR"}, inplace=True)
#make csv from dataframe
outname =f'{species_name2}.csv'
if not os.path.exists(extract_to_directory):
os.mkdir(extract_to_directory)
fullname = os.path.join(extract_to_directory, outname)
df2.to_csv(fullname)
except Exception as e:
print(f"did not write new file: {e}")
ERROR:
did not write new file: Cannot save file into a non-existent directory: 'C:\myfiles\flies_and_moths\occurrence_files\{'offset': 0, 'limit': 100, 'end......
Then it keeps listing the entire contents of the csv which is thousands of entries
I tried checking that directory exists:
C:\myfiles\flies_and_moths\occurrence_files
I checked that the dataframe exists:
>>> print(df2)
DEC_LAT DEC_LONG ... SEASONAL COMPILER
0 35.104935 -111.004723 ... 1
1 36.047530 -104.377200 ... 1
2 32.059819 -110.076233 ... 1
3 31.978887 -108.823113 ... 1
4 32.059991 -110.076137 ... 1
.. ... ... ... ... ...
136 35.238000 -111.576000 ... 1
137 32.430600 -107.762000 ... 1
138 31.883405 -109.204969 ... 1
139 34.212954 -111.460799 ... 1
140 34.212954 -111.460799 ... 1
[141 rows x 13 columns]
The file csv names look like this, but that shouldn't matter because it opens these files just fine: 0034528-240229165702484.csv
I googled this and used chatgpt to try and fix this, nothing worked