I'm encountering an issue with synchronizing data between my Python application and a MongoDB Atlas cluster. Here are the details:
I have a Python script that synchronizes data between a CSV file and a MongoDB database. When I run the script with a localhost MongoDB connection string, it works perfectly fine. However, when I switch to using a MongoDB Atlas cluster connection string, the synchronization process fails.
The MongoDB Atlas cluster is hosted on AWS (the free tier) and I'm using the following connection string: mongodb+srv://NewUser:**********@cluster0.nnyqklj.mongodb.net/ExcelDB. The MongoDB user "NewUser" has been granted the necessary permissions to access the "ExcelDB" database in the Atlas cluster.
I have tried manually changing documents in my Compass, it is updated in my Atlas with no issues, but when i run the code, it throws an error.
The thing is the code works perfectly fine when change the connection string to 'mongodb://localhost:27017/'(I have the same DB here).
`
import pandas as pd
import pymongo
import logging
def synchronize_data(csv_file, mongo_uri, database_name, collection_name, primary_key):
# Setup logging
logging.basicConfig(filename='sync_log.log', level=logging.INFO)
try:
# Connect to MongoDB
client = pymongo.MongoClient(mongo_uri)
db = client[database_name]
collection = db[collection_name]
# Read CSV file
Exdf = pd.read_csv(csv_file)
Exdf_cleaned = Exdf.dropna(how='all').dropna(axis=1, how='all')
# Convert DataFrame to list of dictionaries
new_records = Exdf_cleaned.where(pd.notnull(Exdf_cleaned), "N/A").to_dict(orient='records')
existing_records = list(collection.find({}, {"_id": 0}))
# Extract column names from the DataFrame
csv_columns = set(Exdf_cleaned.columns)
# Extract column names from existing MongoDB documents
mongo_columns = set(existing_records[0].keys()) if existing_records else set()
# Determine columns to add and remove
columns_to_add = csv_columns - mongo_columns
columns_to_remove = mongo_columns - csv_columns
# Initialize counters
num_updated = 0
num_deleted = 0
num_inserted = 0
if len(existing_records) == 0:
num_inserted = len(new_records)
else:
# Update or delete existing records
for existing_record in existing_records:
found = False
for new_record in new_records:
if existing_record[primary_key] == new_record[primary_key]:
found = True
# Check for field-level changes
if existing_record != new_record:
# Create update operation by excluding fields deleted from CSV
update_operation = {"$set": {key: new_record[key] for key in new_record if key in existing_record}}
# Include $unset for fields removed from CSV
for key in existing_record:
if key not in new_record:
update_operation["$unset"] = {key: ""}
collection.update_one({primary_key: new_record[primary_key]}, update_operation)
num_updated += 1
break
if not found:
# Delete the record if not found in new data
collection.delete_one({primary_key: existing_record[primary_key]})
num_deleted += 1
# Insert new records
for new_record in new_records:
if not any(existing_record[primary_key] == new_record[primary_key] for existing_record in existing_records):
collection.insert_one(new_record)
num_inserted += 1
# Remove columns from MongoDB documents
for column in columns_to_remove:
collection.update_many({}, {"$unset": {column: ""}})
num_deleted += 1
# Add new columns to MongoDB documents
for column in columns_to_add:
for index, row in Exdf_cleaned.iterrows():
collection.update_one({primary_key: row[primary_key]}, {"$set": {column: row[column]}})
num_updated += 1
# Log synchronization results
logging.info(f"Number of documents updated: {num_updated}")
logging.info(f"Number of documents deleted: {num_deleted}")
logging.info(f"Number of documents inserted: {num_inserted}")
# Print synchronization results
print(f"Number of documents updated: {num_updated}")
print(f"Number of documents deleted: {num_deleted}")
print(f"Number of documents inserted: {num_inserted}")
except FileNotFoundError:
logging.exception(f"CSV file {csv_file} not found.")
print(f"CSV file {csv_file} not found.")
except Exception as e:
# Log any exceptions
logging.exception("An error occurred during synchronization:")
print("An error occurred during synchronization:", e)
synchronize_data("C:/Users/aksha/Desktop/mongoexceltest.csv", "mongodb+srv://NewUser:[email protected]/ExcelDB", "ExcelDB", "Excel", "EEID")
Im shown this error An error occurred during synchronization: 'EEID'
note that this works perfectly in my local host and the column exists in both the collection and excel