Unable to Synchronize Data with MongoDB Atlas Cluster Using Python

26 views Asked by At

I'm encountering an issue with synchronizing data between my Python application and a MongoDB Atlas cluster. Here are the details:

I have a Python script that synchronizes data between a CSV file and a MongoDB database. When I run the script with a localhost MongoDB connection string, it works perfectly fine. However, when I switch to using a MongoDB Atlas cluster connection string, the synchronization process fails.

The MongoDB Atlas cluster is hosted on AWS (the free tier) and I'm using the following connection string: mongodb+srv://NewUser:**********@cluster0.nnyqklj.mongodb.net/ExcelDB. The MongoDB user "NewUser" has been granted the necessary permissions to access the "ExcelDB" database in the Atlas cluster.

I have tried manually changing documents in my Compass, it is updated in my Atlas with no issues, but when i run the code, it throws an error.

The thing is the code works perfectly fine when change the connection string to 'mongodb://localhost:27017/'(I have the same DB here).

`

import pandas as pd
import pymongo
import logging

def synchronize_data(csv_file, mongo_uri, database_name, collection_name, primary_key):
    # Setup logging
    logging.basicConfig(filename='sync_log.log', level=logging.INFO)

    try:
        # Connect to MongoDB
        client = pymongo.MongoClient(mongo_uri)
        db = client[database_name]
        collection = db[collection_name]

        # Read CSV file
        Exdf = pd.read_csv(csv_file)
        Exdf_cleaned = Exdf.dropna(how='all').dropna(axis=1, how='all')

        # Convert DataFrame to list of dictionaries
        new_records = Exdf_cleaned.where(pd.notnull(Exdf_cleaned), "N/A").to_dict(orient='records')
        existing_records = list(collection.find({}, {"_id": 0}))

        # Extract column names from the DataFrame
        csv_columns = set(Exdf_cleaned.columns)

        # Extract column names from existing MongoDB documents
        mongo_columns = set(existing_records[0].keys()) if existing_records else set()

        # Determine columns to add and remove
        columns_to_add = csv_columns - mongo_columns
        columns_to_remove = mongo_columns - csv_columns

        # Initialize counters
        num_updated = 0
        num_deleted = 0
        num_inserted = 0

        if len(existing_records) == 0:
            num_inserted = len(new_records)
        else:
            # Update or delete existing records
            for existing_record in existing_records:
                found = False
                for new_record in new_records:
                    if existing_record[primary_key] == new_record[primary_key]:
                        found = True
                        # Check for field-level changes
                        if existing_record != new_record:
                            # Create update operation by excluding fields deleted from CSV
                            update_operation = {"$set": {key: new_record[key] for key in new_record if key in existing_record}}
                            # Include $unset for fields removed from CSV
                            for key in existing_record:
                                if key not in new_record:
                                    update_operation["$unset"] = {key: ""}
                            collection.update_one({primary_key: new_record[primary_key]}, update_operation)
                            num_updated += 1
                        break
                if not found:
                    # Delete the record if not found in new data
                    collection.delete_one({primary_key: existing_record[primary_key]})
                    num_deleted += 1

        # Insert new records
        for new_record in new_records:
            if not any(existing_record[primary_key] == new_record[primary_key] for existing_record in existing_records):
                collection.insert_one(new_record)
                num_inserted += 1

        # Remove columns from MongoDB documents
        for column in columns_to_remove:
            collection.update_many({}, {"$unset": {column: ""}})
            num_deleted += 1

        # Add new columns to MongoDB documents
        for column in columns_to_add:
            for index, row in Exdf_cleaned.iterrows():
                collection.update_one({primary_key: row[primary_key]}, {"$set": {column: row[column]}})
            num_updated += 1

        # Log synchronization results
        logging.info(f"Number of documents updated: {num_updated}")
        logging.info(f"Number of documents deleted: {num_deleted}")
        logging.info(f"Number of documents inserted: {num_inserted}")

        # Print synchronization results
        print(f"Number of documents updated: {num_updated}")
        print(f"Number of documents deleted: {num_deleted}")
        print(f"Number of documents inserted: {num_inserted}")

    except FileNotFoundError:
        logging.exception(f"CSV file {csv_file} not found.")
        print(f"CSV file {csv_file} not found.")
    except Exception as e:
        # Log any exceptions
        logging.exception("An error occurred during synchronization:")
        print("An error occurred during synchronization:", e)


synchronize_data("C:/Users/aksha/Desktop/mongoexceltest.csv", "mongodb+srv://NewUser:[email protected]/ExcelDB", "ExcelDB", "Excel", "EEID")

Im shown this error An error occurred during synchronization: 'EEID'

note that this works perfectly in my local host and the column exists in both the collection and excel

0

There are 0 answers