convert a CSV file to JSON file

944 views Asked by At

I am trying to convert CSV file to JSON file based on a column value. The csv file looks somewhat like this.

ID        Name          Age         
CSE001    John           18
CSE002    Marie          20
ECE001    Josh           22
ECE002    Peter          23

currently I am using the following code to obtain json file.

import csv
import json
 
def csv_to_json(csv_file_path, json_file_path):
    
    data_dict = {}
 
    with open(csv_file_path, encoding = 'utf-8') as csv_file_handler:
        csv_reader = csv.DictReader(csv_file_handler)
 
        for rows in csv_reader:
            
            key = rows['ID']
            data_dict[key] = rows

    with open(json_file_path, 'w', encoding = 'utf-8') as json_file_handler:
        json_file_handler.write(json.dumps(data_dict, indent = 4))

OUTPUT:

**{  
  "CSE001":{ 
         "ID":"CSE001",
         "Name":"John",
         "Age":18
        }
 "CSE002":{
        "ID":"CSE002",
        "Name":"Marie",
        "Age":20
       }
"ECE001":{
       "ID":"ECE001",
       "Name":"Josh",
       "Age":22
      }
"ECE002":{
       "ID":"ECE002",
       "Name":"Peter",
       "Age":23
      }
}**

I want my output to generate two separate json files for CSE and ECE based on the ID value. Is there a way to achieve this output.

Required Output:

CSE.json:

{  
    "CSE001":{ 
             "ID":"CSE001",
             "Name":"John",
             "Age":18
            }
   "CSE002":{
           "ID":"CSE002",
            "Name":"Marie",
            "Age":20
           }
}

ECE.json:

{
 "ECE001":{
           "ID":"ECE001",
           "Name":"Josh",
           "Age":22
          }
 "ECE002":{
           "ID":"ECE002",
           "Name":"Peter",
           "Age":23
          }
    }
3

There are 3 answers

0
Dan Lai On

I would suggest you to use pandas, that way will be more easier.

Code may look like:

import pandas as pd

def csv_to_json(csv_file_path):
    df = pd.read_csv(csv_file_path)

    df_CSE = df[df['ID'].str.contains('CSE')]
    df_ECE = df[df['ID'].str.contains('ECE')]

    df_CSE.to_json('CSE.json')
    df_ECE.to_json('ESE.json')
0
Deepak Tripathi On

You can create dataframe and then do the following operation

import pandas as pd
df = pd.DataFrame.from_dict({  
  "CSE001":{ 
         "ID":"CSE001",
         "Name":"John",
         "Age":18
        },
 "CSE002":{
        "ID":"CSE002",
        "Name":"Marie",
        "Age":20
       },
"ECE001":{
       "ID":"ECE001",
       "Name":"Josh",
       "Age":22
      },
"ECE002":{
       "ID":"ECE002",
       "Name":"Peter",
       "Age":23
      }
},orient='index')

df["id_"] = df["ID"].str[0:2] # temp column for storing first two chars
grps = df.groupby("id_")[["ID", "Name", "Age"]]
for k, v in grps:
  print(v.to_json(orient="index")) # you can create json file as well
0
Martin Evans On

You could store each row into two level dictionary with the top level being the first 3 characters of the ID.

These could then be written out into separate files with the key being part of the filename:

from collections import defaultdict
import csv
import json

 
def csv_to_json(csv_file_path, json_base_path):
    data_dict = defaultdict(dict)
 
    with open(csv_file_path, encoding = 'utf-8') as csv_file_handler:
        csv_reader = csv.DictReader(csv_file_handler)
 
        for row in csv_reader:
            key = row['ID'][:3]
            data_dict[key][row['ID']] = row
    
    for key, values in data_dict.items():
        with open(f'{json_base_path}_{key}.json', 'w', encoding='utf-8') as json_file_handler:
            json_file_handler.write(json.dumps(values, indent = 4))
                                                 
csv_to_json('input.csv', 'output')

The defaultdict is used to avoid needing to first test if a key is already present before using it.

This would create output_CSE.json and output_ECE.json, e.g.

{
    "ECE001": {
        "ID": "ECE001",
        "Name": "Josh",
        "Age": "22"
    },
    "ECE002": {
        "ID": "ECE002",
        "Name": "Peter",
        "Age": "23"
    }
}