Parsing CSV file with JSON data in Golang

235 views Asked by At

I've a CSV File having data in below format.

Date,RestaurantId,ItemRatings
2023-10-08,232,[{"item_id":8215117,"item_name":"The Farmers Breakfast","current_day_count":0,"current_day_sum":0,"mtd_count":1,"mtd_sum":5,"wtd_count":0,"wtd_sum":0},{"item_id":8215132,"item_name":"The Great White","current_day_count":0,"current_day_sum":0,"mtd_count":1,"mtd_sum":5,"wtd_count":0,"wtd_sum":0}]

I want to parse the CSV file to store the data in a struct

type ItemRatings struct {
    RestaurantId          int     `json:“item_id”`
    Date       string `json:"date"`
    ItemData   []ItemData `json:“item_data”`
}

type ItemData struct {
    ItemID          int     `json:“item_id”`
    ItemName        string  `json:“item_name”`
    CurrentDayCount int     `json:“current_day_count”`
    CurrentDaySum   int     `json:“current_day_sum”`
    MTDCount        int     `json:“mtd_count”`
    MTDSum          int     `json:“mtd_sum”`
    WTDCount        int     `json:“wtd_count”`
    WTDSum          int     `json:“wtd_sum”`
}

POC code:

reader := csv.NewReader(file)
reader.LazyQuotes = true

for {
   record, err := reader.Read()
   if err != nil {
      t.Fatalf("%v", err)
   }
   itemDetailsJson := record[2]
   var itemDetails []ItemData

   err = json.Unmarshal([]byte(itemDetailsJson), &itemDetails)
   if err != nil {
      t.Fatalf("Error unmarshalling: %v", err)
   }
   fmt.Printf("Unmarshalled Array: %+v\n", itemDetails)
}

Please suggest a way to do it in Golang. Facing issues due to double quotes and commas used in the json data list. Please suggest code changes or alternative CSV format which can be used to achieve the objective with data aggregated on restaurantId field in the CSV.

Edit 1 - item_name can even contain special characters like " or + or / or [] e.g. 6" pizza

4

There are 4 answers

1
Freeman On

Is that what you want ?

data.csv

Date,RestaurantId,ItemRatings
2023-10-08,232,[{"item_id":8215117,"item_name":"The Farmers Breakfast","current_day_count":0,"current_day_sum":0,"mtd_count":1,"mtd_sum":5,"wtd_count":0,"wtd_sum":0},{"item_id":8215132,"item_name":"The Great White","current_day_count":0,"current_day_sum":0,"mtd_count":1,"mtd_sum":5,"wtd_count":0,"wtd_sum":0}]

script.go

package main

import (
    "bufio"
    "encoding/json"
    "fmt"
    "os"
    "strings"
)

type ItemRating struct {
    ItemID          int    `json:"item_id"`
    ItemName        string `json:"item_name"`
    CurrentDayCount int    `json:"current_day_count"`
    CurrentDaySum   int    `json:"current_day_sum"`
    MTDCount        int    `json:"mtd_count"`
    MTDSum          int    `json:"mtd_sum"`
    WTDCount        int    `json:"wtd_count"`
    WTDSum          int    `json:"wtd_sum"`
}

type Data struct {
    Date         string        `json:"date"`
    RestaurantID int           `json:"restaurant_id"`
    ItemRatings  []ItemRating `json:"item_ratings"`
}

func main() {
    file, err := os.Open("data.csv")
    if err != nil {
        fmt.Println("Error opening file:", err)
        return
    }
    defer file.Close()

    scanner := bufio.NewScanner(file)

    // Skip the header line
    if scanner.Scan() {
        // Read and parse each line
        for scanner.Scan() {
            line := scanner.Text()
            parts := strings.Split(line, ",")
            date := parts[0]
            restaurantID := parts[1]
            itemRatingsJSON := strings.Join(parts[2:], ",")
            var itemRatings []ItemRating
            err := json.Unmarshal([]byte(itemRatingsJSON), &itemRatings)
            if err != nil {
                fmt.Println("Error parsing item ratings:", err)
                continue
            }

            // Process the data as needed
            fmt.Println("Date:", date)
            fmt.Println("Restaurant ID:", restaurantID)
            fmt.Println("Item Ratings:")
            for _, item := range itemRatings {
                fmt.Println("  Item ID:", item.ItemID)
                fmt.Println("  Item Name:", item.ItemName)
                fmt.Println("  Current Day Count:", item.CurrentDayCount)
                fmt.Println("  Current Day Sum:", item.CurrentDaySum)
                fmt.Println("  MTD Count:", item.MTDCount)
                fmt.Println("  MTD Sum:", item.MTDSum)
                fmt.Println("  WTD Count:", item.WTDCount)
                fmt.Println("  WTD Sum:", item.WTDSum)
            }
            fmt.Println()
        }
    }

    if err := scanner.Err(); err != nil {
        fmt.Println("Error reading file:", err)
    }
}

output

Date: 2023-10-08
Restaurant ID: 232
Item Ratings:
  Item ID: 8215117
  Item Name: The Farmers Breakfast
  Current Day Count: 0
  Current Day Sum: 0
  MTD Count: 1
  MTD Sum: 5
  WTD Count: 0
  WTD Sum: 0
  Item ID: 8215132
  Item Name: The Great White
  Current Day Count: 0
  Current Day Sum: 0
  MTD Count: 1
  MTD Sum: 5
  WTD Count: 0
  WTD Sum: 0
1
Tamires Araujo On

You can use the encoding/csv and enconding/json packages in Go to parse the CSV data and unmarshal it into your struct. you'll need parse the JSON data from the CSV and then unmarshal it into your ItemData struct.

package main

import (
    "encoding/csv"
    "encoding/json"
    "fmt"
    "log"
    "os"
    "strings"
)

type ItemRatings struct {
    RestaurantId int       `json:"item_id"`
    Date         string    `json:"date"`
    ItemData     []ItemData `json:"item_data"`
}

type ItemData struct {
    ItemID          int    `json:"item_id"`
    ItemName        string `json:"item_name"`
    CurrentDayCount int    `json:"current_day_count"`
    CurrentDaySum   int    `json:"current_day_sum"`
    MTDCount        int    `json:"mtd_count"`
    MTDSum          int    `json:"mtd_sum"`
    WTDCount        int    `json:"wtd_count"`
    WTDSum          int    `json:"wtd_sum"`
}

func main() {    
    file, err := os.Open("data.csv")
    if err != nil {
        log.Fatal(err)
    }
    defer file.Close()

    reader := csv.NewReader(file)

    var itemRatingsList []ItemRatings

    for {
        record, err := reader.Read()
        if err != nil {
            break
        }

        jsonStr := record[2]

        var itemDataList []ItemData
        if err := json.Unmarshal([]byte(jsonStr), &itemDataList); err != nil {
            log.Fatalf("Error unmarshalling JSON: %v", err)
        }

        itemRating := ItemRatings{
            RestaurantId: 232, // Assuming a constant RestaurantId for this example
            Date:         record[0],
            ItemData:     itemDataList,
        }

        itemRatingsList = append(itemRatingsList, itemRating)
    }

    for _, itemRating := range itemRatingsList {
        fmt.Printf("RestaurantId: %d, Date: %s\n", itemRating.RestaurantId, itemRating.Date)
        for _, itemData := range itemRating.ItemData {
            fmt.Printf("ItemID: %d, ItemName: %s\n", itemData.ItemID, itemData.ItemName)
        }
    }
}

2
Ömer Sezer On

Because of the encoding/csv, csv file was updated with a double quote. I spent some time, so I added it here as an alternative way.

CSV (test.csv):

Date,RestaurantId,ItemRatings
2023-10-08,232,"[{""item_id"":8215117,""item_name"":""The Farmers Breakfast"",""current_day_count"":0,""current_day_sum"":0,""mtd_count"":1,""mtd_sum"":5,""wtd_count"":0,""wtd_sum"":0},{""item_id"":8215132,""item_name"":""The Great White"",""current_day_count"":0,""current_day_sum"":0,""mtd_count"":1,""mtd_sum"":5,""wtd_count"":0,""wtd_sum"":0}]"

Code:

package main

import (
    "encoding/csv"
    "fmt"
    "os"
    "strings"
)

func main() {
    file, err := os.Open("test.csv")
    if err != nil {
        fmt.Println("Error:", err)
        return
    }
    defer file.Close()

    csvReader := csv.NewReader(file)

    for {
        record, err := csvReader.Read()
        if err != nil {
            break
        }

        if len(record) != 3 {
            fmt.Println("Invalid:", record)
            continue
        }

        date := record[0]
        restaurantID := record[1]
        itemRatingsCSV := record[2]

        fmt.Println("Date:", date)
        fmt.Println("Restaurant ID:", restaurantID)
        fmt.Println("Item Ratings (CSV):")

        itemRatingsJSON := strings.Trim(itemRatingsCSV, "[]")
        itemRatings := strings.Split(itemRatingsJSON, "},{")
        for _, item := range itemRatings {
            item = strings.Trim(item, "{}")
            fmt.Println(item)
        }

        fmt.Println()
    }
}

Output:

enter image description here

0
Cetin Basoz On

Since your data is neither a CSV nor a JSON, you would want to handle it differently. Instead of trying to parse as an CSV, IMHO you should first read it line by line and then parse the lines to parts. ie:

package main

import (
    "bufio"
    "encoding/json"
    "fmt"
    "log"
    "os"
    "strconv"
    "strings"
)

type ItemRatings struct {
    RestaurantId int        `json:"item_id"`
    Date         string     `json:"date"`
    ItemData     []ItemData `json:"item_data"`
}

type ItemData struct {
    ItemID          int    `json:"item_id"`
    ItemName        string `json:"item_name"`
    CurrentDayCount int    `json:"current_day_count"`
    CurrentDaySum   int    `json:"current_day_sum"`
    MTDCount        int    `json:"mtd_count"`
    MTDSum          int    `json:"mtd_sum"`
    WTDCount        int    `json:"wtd_count"`
    WTDSum          int    `json:"wtd_sum"`
}

func main() {
    file, err := os.Open("mydata.csv")
    if err != nil {
        log.Fatal(err)
    }
    defer file.Close()
    var itemRatings []ItemRatings

    scanner := bufio.NewScanner(file)

    counter := 0
    for scanner.Scan() {
        counter++

        if counter > 1 {
            items := strings.SplitN(scanner.Text(), ",", 3)
            date := items[0] // date, _ := time.Parse("2006-01-02", items[0])
            restaurantId, _ := strconv.Atoi(items[1])
            var itemDetails []ItemData
            err = json.Unmarshal([]byte(items[2]), &itemDetails)
            if err != nil {
                log.Fatalf("Error unmarshalling: %v", err)
            }
            itemRatings = append(itemRatings, ItemRatings{
                RestaurantId: restaurantId,
                Date:         date,
                ItemData:     itemDetails,
            })
        }
    }

    for _, rating := range itemRatings {
        fmt.Printf("RestaurantID: %d, Date: %s\n", rating.RestaurantId, rating.Date)
        for _, item := range rating.ItemData {
            fmt.Printf("   ItemID: %d, ItemName: %s, CurrentDayCount: %d, CurrentDaySum: %d, MTDCount: %d, MTDSum: %d, WTDCount: %d, WTDSum: %d\n", item.ItemID, item.ItemName, item.CurrentDayCount, item.CurrentDaySum, item.MTDCount, item.MTDSum, item.WTDCount, item.WTDSum)
        }
    }
}

Sample output from:

Date,RestaurantId,ItemRatings
2023-10-08,232,[{"item_id":8215117,"item_name":"The Farmers Breakfast","current_day_count":0,"current_day_sum":0,"mtd_count":1,"mtd_sum":5,"wtd_count":0,"wtd_sum":0},{"item_id":8215132,"item_name":"The Great White","current_day_count":0,"current_day_sum":0,"mtd_count":1,"mtd_sum":5,"wtd_count":0,"wtd_sum":0}]
2023-10-09,235,[{"item_id":8215117,"item_name":"The Farmers Breakfast","current_day_count":0,"current_day_sum":0,"mtd_count":1,"mtd_sum":5,"wtd_count":0,"wtd_sum":0},{"item_id":8215132,"item_name":"The Great White","current_day_count":0,"current_day_sum":0,"mtd_count":1,"mtd_sum":5,"wtd_count":0,"wtd_sum":0}]

Is like this:

RestaurantID: 232, Date: 2023-10-08
   ItemID: 8215117, ItemName: The Farmers Breakfast, CurrentDayCount: 0, CurrentDaySum: 0, MTDCount: 1, MTDSum: 5, WTDCount: 0, WTDSum: 0
   ItemID: 8215132, ItemName: The Great White, CurrentDayCount: 0, CurrentDaySum: 0, MTDCount: 1, MTDSum: 5, WTDCount: 0, WTDSum: 0
RestaurantID: 235, Date: 2023-10-09
   ItemID: 8215117, ItemName: The Farmers Breakfast, CurrentDayCount: 0, CurrentDaySum: 0, MTDCount: 1, MTDSum: 5, WTDCount: 0, WTDSum: 0
   ItemID: 8215132, ItemName: The Great White, CurrentDayCount: 0, CurrentDaySum: 0, MTDCount: 1, MTDSum: 5, WTDCount: 0, WTDSum: 0