Node.js filter/extract subset of data from csv / JSON

4.6k views Asked by At

I am trying to extract information from a csv file, the file has many rows and I would like to return only some values/columns from a specific row. I use papa/babyparse to convert the csv file to JSON but struggle to display/extract a particular row.

var baby = require('babyparse');

// Called once per parsed row. As the sample output below shows, row.data is
// printed as a one-element array wrapping the row object, so a single field
// is read as row.data[0].Req (keys are case-sensitive), not row.data.req.
function logRow(row) {
    console.log("Row:", row.data);
}

// Called once after the last row has been handed to logRow.
function logDone() {
    console.log("All done!");
}

// header: use the first CSV line as the object keys;
// skipEmptyLines: ignore blank lines in the file.
var parseOptions = {
    header: true,
    skipEmptyLines: true,
    step: logRow,
    complete: logDone
};

var csv2 = baby.parseFiles("netreqs.csv", parseOptions);

The output I get seems to be nice JSON.

   Row: [ { Req: 'RQ0342384',
    'Requestor country': 'UK',
    ReqType: 'other',
    'ATOS Approved': '21.09.2016',
    Urgent: 'No',
    Assignee: 'Hans Gans',
    'Change number': 'NA',
    'Implementation Date': '',
    'Change fully approved': 'No',
    'Completion Date': '',
    'Req Closed': 'No' } ]
    Row: [ { Req: 'RQ0343423',
    'Requestor country': 'US',
    ReqType: 'Firewall',
    'ATOS Approved': '04.11.2016',
    Urgent: 'No',
    Assignee: 'Peter Mueller',
    'Change number': 'C9343449',
    'Implementation Date': '',
    'Change fully approved': 'No',
    'Completion Date': '31.01.2017',
    'Req Closed': 'No' } ]
...

I tried to use row.data.req in my "if" but get "undefined" back. I also tried .filter and .hasOwnProperty, but I seem to be missing something (I also tried JSON.stringify before the "if", without success). After many hours of trial and error and googling, I thought I would ask here.

Ideally, I would be able to use a variable to filter the rows by Req (this is the input I get from another function) and then query other key/value pairs from the matching row, as I would like to implement different responses based on the data.

I have to admit I'm fairly new to this, appreciate your support. Many thanks

1

There is 1 answer

11
Celso Agra On BEST ANSWER

I believe you don't need to use a parser for that. You just need Node's built-in readline module:

// Reads file.csv line by line and keeps selected columns of one specific line.
var output = [];
var count = 0; // zero-based index of the line currently being read

var input = require('fs').createReadStream('file.csv');
var lineReader = require('readline').createInterface({
  input: input
});

// Report a read failure (e.g. a missing file) instead of letting the stream's
// 'error' event go unhandled and crash the process.
// NOTE(review): some Node versions appear to re-emit input errors on the
// interface itself, so listen on both and report only once — confirm for
// the Node version in use.
var readFailed = false;
var onReadError = function (err) {
  if (readFailed) {
    return;
  }
  readFailed = true;
  console.error('Could not read file.csv:', err.message);
};
input.on('error', onReadError);
lineReader.on('error', onReadError);

lineReader.on('line', function (line) {
  if (count === 2) { // this is my conditional. Set line 2
    // A plain split(',') does not understand quoted fields containing commas.
    var lineSplit = line.split(',');
    // select columns you want
    output.push({
      column0: lineSplit[0],
      column1: lineSplit[1]
      // ...
    });
  }
  count++;
});

// 'close' fires once the whole file has been read.
lineReader.on('close', function () {
    console.log(output); // list output
});

I hope it helps.

Edit.:

If you need a specific value, you can set a different conditional:

// Reads file.csv line by line and keeps every row whose first column equals
// targetReq.
var targetReq = 'RQ0191223'; // Req to look for; set this from your own input
var output = [];

var input = require('fs').createReadStream('file.csv');
var lineReader = require('readline').createInterface({
  input: input
});

// Report a read failure (e.g. a missing file) instead of letting the stream's
// 'error' event go unhandled and crash the process.
// NOTE(review): some Node versions appear to re-emit input errors on the
// interface itself, so listen on both and report only once — confirm for
// the Node version in use.
var readFailed = false;
var onReadError = function (err) {
    if (readFailed) {
        return;
    }
    readFailed = true;
    console.error('Could not read file.csv:', err.message);
};
input.on('error', onReadError);
lineReader.on('error', onReadError);

lineReader.on('line', function (line) {
    // A plain split(',') does not understand quoted fields containing commas.
    var lineSplit = line.split(',');
    // select columns you want
    var jsonFromLine = {
        req: lineSplit[0],
        column1: lineSplit[1]
        // ...
    };
    if (jsonFromLine.req === targetReq) {
        output.push(jsonFromLine);
    }
});

// 'close' fires once the whole file has been read.
lineReader.on('close', function () {
    console.log(output); // list output
});

This works fine for me.

Edit.2:

You can use promises as well:

/**
 * Reads a CSV file line by line and collects the rows whose first column
 * equals the requested Req value.
 *
 * @param {string} [filePath='file.csv'] - CSV file to read.
 * @param {string} [reqId='RQ0191223'] - value the first column must equal.
 * @returns {Promise<Array<{req: string, column1: string}>>} resolves with the
 *   matching rows once the whole file has been read; rejects with the stream
 *   error if the file cannot be read.
 */
var method = function (filePath = 'file.csv', reqId = 'RQ0191223') {
    return new Promise(function (resolve, reject) {
        var output = [];
        var input = require('fs').createReadStream(filePath);
        var lineReader = require('readline').createInterface({
          input: input
        });

        // Without these handlers a missing/unreadable file would leave the
        // promise pending and crash the process with an unhandled 'error'.
        // Both emitters are covered because the interface may re-emit input
        // errors on itself; a second reject() call is a no-op.
        input.on('error', reject);
        lineReader.on('error', reject);

        lineReader.on('line', function (line) {
            // A plain split(',') does not understand quoted fields
            // containing commas.
            var lineSplit = line.split(',');
            // select columns you want
            var jsonFromLine = {
                req: lineSplit[0],
                column1: lineSplit[1]
                // ...
            };
            if (jsonFromLine.req === reqId) {
                output.push(jsonFromLine);
            }
        });

        // 'close' fires once the whole file has been read.
        lineReader.on('close', function () {
            resolve(output);
        });
    });
};


// Run the lookup and print the matching rows; a rejection is reported
// instead of being left as an unhandled promise rejection.
method()
    .then(function (outputOfResolve) { console.log(outputOfResolve); })
    .catch(function (err) { console.error(err); });

I hope it helps