I'm using Node and Express to fetch a .CSV file from a URL that I want to parse. The process of downloading it works just fine.
But when I use csv-parser
to parse the file the output in the console looks like this:
Just tonnes of lines of weird looking byte code? If I use excel or numbers to open the CSV file regularly it works fine.
This is what the data in the CSV file looks like:
My goal is to use a csv parser to convert each row into an object - I've seen many examples of parsers doing this.
Here's the code from node:
const broadcasterController = require('./broadcasterController');
const https = require('https');
const fs = require('fs');
const parser = require('csv-parser');
exports.queryFi = async (io) => {
// let today = new Date().toISOString().slice(0, 10);
// let today = '2021-03-20';
fetchInsidersCSV = async () => {
// Download a file and extract the destination
function downloadFromUrl(url, dest) {
return new Promise((resolve, reject) => {
var file = fs.createWriteStream(dest);
https.get(url, function (response) {
response.pipe(file);
file.on('finish', function () {
file.close();
resolve(dest);
});
}).on('error', function (err) {
// Handle errors
fs.unlinkSync(dest);
reject(new Error('Download failed.'));
});
});
}
await downloadFromUrl(
`https://marknadssok.fi.se/Publiceringsklient/sv-SE/Search/Search?SearchFunctionType=Insyn&Utgivare=&PersonILedandeSt%C3%A4llningNamn=&Transaktionsdatum.From=&Transaktionsdatum.To=&Publiceringsdatum.From=2021-04-01&Publiceringsdatum.To=2021-04-01&button=export&Page=1`,
'bar.csv'
);
};
try {
const results = [];
// crawl the web page and start att page 1
await fetchInsidersCSV();
fs.createReadStream('bar.csv')
.pipe(parser({ separator: ';' }))
.on('data', (data) => results.push(data))
.on('end', () => {
console.log(results);
});
} catch (error) {
console.log(error);
}
};
Could anyone please help me with a code solution where the output is correct?
It looks to me like that data is in UTF-16 encoding. So if we specify this in our fs.createReadStream() call, e.g. fs.createReadStream('bar.csv', { encoding: 'utf16le' }), we should get the correct result:
With this setting, I see an output like so: