Convert JSON.gz to JSON in node js

6k views Asked by At

I am retrieving an object from an S3 bucket in a node module. The object is in object.json.gz format. I need to decompress it to object.json in order to be able to parse it in the node module. The following is the code snippet:

// Fetches a gzipped JSON object from S3 and attempts to gunzip it.
// <my_key>, <my_secret_key>, <my_region>, <my_bucket> and <my_file_key> are
// placeholders to be replaced with real values; as written they are not valid
// JavaScript.
aws.config.update({ accessKeyId: <my_key>, secretAccessKey: <my_secret_key>, region: <my_region> });
var s3 = new aws.S3();
s3.getObject(
  { Bucket: "<my_bucket>", Key: "<my_file_key>"},
  // Callback receives (error, data); exactly one of the two branches below runs.
  function (error, data) {
    if (error != null) {
        console.log("Error retrieving the Object from the S3 bucket.")
        console.log(error);
    } else {
        // NOTE(review): `data` is the whole getObject response (metadata
        // fields plus a `Body` Buffer, per the logged output shown below),
        // not the gzipped bytes themselves. Passing it to gunzip is the
        // likely source of the "Invalid non-string/buffer chunk" error;
        // the compressed payload is in `data.Body` (capital B).
        zlib.gunzip(data, function(err, buffer){
              if (!err) {
                console.log(buffer);
              }
              else console.log(err);
            });
    }
  }
);

If I log the object data to the console, it logs the following,

{ AcceptRanges: 'bytes',
LastModified: 'Thu, 04 Jun 2015 17:41:12 GMT',
ContentLength: '12677',
ETag: '"ebb8f339f569b9aea1038c005442eedd"',
ContentEncoding: 'gzip',
ContentType: 'application/json',
ServerSideEncryption: 'AES256',
Metadata: {},
Body: <Buffer 1f 8b 08 00 00 00 00 00 00 00 ed 7d fb 73 1a 47 d6 f6   bf a2 f2 4f ef 5b b5 c3 f6 fd 32 bf 39 de d8 eb dd 38 71 6c 25 ce e6 ab ad ad be da bc 91 84 02 92 ...> }

If I log buffer, it logs the following,

[TypeError: Invalid non-string/buffer chunk]

If I make zlib.gunzip run on data.body, it logs the following,

<Buffer >

I've tried many workarounds found on the internet, but none of them worked. As a newbie to node.js, I find this really frustrating. Any help would be appreciated.

2

There are 2 answers

0
Paritosh On BEST ANSWER

I found the solution for unzipping the JSON.gz file. I've used the zlib module.

 var zlib = require('zlib');

 // `data` is the response object handed to the s3.getObject callback. The
 // gzipped bytes live in its `Body` property (a Buffer, capital B), so that is
 // what gunzip must receive; passing `data` itself fails with
 // "Invalid non-string/buffer chunk".
 zlib.gunzip(data.Body, function(error, buff){
   if(error != null){
      //An error occurred while unzipping the .gz file.
      console.log(error);
   }
   else{
    //buff is a Buffer holding the unzipped JSON text; decode and parse it
    //to obtain the actual object.
    var json = JSON.parse(buff.toString('utf8'));
    console.log(json);
   }
 });

zlib is a core Node.js module, so it does not need to be installed with npm; require('zlib') is enough.

0
Neo On

Paritosh Walvekar's answer helped me, but I wanted to add a few more implementation details for anyone else having this issue. My use case is reading ELB logs from S3 (which are not JSON but are .gz files nonetheless).

  • Don't worry about ResponseContentType. You can leave it as the default.
  • To turn the buff into a string you can use buff.toString('utf8') to at a minimum verify that something is working.
  • If using lambda with node runtime 6.10 (latest as of this writing), then just put var zlib = require('zlib'); at the top

My Code:

var AWS = require('aws-sdk');
var zlib = require('zlib');
var s3 = new AWS.S3({apiVersion: '2006-03-01'});

/**
 * Lambda entry point (abridged example): fetches a gzipped object from S3,
 * gunzips its Body and logs the resulting text.
 *
 * NOTE(review): `bucket`, `data.Contents` and `i` are not defined in the
 * visible code; presumably they come from the elided S3 listing call and the
 * loop mentioned in the comments below -- confirm before reusing this as-is.
 *
 * @param event    Lambda event (not read by the visible code).
 * @param context  Lambda context; `context.done` is called after gunzip completes.
 * @param callback Lambda callback (not used by the visible code).
 */
exports.handler = (event, context, callback) => {
    //Other code to keep lambda alive, other bus logic, etc...
    
    //Use s3 sdk to list objects
    //This code is inside a loop of list results
    // The immediately-invoked function receives the current key as an
    // argument -- presumably so that each asynchronous getObject call keeps
    // its own key while the surrounding loop advances.
    (function(key){
        var params = {
            Bucket: bucket, /* required */
            Key: key /* required */
        };
        s3.getObject(params, function(err, data) {
            if (err) getLogEvents(null, err); // an error occurred
            else     getLogEvents(data);           // successful response
        });
    })(data.Contents[i].Key);
    
    // Handles the outcome of one getObject call.
    //   data: the getObject response; only its `Body` is read here.
    //   err:  the getObject error, or undefined/null on success.
    function getLogEvents(data, err){
        if(err == undefined){
           // Body holds the gzipped bytes; gunzip hands back a Buffer.
           zlib.gunzip(data.Body,function(error, buff){
           if(error != null){                                 
              console.log(error)
           }
           else{                                             
            // Decode the decompressed bytes as UTF-8 text for logging.
            console.log(buff.toString('utf8'))
           }
           // Called whether or not gunzip succeeded.
           context.done(null, null);
         });
        }else{
            // NOTE(review): this branch only logs; context.done is not
            // called here in the visible code.
            console.log(err, err.stack);
        }
        
        //Do final bus logic before exiting
     }
};

I was relieved to finally get plain text output (yes I jumbled some numbers to hide sensitive info):

2017-05-07T18:03:06.319Z 6a92874b-334f-11e7-b021-031f400273ab http 2017-05-07T17:47:12.249487Z app/app-elb/165c3641668dfeh4 50.123.146.312:59275 - -1 -1 -1 503 - 435 788 "GET http://app-elb-9898692007.us-east-1.elb.amazonaws.com:80/ HTTP/1.1" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.96 Safari/537.36" - - arn:aws:elasticloadbalancing:us-east-1:676540590099:targetgroup/app-elb-targetgroup/980f40fc23gc342e "Root=1-590f5da0-37650b96857fe0943aa7900b"