I've adapted the Amazon example of resizing a photo in Lambda so that it creates multiple thumbnail sizes and runs the resizes in parallel.
The code runs fine locally in a few seconds, but in Lambda it will not run in parallel: it throws an error after resizing the first thumbnail size. If I switch from parallel to serial, it takes around 60 seconds to produce all the sizes.
Why would running the resize code in parallel in Lambda cause the "Stream yields empty buffer" error? And how can I improve performance so that I can create all the sizes in a few seconds while still getting good value and efficiency out of Lambda in terms of processor cost?
// dependencies
var async = require('async');
var AWS = require('aws-sdk');
var gm = require('gm')
    .subClass({ imageMagick: true }); // Enable ImageMagick integration.
var util = require('util');

// constants
var SIZES = [100, 320, 640];

// get reference to S3 client
var s3 = new AWS.S3();

exports.handler = function(event, context) {
    // Read options from the event.
    console.log("Reading options from event:\n", util.inspect(event, {depth: 5}));
    var srcBucket = event.Records[0].s3.bucket.name;
    var srcKey = event.Records[0].s3.object.key;
    var dstBucket = srcBucket + "-resized";

    // Infer the image type.
    var typeMatch = srcKey.match(/\.([^.]*)$/);
    if (!typeMatch) {
        console.error('unable to infer image type for key ' + srcKey);
        return context.done();
    }
    var imageType = typeMatch[1];
    if (imageType != "jpg" && imageType != "png") {
        console.log('skipping non-image ' + srcKey);
        return context.done();
    }

    // Sanity check: validate that source and destination are different buckets.
    if (srcBucket == dstBucket) {
        console.error("Destination bucket must not match source bucket.");
        return context.done();
    }

    // Download the image from S3
    s3.getObject({
        Bucket: srcBucket,
        Key: srcKey
    },
    function(err, response) {
        if (err)
            return console.error('unable to download image ' + err);
        var contentType = response.ContentType;
        var original = gm(response.Body);

        original.size(function(err, size) {
            if (err)
                return console.error(err);

            // transform, and upload to a different S3 bucket.
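            // async.each invokes resize_photo for every entry in SIZES concurrently,
            // and every call reuses the same `original` gm object.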
            async.each(SIZES,
                function(max_size, callback) {
                    resize_photo(size, max_size, imageType, original, srcKey, dstBucket, contentType, callback);
                },
                function(err) {
                    if (err) {
                        console.error(
                            'Unable to resize ' + srcBucket +
                            ' due to an error: ' + err
                        );
                    } else {
                        console.log(
                            'Successfully resized ' + srcBucket
                        );
                    }
                    context.done();
                });
        });
    });
};

// wrap up variables into an options object
var resize_photo = function(size, max_size, imageType, original, srcKey, dstBucket, contentType, done) {
    var dstKey = max_size + "_" + srcKey;

    // transform, and upload to a different S3 bucket.
    async.waterfall([
        function transform(next) {
            // Infer the scaling factor to avoid stretching the image unnaturally.
            var scalingFactor = Math.min(
                max_size / size.width,
                max_size / size.height
            );
            var width = scalingFactor * size.width;
            var height = scalingFactor * size.height;

            // Transform the image buffer in memory.
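            // Note: the "Stream yields empty buffer" error is reported from the toBuffer
            // callback below when the spawned ImageMagick process exits without writing
            // any output.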
            original.resize(width, height)
                .toBuffer(imageType, function(err, buffer) {
                    if (err) {
                        next(err);
                    } else {
                        next(null, buffer);
                    }
                });
        },
        function upload(data, next) {
            // Stream the transformed image to a different S3 bucket.
            s3.putObject({
                Bucket: dstBucket,
                Key: dstKey,
                Body: data,
                ContentType: contentType
            },
            next);
        }
    ], function(err) {
        console.log('finished resizing ' + dstBucket + '/' + dstKey);
        if (err) {
            console.error(err);
        } else {
            console.log(
                'Successfully resized ' + dstKey
            );
        }
        done(err);
    });
};
I just ran into the same issue tonight.
There may be other things you can try, but increasing the memory allocated to the Lambda function made the buffer error go away for me.
I'm resizing images of around 2.1 MB and 5000x3000 pixels into 3 smaller sizes.
Hope that helps.
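If you want to script the change rather than use the console, here is a minimal sketch of raising the memory setting with the Node.js aws-sdk; the function name, region, and the 1024 MB value are placeholders for your own setup, and the same change can be made from the Lambda console or the AWS CLI.

// One-off script (not part of the resize handler): raise the function's memory.
// The function name, region, and 1024 MB value are placeholders.
var AWS = require('aws-sdk');
var lambda = new AWS.Lambda({ region: 'us-east-1' }); // placeholder region

lambda.updateFunctionConfiguration({
    FunctionName: 'create-thumbnails', // placeholder function name
    MemorySize: 1024                   // MB; Lambda allocates CPU in proportion to memory
}, function(err, data) {
    if (err) {
        console.error('unable to update memory: ' + err);
    } else {
        console.log('memory is now ' + data.MemorySize + ' MB');
    }
});

Since Lambda bills by duration at a rate proportional to the memory setting, a larger allocation can still be cost-effective if it lets the three resizes finish much faster in parallel.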