I am working in ReactJS, and one of the main aspects of our project is the ability to upload a scorecard and have all of its results parsed and placed into objects. However, these PDFs contain a LOT of information, averaging 12-14 pages. Most of it is irrelevant; I usually only need pages 5-7, but users will be users, and they upload all 12. I am using the PDFTables API (pdftables.com), which is very good, so we're not looking for replacements on that. However, because the file is so large, the process takes long enough on a half-decent connection that I'm hit with a 504 error; on a good-to-great connection there's no issue. With that said, I have two questions:
- Is there a way to extend the amount of time that has to elapse before my computer gives up on the process? (See the sketch just below.)
- Is there a way to parse only SOME of the pages that get submitted? (A rough idea follows the pdfToExcel code further down.)
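For the first question, the farthest I've gotten is request's timeout option (milliseconds to wait for response headers before aborting), though I suspect it won't help if the 504 is the gateway giving up on its end rather than my machine. Something like this untested sketch:

// Untested: same POST as in pdfToExcel below, but with an explicit
// client-side timeout (120000 ms is an arbitrary number I picked).
const req = request.post(
  { encoding: null, url: url, timeout: 120000 },
  (err, resp, body) => {
    if (err) {
      // err.code is ETIMEDOUT / ESOCKETTIMEDOUT when the timeout fires
      console.log('conversion request failed', err.code);
      return;
    }
    // ...same success handling as in pdfToExcel below
  }
);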
The relevant code is shown below:
const request = require('request');
const fs = require('fs');
const path = require('path');
const xlsx = require('node-xlsx'); // assuming node-xlsx, given xlsx.parse(...)[0].data

const url = 'https://pdftables.com/api?key=770oukvvx1wl&format=xlsx-single';

// POST the uploaded PDF to PDFTables and write the converted spreadsheet
// next to the original upload as <path>.xlsx.
const pdfToExcel = (pdfFile) => {
  const req = request.post({ encoding: null, url: url }, (err, resp, body) => {
    if (!err && resp.statusCode === 200) {
      fs.writeFile(`${pdfFile.path}.xlsx`, body, (writeErr) => {
        if (writeErr) {
          console.log('error writing file');
        }
      });
    } else {
      console.log('error retrieving URL');
    }
  });
  const form = req.form();
  form.append('file', fs.createReadStream(`./${pdfFile.path}`));
};
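For the second question, the only idea I've had so far is trimming the PDF down to just pages 5-7 before posting it, e.g. with something like pdf-lib (a hypothetical sketch, not a dependency we use yet):

const { PDFDocument } = require('pdf-lib'); // hypothetical dependency

// Hypothetical: copy only pages 5-7 (zero-based indices 4-6) into a new
// PDF and upload that instead of the full scorecard.
const trimPdf = async (inPath, outPath) => {
  const srcDoc = await PDFDocument.load(fs.readFileSync(inPath));
  const outDoc = await PDFDocument.create();
  // Clamp in case a user uploads fewer pages than expected.
  const wanted = [4, 5, 6].filter((i) => i < srcDoc.getPageCount());
  const pages = await outDoc.copyPages(srcDoc, wanted);
  pages.forEach((p) => outDoc.addPage(p));
  fs.writeFileSync(outPath, await outDoc.save());
};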
// Parse the converted spreadsheet and return the first worksheet's rows,
// cleaning up the temporary files afterwards.
const parseExcel = async (file) => {
  if (!file.path.endsWith('.xlsx')) {
    // Upload was a PDF: read the converted copy, then delete both files.
    const filePath = path.resolve(`./${file.path}.xlsx`);
    const workSheetsFromFile = xlsx.parse(filePath);
    fs.unlinkSync(`./${file.path}`);
    fs.unlinkSync(filePath);
    return workSheetsFromFile[0].data;
  }
  // Upload was already an .xlsx file: parse and delete it.
  const filePath = path.resolve(`./${file.path}`);
  const workSheetsFromFile = xlsx.parse(filePath);
  fs.unlinkSync(filePath);
  return workSheetsFromFile[0].data;
};
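For completeness, the two pieces are chained roughly like this; since pdfToExcel returns before its callback runs, something has to wait for the .xlsx to be written before parseExcel reads it (convertAndParse is a simplified, hypothetical stand-in for our actual route handler):

// Hypothetical glue: wrap the conversion in a Promise so parseExcel
// only runs once the .xlsx has actually been written to disk.
const convertAndParse = (pdfFile) =>
  new Promise((resolve, reject) => {
    const req = request.post({ encoding: null, url: url }, (err, resp, body) => {
      if (err || resp.statusCode !== 200) {
        return reject(err || new Error('HTTP ' + resp.statusCode));
      }
      fs.writeFile(`${pdfFile.path}.xlsx`, body, (writeErr) =>
        writeErr ? reject(writeErr) : resolve()
      );
    });
    req.form().append('file', fs.createReadStream(`./${pdfFile.path}`));
  }).then(() => parseExcel(pdfFile));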