Error: Setting up fake worker failed: "Cannot find module './pdf.worker.js'

157 views Asked by At

I am using pdfjs-dist version 3.11.174 in my Node.js application, and I'm creating an executable from it.

I am packaging the app using the commands below …

Below is the JSON content of the sea-config.json file:

{ "main": "app-out.cjs", "output": "sea-prep.blob" }

node --experimental-sea-config sea-config.json
node -e "require('fs').copyFileSync(process.execPath, 'CatExport.exe')"
npx postject CatExport.exe NODE_SEA_BLOB sea-prep.blob ^ --sentinel-fuse NODE_SEA_FUSE_fce680ab2cc467b6e072b8b5df1996b2 ..

When it's packaged as an EXE and I run it, I get the error below:

Error: Setting up fake worker failed: "Cannot find module './pdf.worker.js'

However, it works fine in development (without the EXE).

import pdfjs from 'pdfjs-dist/legacy/build/pdf.js';
import fs from 'fs';

// NOTE(review): setting this flag does not appear to stop pdf.js from setting
// up its fake worker — the packaged build still fails while trying to load
// './pdf.worker.js'. Presumably pdfjs-dist 3.x ignores `disableWorker`;
// confirm against the pdf.js API documentation.
pdfjs.disableWorker = true;

/**
 * Extracts the text of a PDF document, grouping consecutive text items that
 * share the same vertical position (`item.transform[5]`) into one row.
 *
 * @param {object} options
 * @param {string} [options.file] - Path of a PDF to read from disk; when
 *   truthy it takes precedence over `dataBuffer`.
 * @param {*} [options.dataBuffer] - PDF source handed to `pdfjs.getDocument`
 *   when `file` is not given.
 * @param {number} [options.startPage=1] - First page to extract (1-based);
 *   values below 1 are treated as 1.
 * @param {number} [options.endPage=Number.MAX_VALUE] - Last page to extract
 *   (inclusive); clamped to the document's page count.
 * @param {string} [options.columnSeparator=''] - Joins the items of one row.
 * @param {string} [options.rowSeparator='\n'] - Joins the rows of all pages.
 * @param {object} [options.renderOptions] - Passed unchanged to
 *   `page.getTextContent`.
 * @returns {Promise<object>} Resolves to
 *   `{ version, numPages, metaData, info, text }`.
 */
const pdfToText = async function ({ file, dataBuffer, startPage = 1, endPage = Number.MAX_VALUE, columnSeparator = '', rowSeparator = '\n', renderOptions }) {
    const source = file ? fs.readFileSync(file) : dataBuffer;
    const doc = await pdfjs.getDocument(source).promise;

    try {
        const metaData = await doc.getMetadata();
        const result = {
            version: pdfjs.version,
            numPages: doc.numPages,
            metaData,
            info: metaData.info,
        };

        // Clamp both ends of the range so out-of-range requests never reach getPage().
        const firstPage = Math.max(1, startPage);
        const lastPage = Math.min(endPage, doc.numPages);

        const lines = [];
        for (let pageNumber = firstPage; pageNumber <= lastPage; pageNumber++) {
            const page = await doc.getPage(pageNumber);
            const textContent = await page.getTextContent(renderOptions);

            let lastY;
            let row;
            const pageRows = [];
            for (const item of textContent.items) {
                const y = item.transform[5];
                // A change of vertical position starts a new row.
                if (lastY !== y) {
                    row = [];
                    pageRows.push(row);
                    lastY = y;
                }
                row.push(item.str);
            }

            for (const cells of pageRows) {
                lines.push(cells.join(columnSeparator));
            }
        }

        result.text = lines.join(rowSeparator);
        return result;
    } finally {
        // Release the document even when metadata or text extraction fails.
        await doc.destroy();
    }
};

export default pdfToText;

This is the code I'm using to extract text from the PDF file.

0

There are 0 answers