Tesseract.js error handling - I can't catch the error in worker.recognize()

676 views Asked by At

My app is simple, there are lots of image files in img directory and app OCRs them one by one using tesseract.js with Node JS. However some images cause error and the app crashes.

I want my app to work continiously until it finishes all files in the toOCR folder, move successful files to 'completed' folder, and failed files to 'error' folder.

My code is shown below. I tried try-catch and then-catch everywhere but didn't work. So I restart my app manually when an error occurs. Is there anyway that error can be handled and app can run uninterrupted?

import { createWorker, PSM } from 'tesseract.js';
import * as fs from 'fs';
import { exec } from 'child_process';

// toOCR file list
var belgeadlari = fs.readdirSync('./OCRBekleyen3k');
//Tesseract worker
const worker = createWorker(
    {
        errorHandler: (err) => {
            console.log(err);
            console.log('\u0007');
            if (err) throw err;
        },
        logger: (m) => {console.log(m);}
    }

);

(async () => {

    await worker.load();
    await worker.loadLanguage('tur');
    await worker.initialize('tur');

    //loop to OCR one by one
    for (let i = 0; i < belgeadlari.length; i++) {
        try {
            console.log("Processing: " + belgeadlari[i]);

            const { data: { text } } = await worker.recognize("./OCRBekleyen3k/" + belgeadlari[i], {
                tessedit_pageseg_mode: PSM.AUTO_OSD,
                errorHandler: (err) => {
                    if (err) throw err;
                    console.log('\u0007');
                }
            })
                .catch(e => {
                    console.log('\u0007');
                    if (e) throw e;
                });

            //ON success, write text to .txt 
            let fileTitle = "\n\n ******** \n\n" + belgeadlari[i] + "\n" + text;
            fs.appendFile('sonuclar2.txt', fileTitle, function (err) {
                if (err) throw err;
                console.log(belgeadlari[i] + ' Saved!');
                console.log('\u0007');
                exec(`rundll32 user32.dll,MessageBeep`);
            });
            // and move the successful file to completed folder
            fs.rename("./OCRBekleyen3k/" + belgeadlari[i], "./tamamlanan/" + belgeadlari[i], function (err) {
                if (err) throw err
                console.log('Successfully renamed - AKA moved!')
            });

        } catch (error) {
            //on error, move the failed file to errorFile folder and continue
            fs.rename("./OCRBekleyen3k/" + belgeadlari[i], "./OCRHata/" + belgeadlari[i], function (err) {
                if (err) throw err
                console.log('Successfully renamed - AKA moved!')
            });
            continue;
        }
    }

    //loop ends
    await worker.terminate();

})();

and error code is:

# Fatal error in , line 0

# Fatal process out of memory: base::SmallVector::Grow


#FailureMessage Object: 000000CFB48FE070
1: 00007FF77B37E3EF v8::internal::CodeObjectRegistry::~CodeObjectRegistry+111951
2: 00007FF77B29BE1F v8::CFunctionInfo::HasOptions+7039
3: 00007FF77BF72C62 V8_Fatal+162
4: 00007FF77B7273D9 v8::internal::wasm::GenerateLiftoffDebugSideTable+122057
5: 00007FF77B9F132D v8::base::TimeDelta::operator!=+35261
6: 00007FF77B9EFF8A v8::base::TimeDelta::operator!=+30234
7: 00007FF77BBC8D90 v8::internal::Builtins::builtin_handle+71696
8: 00007FF77BBC92E3 v8::internal::Builtins::builtin_handle+73059
9: 00007FF77BCD2C11 v8::internal::SetupIsolateDelegate::SetupHeap+473105
10: 00007FF77BC6A57F v8::internal::SetupIsolateDelegate::SetupHeap+45439
11: 00007FF77BC993A4 v8::internal::SetupIsolateDelegate::SetupHeap+237476
12: 00007FF77BC6A57F v8::internal::SetupIsolateDelegate::SetupHeap+45439
13: 00007FF77BC6874F v8::internal::SetupIsolateDelegate::SetupHeap+37711
14: 00007FF77BC6834B v8::internal::SetupIsolateDelegate::SetupHeap+36683
15: 00007FF77BB25E33 v8::internal::Execution::CallWasm+1507
16: 00007FF77BB2572F v8::internal::Execution::Call+191
17: 00007FF77BC2C6CB v8::Function::Call+475
18: 00007FF77B3ACC41 node::CallbackScope::~CallbackScope+1745
19: 00007FF77B3A4BC4 v8::internal::compiler::Operator::EffectOutputCount+228
20: 00007FF77B22BEE3 v8::internal::wasm::WasmCode::code_comments_offset+20707
21: 00007FF77B22CA5D v8::internal::wasm::WasmCode::code_comments_offset+23645
22: 00007FF77B227379 v8::internal::wasm::WasmCode::code_comments_offset+1401
23: 00007FF77B3CEFF2 uv_thread_self+3986
24: 00007FF77B3CF067 uv_thread_self+4103
25: 00007FF77B3D1D6F uv_pipe_pending_type+3343
26: 00007FF77B3DCB3C uv_loop_init+940
27: 00007FF77B3DCE3A uv_run+202
28: 00007FF77B3ABF45 node::SpinEventLoop+309
29: 00007FF77B2C6403 cppgc::internal::NormalPageSpace::linear_allocation_buffer+53747
30: 00007FF77B3436AD node::Start+221
31: 00007FF77B16878C RC4_options+347788
32: 00007FF77C1C70B8 v8::internal::compiler::RepresentationChanger::Uint32OverflowOperatorFor+14472
33: 00007FFDB0947034 BaseThreadInitThunk+20
34: 00007FFDB2922651 RtlUserThreadStart+33
1

There are 1 answers

0
Sachin Sondh On

The reason why your catch block is failing is because you are throwing the exception from inside the scope which is not recommended. You can simply return the error from the errorHandler method and catch it while using recognize.

const worker = createWorker({
    
    logger: (m) => {console.log(m);},
    errorHandler: (err) => {
      console.log(err)
      
      return err
    }
});

This way you can easily catch it either in a try catch block or with the .catch() method.