I'm attempting to extract plain text out of a word doc using word-extractor (https://www.npmjs.com/package/word-extractor) and convert it to a csv.

Unfortunately, the CSV gets created before the data has been extracted from the document. I'm new to async/await, but it seems like the best option. However, I'm struggling to wrap my callback function in a promise (I think).

var WordExtractor = require("word-extractor");
var extractor = new WordExtractor();

let value = '';

// array of paths to files
const files = glob.sync('./desktop/docs/**/*.doc');

// loop through files   
for (let item of files) {

  // The object returned from the extract() method is a promise.
  let extracted = extractor.extract(item);

  // I need this part to finish before anything else happens. 
  /**
   * Waits for the pending extraction and stores the document body.
   *
   * `extracted` is already a promise, so it is chained directly rather than
   * wrapped in `new Promise(...)`. A wrapper that never calls `resolve` or
   * `reject` stays pending forever, which leaves every awaiting caller stuck.
   *
   * @returns {Promise<*>} Resolves with the result of `doc.getBody()` (also
   *   written to the outer `value` variable); rejects if `extracted` rejects.
   */
  function extractDocs() {
    return extracted.then(function (doc) {
      value = doc.getBody(); // takes around 1s
      return value;
    });
  }

// async await function 
/**
 * Async wrapper that waits for extractDocs() and passes its result through.
 *
 * @returns {Promise<*>} Settles with whatever extractDocs() settles with.
 */
async function test() {
  const outcome = await extractDocs();
  return outcome;
}

test() 

//rest of the code that writes a csv from the data extracted from docs 

Apologies for the badly worded question and any help at all is appreciated.

2 Answers

1
spaceout On Best Solutions

Since the word-extractor package already supports promises, you can do the following:

// need async function to use await
/**
 * Extracts the body of every document listed in `files`.
 *
 * Documents are processed one at a time: each `await` pauses the loop until
 * that file's extraction promise has resolved.
 *
 * @returns {Promise<Array>} Resolves with one `getBody()` result per entry in
 *   `files`, in the same order; rejects if any extraction rejects.
 */
async function extractDocs() {
  // Collected so the caller can use the bodies once every file is done.
  const values = [];

  // loop through files
  for (const item of files) {
    // extract() already returns a promise, so it can be awaited directly;
    // await pauses execution until the promise is resolved, sync like.
    const extracted = await extractor.extract(item);
    values.push(extracted.getBody());
  }

  // return values so they can be used somewhere else
  return values;
}
// execute

// extractDocs returns promise, so to use the return value you can do
/**
 * Waits for extractDocs() to finish, then continues with the work that depends
 * on the extracted data.
 *
 * @returns {Promise<void>} Rejects if extractDocs() rejects.
 */
async function useExtracted() {
  const values = await extractDocs(); // values is an array if you've returned
  //rest of the code that writes a csv from the data extracted from docs 
}
// execute; the catch keeps a failed extraction from becoming an unhandled rejection
useExtracted().catch(console.error);

General syntax of async/await is:

/**
 * Template for awaiting a promise inside an async function with error handling.
 */
async function someThing() {
  try {
    // getSomePromise() is expected to return a promise
    const outcome = await getSomePromise();
  } catch (err) {
    // handle error or rethrow
    console.error(err);
  }
}

Note: await is valid only inside an async function (or at the top level of an ES module), and anything returned by an async function is automatically wrapped in a Promise.

3
Ben. Ayoub On

Use the following :

/**
 * Waits for the pending extraction and returns the document body.
 *
 * @returns {Promise<*>} Resolves with the result of `doc.getBody()` (also
 *   written to the outer `value` variable); rejects if `extracted` rejects.
 */
async function extractDocs() {
  // `extracted` is already a promise, so await it directly. Wrapping it in a
  // `new Promise(...)` that never calls resolve/reject would hang forever.
  const doc = await extracted;
  value = doc.getBody(); //takes around 1s
  return value;
}

To run code after it completes, use .then():

// Log the value extractDocs() resolves with; if the chain rejects, log the error instead.
extractDocs()
  .then(console.log)
  .catch(console.error)