Puppeteer Async Await Loop in NodeJS

1.7k views Asked by At

I am trying to write a script that:

  1. Grabs all urls from a sitemap
  2. Takes a screenshot of each URL with Puppeteer

I am currently trying to understand how to write asynchronous code, but I still have trouble finding the right coding pattern for this problem. Here is the code I currently have:

// const spider = require('./spider');
const Promise = require('bluebird');
const puppeteer = require('puppeteer');
const SpiderConstructor = require('sitemapper');

/**
 * Collects the URLs returned by `spider` for a sitemap and screenshots them
 * one at a time, in order.
 *
 * @param {string} url - Sitemap URL, forwarded to `spider`.
 * @param {number} timeout - Timeout value, forwarded to `spider`.
 * @returns {Promise<void>} Settles after the last `screen` call has finished.
 */
async function crawl(url, timeout) {
  const pages = await spider(url, timeout);
  let position = 0;
  // Strictly sequential: the next capture starts only after the previous
  // one has settled.
  for (const pageUrl of pages) {
    await screen(pageUrl, position);
    position += 1;
  }
}

/**
 * Opens `result` in a freshly launched browser and saves a PNG screenshot of
 * the page as `screenshots/<index><page title>.png`.
 *
 * NOTE(review): the `screenshots/` directory is not created here; it is
 * presumably expected to exist already — confirm.
 *
 * @param {string} result - URL of the page to capture.
 * @param {number} index - Position of the URL in the crawl; used as the
 *   file-name prefix so pages that share a title do not overwrite each other.
 * @returns {Promise<void>} Resolves once the screenshot has been taken and
 *   the browser has been closed; rejects if any step fails (the browser is
 *   still closed in that case).
 */
async function screen(result, index) {
  const browser = await puppeteer.launch();
  try {
    console.log('doing', index);
    const page = await browser.newPage();
    await page.goto(result);
    // page.title() returns a promise. Concatenating it without awaiting
    // yields "[object Promise]" in every file name.
    const title = await page.title();
    // Titles are free text; replace characters that would be read as path
    // separators or are illegal in file names on common file systems.
    const safeTitle = title.replace(/[\\/:*?"<>|]/g, '_');
    const path = `screenshots/${index}${safeTitle}.png`;
    await page.screenshot({ path });
  } finally {
    // Always release the browser, even when navigation or the screenshot
    // fails, and wait for the shutdown to complete.
    await browser.close();
  }
}

/**
 * Fetches a sitemap and returns the `sites` list reported by the sitemap
 * client, logging how many entries were found.
 *
 * @param {string} url - Sitemap URL to fetch.
 * @param {number} timeout - Passed through as the client's `timeout` option
 *   (presumably milliseconds — confirm against the sitemapper docs).
 * @returns {Promise<Array>} The `sites` value from the fetch result
 *   (presumably an array of URL strings — confirm).
 */
async function spider(url, timeout) {
  // Constructing the client is synchronous, so only fetch() is awaited. The
  // local is named differently from this function to avoid shadowing it.
  const sitemap = new SpiderConstructor({ url, timeout });
  const { sites } = await sitemap.fetch();
  console.log(sites.length);
  return sites;
}

// Script entry point: crawl the sitemap and report any failure on stderr.
(async () => {
  try {
    await crawl('https://www.google.com/sitemap.xml', 15000);
  } catch (failure) {
    console.error(failure);
  }
})();

I am having the following problems:

  • The length of the results array is not constant; it varies every time I launch the script. I guess this is because the array is not fully resolved when I display it, but I thought the whole point of await was to guarantee that the promise is resolved by the next line.
  • The screenshotting part of the script fails about half the time. I am pretty sure I have unresolved promises, but I have no idea what the correct pattern is for looping over an async function. Right now it seems to take one screenshot after the other (linear and incremental), but I get a lot of duplicates.

Any help is appreciated. Thank you for your time

0

There are 0 answers