Data from one async execution of a function being mixed with another execution of the function

84 views Asked by At

I have been writing a web scraping API built on top of NodeJS, using Cheerio, node-fetch and fs-extra. In the following piece of code, I call the getReport method for each string in the config.supportedMountains array. I want to run each of the items through the fetchAndStore function, which makes the HTML request, runs the response through the specific parser, and then stores the JSON results.

// const fs = require('fs-extra');
const _ = require('lodash');
// const Promise = require('promise');
const schedule = require('node-schedule');

const fetchAndStore = require('./fetchAndStore.js');

const config = require('../config.js');

exports.run = function() {
  schedule.scheduleJob('*/20 * * * * *', function() {
    // Get the most recent reports
    // And write them to storage
    _.forEach(config.supportedMountains, function(fName) {
      getReport(fName);
    });
  });
};

/**
 * Fetches and stores the latest report for a single mountain by delegating
 * to `fetchAndStore.run`.
 * @param {string} fName the file name of the mountain
 * @return {promise} whatever `fetchAndStore.run` returns for that file name
 */
function getReport(fName) {
  const pendingWrite = fetchAndStore.run(fName);
  return pendingWrite;
}

Here you can see the fetchAndStore file. This file takes the fName and requires the corresponding staticData file, which contains the URL to fetch the page from. The HTML request is then made, and the response is run through the parser. The resulting parsed JSON then goes through a few steps to be stored. The final output should be two files: one which stores the reports and another which stores the historicSnowfall. Most of the logic in the fs.outputJson functions is there to deal with missing files.

const fs = require('fs-extra');
const fetch = require('node-fetch');

exports.run = (function(fName) {
  // Get the staticJson
  let staticJson = require(`../staticData/mountains/${fName}.json`);
  // console.log(staticJson.id)

  // Output the report
  return fetch(staticJson.urls.reportFetchUrl).then(function(res) {
    return res.text();
  }).then(function(html) {
    // Run the html through the parser
    let parser = require(`../scrapers/${staticJson.sName}.js`);
    parsed = parser.run(html);
    // Output the report
    return fs.outputJson(
      `data/reports/${staticJson.id}.json`,
      parsed.report
    ).then(function() {
      // console.log(parsed.report.lifts[0].name);
      // Once output is completed
      if (parsed.snowHistory) {
        // If snow history is defined
        // Read the old file
        return fs.readJson(
          `data/snowHistory/${staticJson.id}.json`
        ).then(function(oldJson) {
          // If the date of the old json is todays date
          if (oldJson[0].date === parsed.snowHistory.date) {
            // Replace the first element in array
            oldJson[0] = parsed.snowHistory;
            return fs.outputJson(
              `data/snowHistory/${staticJson.id}.json`,
              oldJson
            );
          } else {
            // If this is a fresh entry
            oldJson.unshift(parsed.snowHistory);
            // If the record does not exist
            return fs.outputJson(
              `data/snowHistory/${staticJson.id}.json`,
              oldJson
            );
          }
        }).catch(function(e) {
          // If the old file cannot be read
          if (e.code === 'ENOENT') {
            // If the file does not exist
            // Write brand new file
            return fs.outputJson(
              `data/snowHistory/${staticJson.id}.json`,
              [parsed.snowHistory]
            );
          }
        });
      }
    });
  });
});

For some reason, when the scraper is running, about 1/4 of the time, the data from one execution of fetchAndStore will get mixed up with the data from another execution of fetchAndStore, meaning the wrong data will get written into the file system. How is this possible? I figured that since I am making the calls to fetchAndStore.run() separately, data would not be able to get mixed up. Any idea why this is happening?

1

There is 1 answer

1
Evert On

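The first thing I see is that parsed is globally scoped. Could that be the issue? Because it is assigned without let or const, every call to fetchAndStore.run shares the same variable, so a later call can overwrite it before an earlier call's file-writing callbacks have read it. As an aside, if you're nesting that deeply, you should really consider breaking this up into a couple of functions.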