Batch Update Bigquery results into google sheets(about 50k results)

969 views Asked by At

I am trying to use Script editor(Apps script) to stream BigQuery results into google sheets. I'm following these [document][1] .

The issue I have, the data doesn't load fully, it hangs. I have a large number of rows(more than 12 rows). I think I need to optimise the "Append the results" section with some sort of batch update. Now it's a loop which i'm guessing is not very efficient. I cannot figure it out. I tried to use ".next()" but get an error, function didn't exist. I use these [document][2].

How can I optimise the append results section? Heres the whole code(later i've included just the part im looking to modify):

  
  var ui = SpreadsheetApp.getUi();
  ui.createMenu("Update")
      .addItem('Update','update')
      .addToUi();
}


function update() {
  
  run1("\"Filter1\"","\"FilterA\"","Sheet1);
  run1("\"Filter2\"","\"FilterB\"","Sheet2");

  
};


function run1(filter1,filter2,output) {
  
  var projectId = 'xxx';
  var request = {
    useLegacySql: false,
    useQueryCache: false,
    query: 'select * from table ' +
           'where a1.col1 = ' + filter1 + ' and a1.col2  in ( ' + filter2 + ' ); ' 
     
  };
  
  var queryResults = BigQuery.Jobs.query(request, projectId);
  var jobId = queryResults.jobReference.jobId;
  


  // Check on status of the Query Job.
  var sleepTimeMs = 500;
  while (!queryResults.jobComplete) {
    Utilities.sleep(sleepTimeMs);
    sleepTimeMs *= 2;
    queryResults = BigQuery.Jobs.getQueryResults(projectId,jobId);
  }

  // Get all the rows of results.
  var rows = queryResults.rows;
  while (queryResults.pageToken) {
    queryResults = BigQuery.Jobs.getQueryResults(projectId,jobId, {
      pageToken: queryResults.pageToken
    });
    rows = rows.concat(queryResults.rows);
  }

  if (rows) {
    var spreadsheet = SpreadsheetApp.getActive();
    var sheet = spreadsheet.getSheetByName(output);
    sheet.clearContents();
    
    // Append the headers.
    var headers = queryResults.schema.fields.map(function(field) {
      return field.name;
    });
    sheet.appendRow(headers);

    spreadsheet.getSheetByName(output).getRange("C1").setValue("Modified_col_name");

    // Append the results.
    var data = new Array(rows.length);
    for (var i = 0; i < rows.length; i++) {
      var cols = rows[i].f;
      data[i] = new Array(cols.length);
      for (var j = 0; j < cols.length; j++) {
        data[i][j] = cols[j].v;
      }
    }

   


    sheet.getRange(2, 1, rows.length, headers.length).setValues(data);

    Logger.log("Results spreadsheet created: %s",
        spreadsheet.getUrl());
  } else {
    Logger.log("No rows returned.");
  }
};

Specifically this part of the code:

for (var i = 0; i < rows.length; i++) {
      var cols = rows[i].f;
      data[i] = new Array(cols.length);
      for (var j = 0; j < cols.length; j++) {
        data[i][j] = cols[j].v;
      }
    }
2

There are 2 answers

1
Tanaike On BEST ANSWER

I believe your goal is as follows.

  • You want to reduce the process cost of your script.

In this case, how about the following modification?

Modified script:

Before you use this script, please enable Sheets API at Advanced Google services.

From:

if (rows) {
  var spreadsheet = SpreadsheetApp.getActive();
  var sheet = spreadsheet.getSheetByName(output);
  sheet.clearContents();
  
  // Append the headers.
  var headers = queryResults.schema.fields.map(function(field) {
    return field.name;
  });
  sheet.appendRow(headers);

  spreadsheet.getSheetByName(output).getRange("C1").setValue("Modified_col_name");

  // Append the results.
  var data = new Array(rows.length);
  for (var i = 0; i < rows.length; i++) {
    var cols = rows[i].f;
    data[i] = new Array(cols.length);
    for (var j = 0; j < cols.length; j++) {
      data[i][j] = cols[j].v;
    }
  }

 


  sheet.getRange(2, 1, rows.length, headers.length).setValues(data);

  Logger.log("Results spreadsheet created: %s",
      spreadsheet.getUrl());
} else {
  Logger.log("No rows returned.");
}

To:

if (rows) {
  var headers = queryResults.schema.fields.map(function (field) {
    return field.name;
  });
  var data = [headers, ...rows.map(({ f }) => f.map(({ v }) => v || ""))];
  var spreadsheet = SpreadsheetApp.getActive();
  var sheet = spreadsheet.getSheetByName(output);
  sheet.clearContents();
  SpreadsheetApp.flush();
  Sheets.Spreadsheets.Values.update({ values: data }, spreadsheet.getId(), output, { valueInputOption: "USER_ENTERED" });
  // spreadsheet.getSheetByName(output).getRange("C1").setValue("Modified_col_name"); // I'm not sure about this line.
  Logger.log("Results spreadsheet created: %s", spreadsheet.getUrl());
} else {
  Logger.log("No rows returned.");
}

Reference:

4
Raul Saucedo On

You need to use more filters in the query or specify less number of columns. Because one of the errors you could have is that the size of the file is more than 10MB. Because the limit of rows is 50,000 rows in this case you get an error between 12,000 or 15,000 rows. You can Split the data in several google sheets.

Here you can see some solutions and you can also see more documentation in this link.

Your query results might be too large. Your query will fail if:

Pivot tables have over 50K results. To reduce your query results, you can:

  • Use filters to limit results
  • Limit the number of rows per breakout
  • Turn off “show totals” when adding rows, columns, values, and filters
  • Results’ size is more than 10MB. To reduce size, return fewer rows or columns.