Chrome memory issue - File API + AngularJS

2k views Asked by At

I have a web app that needs to upload large files to Azure BLOB storage. My solution uses HTML5 File API to slice into chunks which are then put as blob blocks, the IDs of the blocks are stored in an array and then the blocks are committed as a blob.

The solution works fine in IE. On 64 bit Chrome I have successfully uploaded 4Gb files but see very heavy memory usage (2Gb+). On 32 bit Chrome the specific chrome process will get to around 500-550Mb and then crash.

I can't see any obvious memory leaks or things I can change to help garbage collection. I store the block IDs in an array so obviously there will be some memory creeep but this shouldn't be massive. It's almost as if the File API is holding the whole file it slices into memory.

It's written as an Angular service called from a controller, I think just the service code is pertinent:

(function() {
    'use strict';

    angular
    .module('app.core')
    .factory('blobUploadService',
    [
        '$http', 'stringUtilities',
        blobUploadService
    ]);

function blobUploadService($http, stringUtilities) {

    var defaultBlockSize = 1024 * 1024; // Default to 1024KB
    var stopWatch = {};
    var state = {};

    var initializeState = function(config) {
        var blockSize = defaultBlockSize;
        if (config.blockSize) blockSize = config.blockSize;

        var maxBlockSize = blockSize;
        var numberOfBlocks = 1;

        var file = config.file;

        var fileSize = file.size;
        if (fileSize < blockSize) {
            maxBlockSize = fileSize;
        }

        if (fileSize % maxBlockSize === 0) {
            numberOfBlocks = fileSize / maxBlockSize;
        } else {
            numberOfBlocks = parseInt(fileSize / maxBlockSize, 10) + 1;
        }

        return {
            maxBlockSize: maxBlockSize,
            numberOfBlocks: numberOfBlocks,
            totalBytesRemaining: fileSize,
            currentFilePointer: 0,
            blockIds: new Array(),
            blockIdPrefix: 'block-',
            bytesUploaded: 0,
            submitUri: null,
            file: file,
            baseUrl: config.baseUrl,
            sasToken: config.sasToken,
            fileUrl: config.baseUrl + config.sasToken,
            progress: config.progress,
            complete: config.complete,
            error: config.error,
            cancelled: false
        };
    };

    /* config: {
      baseUrl: // baseUrl for blob file uri (i.e. http://<accountName>.blob.core.windows.net/<container>/<blobname>),
      sasToken: // Shared access signature querystring key/value prefixed with ?,
      file: // File object using the HTML5 File API,
      progress: // progress callback function,
      complete: // complete callback function,
      error: // error callback function,
      blockSize: // Use this to override the defaultBlockSize
    } */
    var upload = function(config) {
        state = initializeState(config);

        var reader = new FileReader();
        reader.onloadend = function(evt) {
            if (evt.target.readyState === FileReader.DONE && !state.cancelled) { // DONE === 2
                var uri = state.fileUrl + '&comp=block&blockid=' + state.blockIds[state.blockIds.length - 1];
                var requestData = new Uint8Array(evt.target.result);

                $http.put(uri,
                        requestData,
                        {
                            headers: {
                                'x-ms-blob-type': 'BlockBlob',
                                'Content-Type': state.file.type
                            },
                            transformRequest: []
                        })
                    .success(function(data, status, headers, config) {
                        state.bytesUploaded += requestData.length;

                        var percentComplete = ((parseFloat(state.bytesUploaded) / parseFloat(state.file.size)) * 100
                        ).toFixed(2);
                        if (state.progress) state.progress(percentComplete, data, status, headers, config);

                        uploadFileInBlocks(reader, state);
                    })
                    .error(function(data, status, headers, config) {
                        if (state.error) state.error(data, status, headers, config);
                    });
            }
        };

        uploadFileInBlocks(reader, state);

        return {
            cancel: function() {
                state.cancelled = true;
            }
        };
    };

    function cancel() {
        stopWatch = {};
        state.cancelled = true;
        return true;
    }

    function startStopWatch(handle) {
        if (stopWatch[handle] === undefined) {
            stopWatch[handle] = {};
            stopWatch[handle].start = Date.now();
        }
    }

    function stopStopWatch(handle) {
        stopWatch[handle].stop = Date.now();
        var duration = stopWatch[handle].stop - stopWatch[handle].start;
        delete stopWatch[handle];
        return duration;
    }

    var commitBlockList = function(state) {
        var uri = state.fileUrl + '&comp=blocklist';

        var requestBody = '<?xml version="1.0" encoding="utf-8"?><BlockList>';
        for (var i = 0; i < state.blockIds.length; i++) {
            requestBody += '<Latest>' + state.blockIds[i] + '</Latest>';
        }
        requestBody += '</BlockList>';

        $http.put(uri,
                requestBody,
                {
                    headers: {
                        'x-ms-blob-content-type': state.file.type
                    }
                })
            .success(function(data, status, headers, config) {
                if (state.complete) state.complete(data, status, headers, config);
            })
            .error(function(data, status, headers, config) {
                if (state.error) state.error(data, status, headers, config);
                // called asynchronously if an error occurs
                // or server returns response with an error status.
            });
    };

    var uploadFileInBlocks = function(reader, state) {
        if (!state.cancelled) {
            if (state.totalBytesRemaining > 0) {

                var fileContent = state.file.slice(state.currentFilePointer,
                    state.currentFilePointer + state.maxBlockSize);
                var blockId = state.blockIdPrefix + stringUtilities.pad(state.blockIds.length, 6);

                state.blockIds.push(btoa(blockId));
                reader.readAsArrayBuffer(fileContent);

                state.currentFilePointer += state.maxBlockSize;
                state.totalBytesRemaining -= state.maxBlockSize;
                if (state.totalBytesRemaining < state.maxBlockSize) {
                    state.maxBlockSize = state.totalBytesRemaining;
                }
            } else {
                commitBlockList(state);
            }
        }
    };

    return {
        upload: upload,
        cancel: cancel,
        startStopWatch: startStopWatch,
        stopStopWatch: stopStopWatch
    };
};
})();

Are there any ways I can move the scope of objects to help with Chrome GC? I have seen other people mentioning similar issues but understood Chromium had resolved some.

I should say my solution is heavily based on Gaurav Mantri's blog post here:

http://gauravmantri.com/2013/02/16/uploading-large-files-in-windows-azure-blob-storage-using-shared-access-signature-html-and-javascript/#comment-47480

1

There are 1 answers

20
guest271314 On

I can't see any obvious memory leaks or things I can change to help garbage collection. I store the block IDs in an array so obviously there will be some memory creeep but this shouldn't be massive. It's almost as if the File API is holding the whole file it slices into memory.

You are correct. The new Blobs created by .slice() are being held in memory.

The solution is to call Blob.prototype.close() on the Blob reference when processing Blob or File object is complete.

Note also, at javascript at Question also creates a new instance of FileReader if upload function is called more than once.

4.3.1. The slice method

The slice() method returns a new Blob object with bytes ranging from the optional start parameter up to but not including the optional end parameter, and with a type attribute that is the value of the optional contentType parameter.

Blob instances exist for the life of document. Though Blob should be garbage collected once removed from Blob URL Store

9.6. Lifetime of Blob URLs

Note: User agents are free to garbage collect resources removed from the Blob URL Store.

Each Blob must have an internal snapshot state, which must be initially set to the state of the underlying storage, if any such underlying storage exists, and must be preserved through StructuredClone. Further normative definition of snapshot state can be found for Files.

4.3.2. The close method

The close() method is said to close a Blob, and must act as follows:

  1. If the readability state of the context object is CLOSED, terminate this algorithm.
  2. Otherwise, set the readability state of the context object to CLOSED.
  3. If the context object has an entry in the Blob URL Store, remove the entry that corresponds to the context object.

If Blob object is passed to URL.createObjectURL(), call URL.revokeObjectURL() on Blob or File object, then call .close().

The revokeObjectURL(url) static method

Revokes the Blob URL provided in the string url by removing the corresponding entry from the Blob URL Store. This method must act as follows: 1. If the url refers to a Blob that has a readability state of CLOSED OR if the value provided for the url argument is not a Blob URL, OR if the value provided for the url argument does not have an entry in the Blob URL Store, this method call does nothing. User agents may display a message on the error console. 2. Otherwise, user agents must remove the entry from the Blob URL Store for url.

You can view the result of these calls by opening

chrome://blob-internals 

reviewing details of before and after calls which create Blob and close Blob.

For example, from

xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
Refcount: 1
Content Type: text/plain
Type: data
Length: 3

to

xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
Refcount: 1
Content Type: text/plain

following call to .close(). Similarly from

blob:http://example.com/c2823f75-de26-46f9-a4e5-95f57b8230bd
Uuid: 29e430a6-f093-40c2-bc70-2b6838a713bc

An alternative approach could be to send file as an ArrayBuffer or chunks of array buffers. Then re-assemble the file at server.

Or you can call FileReader constructor, FileReader.prototype.readAsArrayBuffer(), and load event of FileReader each once.

At load event of FileReader pass ArrayBuffer to Uint8Array, use ReadableStream, TypedArray.prototype.subarray(), .getReader(), .read() to get N chunks of ArrayBuffer as a TypedArray at pull from Uint8Array. When N chunks equaling .byteLength of ArrayBuffer have been processed, pass array of Uint8Arrays to Blob constructor to recombine file parts into single file at browser; then send Blob to server.

<!DOCTYPE html>
<html>

<head>
</head>

<body>
  <input id="file" type="file">
  <br>
  <progress value="0"></progress>
  <br>
  <output for="file"><img alt="preview"></output>
  <script type="text/javascript">
    const [input, output, img, progress, fr, handleError, CHUNK] = [
      document.querySelector("input[type='file']")
      , document.querySelector("output[for='file']")
      , document.querySelector("output img")
      , document.querySelector("progress")
      , new FileReader
      , (err) => console.log(err)
      , 1024 * 1024
    ];

    progress.addEventListener("progress", e => {
      progress.value = e.detail.value;
      e.detail.promise();
    });

    let [chunks, NEXT, CURR, url, blob] = [Array(), 0, 0];

    input.onchange = () => {
      NEXT = CURR = progress.value = progress.max = chunks.length = 0;
      if (url) {
        URL.revokeObjectURL(url);
        if (blob.hasOwnProperty("close")) {
          blob.close();
        }
      }

      if (input.files.length) {
        console.log(input.files[0]);
        progress.max = input.files[0].size;
        progress.step = progress.max / CHUNK;
        fr.readAsArrayBuffer(input.files[0]);
      }

    }

    fr.onload = () => {
      const VIEW = new Uint8Array(fr.result);
      const LEN = VIEW.byteLength;
      const {type, name:filename} = input.files[0];
      const stream = new ReadableStream({
          pull(controller) {
            if (NEXT < LEN) {
              controller
              .enqueue(VIEW.subarray(NEXT, !NEXT ? CHUNK : CHUNK + NEXT));
               NEXT += CHUNK;
            } else {
              controller.close();
            }
          },
          cancel(reason) {
            console.log(reason);
            throw new Error(reason);
          }
      });

      const [reader, processData] = [
        stream.getReader()
        , ({value, done}) => {
            if (done) {
              return reader.closed.then(() => chunks);
            }
            chunks.push(value);
            return new Promise(resolve => {
              progress.dispatchEvent(
                new CustomEvent("progress", {
                  detail:{
                    value:CURR += value.byteLength,
                    promise:resolve
                  }
                })
              );                
            })
            .then(() => reader.read().then(data => processData(data)))
            .catch(e => reader.cancel(e))
        }
      ];

      reader.read()
      .then(data => processData(data))
      .then(data => {
        blob = new Blob(data, {type});
        console.log("complete", data, blob);
        if (/image/.test(type)) {
          url = URL.createObjectURL(blob);
          img.onload = () => {
            img.title = filename;
            input.value = "";
          }
          img.src = url;
        } else {
          input.value = "";
        }             
      })
      .catch(e => handleError(e))

    }
  </script>

</body>

</html>

plnkr http://plnkr.co/edit/AEZ7iQce4QaJOKut71jk?p=preview


You can also use utilize fetch()

fetch(new Request("/path/to/server/", {method:"PUT", body:blob}))

To transmit body for a request request, run these steps:

  1. Let body be request’s body.
  2. If body is null, then queue a fetch task on request to process request end-of-body for request and abort these steps.

  3. Let read be the result of reading a chunk from body’s stream.

    • When read is fulfilled with an object whose done property is false and whose value property is a Uint8Array object, run these substeps:

      1. Let bytes be the byte sequence represented by the Uint8Array object.
      2. Transmit bytes.

      3. Increase body’s transmitted bytes by bytes’s length.

      4. Run the above step again.

    • When read is fulfilled with an object whose done property is true, queue a fetch task on request to process request end-of-body for request.

    • When read is fulfilled with a value that matches with neither of the above patterns, or read is rejected, terminate the ongoing fetch with reason fatal.

See also