How to inspect network traffic and get the URL of resource requests?

11k views Asked by At

I want to monitor the network activity of a page and get the URLs of all the JavaScript network requests, similar to what PhantomJS' page.onResourceRequested does, but I couldn't figure out how to do this with Google Chrome's Puppeteer.

I've been dabbling with Google Chrome's Puppeteer, but I couldn't figure out how to make it work. When I log the page object, the output looks like this:

Page {
    domain: null,
    _events: {
        request: [Function]
    },
    _eventsCount: 1,
    _maxListeners: undefined,
    _client: Session {
        domain: null,
        _events: {
            'Page.frameAttached': [Function],
            'Page.frameNavigated': [Function],
            'Page.frameDetached': [Function],
            'Runtime.executionContextCreated': [Function],
            'Network.requestWillBeSent': [Function: bound _onRequestWillBeSent],
            'Network.requestIntercepted': [Function: bound _onRequestIntercepted],
            'Network.responseReceived': [Function: bound _onResponseReceived],
            'Network.loadingFinished': [Function: bound _onLoadingFinished],
            'Network.loadingFailed': [Function: bound _onLoadingFailed],
            'Page.loadEventFired': [Function],
            'Runtime.consoleAPICalled': [Function],
            'Page.javascriptDialogOpening': [Function],
            'Runtime.exceptionThrown': [Function],
            'Security.certificateError': [Function],
            'Inspector.targetCrashed': [Function]
        },
        _eventsCount: 15,
        _maxListeners: undefined,
        _lastId: 14,
        _callbacks: Map {},
        _connection: Connection {
            domain: null,
            _events: {},
            _eventsCount: 0,
            _maxListeners: undefined,
            _url: 'ws://127.0.0.1:65343/devtools/browser/ca214df4-4357-4b8f-8552-a1524d6652ff',
            _lastId: 17,
            _callbacks: Map {},
            _delay: 0,
            _ws: [Object],
            _sessions: [Object]
        },
        _targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
        _sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
    },
    _keyboard: Keyboard {
        _client: Session {
            domain: null,
            _events: [Object],
            _eventsCount: 15,
            _maxListeners: undefined,
            _lastId: 14,
            _callbacks: Map {},
            _connection: [Object],
            _targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
            _sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
        },
        _modifiers: 0,
        _pressedKeys: Set {}
    },
    _mouse: Mouse {
        _client: Session {
            domain: null,
            _events: [Object],
            _eventsCount: 15,
            _maxListeners: undefined,
            _lastId: 14,
            _callbacks: Map {},
            _connection: [Object],
            _targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
            _sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
        },
        _keyboard: Keyboard {
            _client: [Object],
            _modifiers: 0,
            _pressedKeys: Set {}
        },
        _x: 0,
        _y: 0,
        _button: 'none'
    },
    _frameManager: FrameManager {
        domain: null,
        _events: {
            frameattached: [Function],
            framedetached: [Function],
            framenavigated: [Function]
        },
        _eventsCount: 3,
        _maxListeners: undefined,
        _client: Session {
            domain: null,
            _events: [Object],
            _eventsCount: 15,
            _maxListeners: undefined,
            _lastId: 14,
            _callbacks: Map {},
            _connection: [Object],
            _targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
            _sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
        },
        _mouse: Mouse {
            _client: [Object],
            _keyboard: [Object],
            _x: 0,
            _y: 0,
            _button: 'none'
        },
        _frames: Map {
            '232.1' => [Object]
        },
        _mainFrame: Frame {
            _client: [Object],
            _mouse: [Object],
            _parentFrame: null,
            _url: 'http://mytestdomain.com/',
            _id: '232.1',
            _defaultContextId: 4,
            _waitTasks: Set {},
            _childFrames: Set {},
            _name: undefined,
            _loadingFailed: false
        }
    },
    _networkManager: NetworkManager {
        domain: null,
        _events: {
            request: [Function],
            response: [Function],
            requestfailed: [Function],
            requestfinished: [Function]
        },
        _eventsCount: 4,
        _maxListeners: undefined,
        _client: Session {
            domain: null,
            _events: [Object],
            _eventsCount: 15,
            _maxListeners: undefined,
            _lastId: 14,
            _callbacks: Map {},
            _connection: [Object],
            _targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
            _sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
        },
        _requestIdToRequest: Map {},
        _interceptionIdToRequest: Map {
            null => [Object], 'id-1' => [Object], 'id-2' => [Object], 'id-3' => [Object]
        },
        _extraHTTPHeaders: Map {},
        _requestInterceptionEnabled: true,
        _requestHashToRequestIds: Multimap {
            _map: [Object]
        },
        _requestHashToInterceptions: Multimap {
            _map: Map {}
        }
    },
    _emulationManager: EmulationManager {
        _client: Session {
            domain: null,
            _events: [Object],
            _eventsCount: 15,
            _maxListeners: undefined,
            _lastId: 14,
            _callbacks: Map {},
            _connection: [Object],
            _targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
            _sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
        },
        _emulatingMobile: false,
        _injectedTouchScriptId: null
    },
    _tracing: Tracing {
        _client: Session {
            domain: null,
            _events: [Object],
            _eventsCount: 15,
            _maxListeners: undefined,
            _lastId: 14,
            _callbacks: Map {},
            _connection: [Object],
            _targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
            _sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
        },
        _recording: false,
        _path: ''
    },
    _pageBindings: Map {},
    _ignoreHTTPSErrors: false,
    _screenshotTaskQueue: TaskQueue {
        _chain: Promise {
            undefined
        }
    },
    _viewport: {
        width: 800,
        height: 600
    }
}

Can you please tell me how I can get all the URLs of the JavaScript network events with Puppeteer?

3

There are 3 answers

1
ebidel On BEST ANSWER

Check out the sample that intercepts image requests. It is easy to modify it to look at other types of resource requests:

// Enable interception so that every request is paused until the handler
// below decides what to do with it. Older Puppeteer releases exposed this as
// page.setRequestInterceptionEnabled(); it was renamed to setRequestInterception().
await page.setRequestInterception(true);
page.on('request', (request) => {
  // In Puppeteer 1.0+ `url` is a method, not a property; testing the bare
  // property would run the regex against the function's source text.
  const url = request.url();
  if (/\.js$/i.test(url)) {
    // request for js resource
  }
  // Every intercepted request has to be continued (or aborted); otherwise it
  // stays paused and the navigation never completes.
  request.continue();
});
await page.goto('https://example.com');
0
theflowersoftime On

I think a solution more accurate to the question, and that does not interfere with the traffic, is using Page.on() listeners.

Something like:

// Passive listeners: traffic is only observed, never intercepted or modified.
// Other events that can be subscribed to the same way: 'requestfinished' and 'requestfailed'.
page.on('request', (request) => {
  console.log(request);
});
page.on('response', (response) => {
  console.log(response);
});
0
Dionysi Yakiza On

I came across this post and noticed that page.setRequestInterceptionEnabled has since been renamed to:

page.setRequestInterception(value)

Here is a piece of code I found in the documentation:

const puppeteer = require('puppeteer');

// Launches a browser, blocks .png/.jpg requests on a single page load,
// and always shuts the browser down again.
puppeteer.launch().then(async browser => {
  try {
    const page = await browser.newPage();
    // Pause every request until the handler below aborts or continues it.
    await page.setRequestInterception(true);
    page.on('request', interceptedRequest => {
      // `url` is a method in Puppeteer 1.0+; calling it yields the request URL string.
      const url = interceptedRequest.url();
      if (url.endsWith('.png') || url.endsWith('.jpg')) {
        interceptedRequest.abort();
      } else {
        interceptedRequest.continue();
      }
    });
    await page.goto('https://example.com');
  } finally {
    // Close the browser even when newPage() or goto() rejects, so no
    // Chromium process is left running.
    await browser.close();
  }
}).catch(err => {
  // Surface launch/navigation failures instead of leaving the rejection unhandled.
  console.error(err);
  process.exitCode = 1;
});

NOTE: Enabling request interception disables page caching.

For more details, see the Puppeteer documentation: https://pptr.dev