How can I scrape this websocket?

224 views Asked by At

I'm trying to scrape a WebSocket using Node.js, but so far I haven't been able to get it working.

I'm trying to scrape the websocket from this website: https://dexscreener.com/new-pairs

That page actually opens a WebSocket pointing to, for example, wss://io.dexscreener.com/dex/screener/pairs/h24/1?rankBy[key]=trendingScoreH6&rankBy[order]=desc&filters[pairAge][max]=24&filters[liquidity][min]=1000

The problem is that the WebSocket is protected by Cloudflare. I have tried 30-40 different approaches and still haven't found one that works.

This is the code I'm trying:

const WebSocket = require('ws');
const crypto = require('crypto');
const tls = require('tls');
const UserAgent = require('user-agents');

/**
 * Client for the DexScreener "pairs" WebSocket feed.
 *
 * Holds the feed URL and opens `ws` connections to it with browser-like
 * handshake headers and a slightly re-ordered TLS cipher list.
 */
class ListPage {
    constructor() {
        // Feed endpoint including the ranking and filter query parameters.
        this.base_url = 'wss://io.dexscreener.com/dex/screener/pairs/h24/1?rankBy[key]=trendingScoreH6&rankBy[order]=desc&filters[chainIds][0]=solana&filters[liquidity][min]=1000&filters[pairAge][max]=24';
    }

    /**
     * Generate a handshake nonce: 16 random bytes encoded as base64.
     *
     * @returns {string} A 24-character base64 string.
     */
    generateWebSocketKey() {
        return crypto.randomBytes(16).toString('base64');
    }

    /**
     * Build the HTTP headers sent with the WebSocket upgrade request.
     *
     * @param {string} userAgent - Value for the `User-Agent` header.
     * @returns {Object<string, string>} Header name/value pairs.
     */
    buildHeaders(userAgent) {
        return {
            // NOTE(review): the `ws` client is expected to set its own key and
            // version during the handshake - confirm before relying on these.
            'Sec-WebSocket-Key': this.generateWebSocketKey(),
            // RFC 6455 defines exactly one protocol version: 13.
            'Sec-WebSocket-Version': '13',
            'Sec-WebSocket-Extensions': 'permessage-deflate; client_max_window_bits',
            // Origin is the scheme/host of the page that opens the socket
            // (an http/https origin), not the wss:// endpoint itself.
            'Origin': 'https://dexscreener.com',
            'User-Agent': userAgent,
        };
    }

    /**
     * Open a WebSocket connection to `base_url` and attach logging handlers
     * for the `open`, `message` and `error` events.
     *
     * NOTE(review): the header corrections here make the handshake
     * well-formed; a 403 can still be returned by upstream bot protection
     * for reasons this code does not control.
     *
     * @param {number} num - Request counter, used only in the log line.
     * @returns {WebSocket} The `ws` client instance (still connecting).
     */
    openConnection(num) {
        const header = this.buildHeaders(UserAgent.random().toString());

        const url = this.base_url;
        console.info(`Request ${num}: ${url}`, header);

        // Swap the 2nd and 3rd entries of Node's default cipher list so the
        // cipher order passed to the connection differs from the default.
        const [first, second, third, ...rest] = tls.DEFAULT_CIPHERS.split(':');
        const shuffledCiphers = [first, third, second, ...rest].join(':');

        const ws = new WebSocket(url, {
            headers: header,
            ciphers: shuffledCiphers,
        });

        ws.on('open', () => {
            // No proxy is configured in this class, so do not claim one.
            console.log('Connected');
        });

        ws.on('message', (data) => {
            console.log(data);
        });

        ws.on('error', (err) => {
            console.log('Error: ', err);
        });

        return ws;
    }
}

(async function main() {
    // Entry point: open a single connection (request #1) and leave its
    // event handlers running.
    const page = new ListPage();
    page.openConnection(1);
    console.log(`Listening..`);
})();

However, it always returns 403 (Cloudflare protection). Do you have any idea how to make this work? I'm working with Node.js.

0

There are 0 answers