Why is Xpath not yielding correct number of elements in Selenium?

59 views · Asked on Stack Overflow

I'm trying to scrape data from a website using Selenium and Xpath, but I'm running into an odd issue.

Website link: dexcheck

Expected Result: When I view the page, I expect the Xpath to yield 16 "Realized ROI %" data points.

Actual Result: The Xpath sometimes gives only 11 or even fewer results.

At 100% screen zoom, the XPath doesn't seem to be working, as seen in the screenshot (it returns only 11 elements, but there are actually 16).

Observations:

This problem isn't consistent. Sometimes it works, other times it doesn't. Interestingly, if I zoom out my browser view to 25%, the problem seems to vanish when using Chrome DevTools. But the same doesn't replicate when using Selenium, even if the browser is initiated with a 25% zoom out.

The zoom-out-to-25% trick seems to work, but only in Chrome DevTools — not during the actual Selenium execution.

I have ensured that I'm scrolling down to load all elements. Here's my scrolling mechanism:

def scroll_to_load(driver, container_xpath):
    """Scroll the results table until the page source stops changing.

    `container_xpath` is currently unused; it is kept for interface
    compatibility with existing callers.
    """
    # Click inside the table so PAGE_DOWN keystrokes scroll the table,
    # not the page body.
    try:
        inside_table = driver.find_element(By.XPATH,'((//div[@class="crypto-pnl-table"]/div)[3]/div/p)[1]')
        inside_table.click()
    except Exception:
        # Best effort: the cell may not be rendered yet; scroll anyway.
        pass
    while True:
        old_page = driver.page_source
        # BUG FIX: queue all 16 presses on one chain and perform() ONCE.
        # The original called perform() inside the loop on the same
        # ActionChains object; actions accumulate in the chain, so pass k
        # replayed k presses (1 + 2 + ... + 16 = 136 presses per iteration).
        chain = ActionChains(driver)
        for _ in range(16):
            chain.send_keys(Keys.PAGE_DOWN)
        chain.perform()
        time.sleep(5)
        new_page = driver.page_source
        if new_page == old_page:
            print('new page == old page')
            break

This Xpath and method worked perfectly in the past, but it stopped after a recent update. I'm not sure if the website structure changed or if I'm missing something.

The full code is below:


import time
import pandas as pd
from scrapy import Selector
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from seleniumbase import Driver

def scroll_to_load(driver, container_xpath):
    """Scroll the results table until the page source stops changing.

    `container_xpath` is currently unused; it is kept for interface
    compatibility with existing callers.
    """
    # Click inside the table so PAGE_DOWN keystrokes scroll the table,
    # not the page body.
    try:
        inside_table = driver.find_element(By.XPATH, '((//div[@class="crypto-pnl-table"]/div)[3]/div/p)[1]')
        inside_table.click()
    except Exception:
        # Best effort: the cell may not be rendered yet; scroll anyway.
        pass

    while True:
        old_page = driver.page_source
        # BUG FIX: queue all 16 presses on one chain and perform() ONCE.
        # The original called perform() inside the loop on the same
        # ActionChains object; actions accumulate in the chain, so pass k
        # replayed k presses (1 + 2 + ... + 16 = 136 presses per iteration).
        chain = ActionChains(driver)
        for _ in range(16):
            chain.send_keys(Keys.PAGE_DOWN)
        chain.perform()
        time.sleep(5)
        new_page = driver.page_source
        if new_page == old_page:
            break

def get_driver():
    """Create a maximized undetected-Chrome driver via seleniumbase.

    BUG FIX: the original built a selenium ``Options`` object (user agent,
    headless, image/stylesheet prefs) but never passed it to ``Driver``, so
    every option was silently ignored. seleniumbase's ``Driver`` takes its
    own keyword arguments instead of an Options object.
    """
    user_agent = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/74.0.3729.169 Safari/537.36"
    )
    # NOTE(review): headless + uc mode can be detected by some sites;
    # drop headless=True if the target starts blocking the scraper.
    driver = Driver(uc=True, headless=True, agent=user_agent)
    driver.maximize_window()
    return driver

def exporter(row):
    """Append one result row (a flat dict) to DexCheck.csv.

    The header is written only on the first call of the run.

    BUG FIX: the original toggled ``exporter.switch`` on every call starting
    from True, so the FIRST write took the ``header=False`` branch and the
    header row was emitted on the SECOND call, landing in the middle of the
    data (and again on every other call after that).
    """
    file_name = 'DexCheck.csv'
    pd.DataFrame(row, index=[0]).to_csv(
        file_name, index=False, mode='a', header=exporter.switch)
    exporter.switch = False  # header already written; never write it again
exporter.switch = True

def scraper(address, driver):
    """Scrape the 30/7/1-day analyzer pages for one wallet and export a row."""
    record = {'Wallet Address': address}
    for timeframe in (30, 7, 1):
        driver.get(f'https://dexcheck.ai/app/address-analyzer/{address}?chain=eth&timeframe={timeframe}')
        # The first (30-day) page load is given extra settling time.
        time.sleep(25 if timeframe == 30 else 15)
        scroll_to_load(driver, '//div[@class="crypto-pnl-table"]')
        page = Selector(text=driver.page_source)
        record.update(ScrapeData(page, timeframe))
    exporter(record)

def ScrapeData(response, x):
    """Extract summary stats for one timeframe from the rendered page HTML.

    Args:
        response: a parsed selector (scrapy ``Selector``) over the page source.
        x: the timeframe in days (30, 7 or 1) — used only to suffix the keys.

    Returns:
        dict mapping timeframe-suffixed column names to scraped values;
        'Average ROI' is a float, or 'N/A' when no ROI cells were found or
        a cell could not be parsed.
    """
    PNL_total = response.xpath('//div/p[contains(text(),"PNL")]/span/text()').get()
    Trading_vol_total_lst = response.xpath('//div/p[contains(text(),"Trading Volume(")]/span/text()').getall()
    Trading_vol_total = ''.join(Trading_vol_total_lst)
    total_trades = response.xpath('//div/p[contains(text(),"Total Trades(")]/span/text()').get()
    Realized_Profit = response.xpath('(((//div[@class="py-0.5"]/div/p)[position() mod 3=2])/text())[position() mod 2=1]').getall()

    try:
        # BUG FIX: the float parsing was previously OUTSIDE the try, so a
        # single malformed cell (e.g. "N/A" or "--") raised ValueError out
        # of the function instead of degrading to 'N/A'.
        values = [
            float(profit.replace('$', '').replace(',', '').replace('%', ''))
            for profit in Realized_Profit
        ]
        Averaged_Realized_Profit = sum(values) / len(values) if values else 'N/A'
    except (ValueError, ZeroDivisionError):
        Averaged_Realized_Profit = "N/A"

    # The old {30: '30', 7: '7', 1: '1'}[x] lookup was just str(x) — and
    # raised KeyError for any other timeframe.
    prefix = str(x)
    return {
        f'PNL Total {prefix}': PNL_total,
        f'Trading Volume Total {prefix}': Trading_vol_total,
        f'Total Trades {prefix}': total_trades,
        f'Average ROI {prefix}': Averaged_Realized_Profit,
    }

if __name__ == "__main__":
    driver = get_driver()
    df = pd.read_csv('./walletAddress.csv')['address'].tolist()
    for address in df:
        scraper(address, driver)
    driver.close()

Any guidance on this issue would be greatly appreciated. Thanks in advance!

0 votes

There are 0 answers.