Why is XPath not yielding the correct number of elements in Selenium?

82 views Asked by Shah Zeb At 22 October 2023 at 14:09

I'm trying to scrape data from a website using Selenium and Xpath, but I'm running into an odd issue.

Website link: dexcheck

Expected Result: When I view the page, I expect the Xpath to yield 16 "Realized ROI %" data points.

Actual Result: The Xpath sometimes gives only 11 or even fewer results.

Observations:

This problem isn't consistent: sometimes it works, other times it doesn't. Interestingly, if I zoom my browser out to 25%, the problem seems to vanish when I test the XPath in Chrome DevTools. However, I can't reproduce that in Selenium, even when the browser is started at 25% zoom.

I have ensured that I'm scrolling down to load all elements. Here's my scrolling mechanism:

def scroll_to_load(driver, container_xpath):
    """Scroll the page with PAGE_DOWN until its source stops changing.

    Args:
        driver: Selenium WebDriver whose current page is scrolled.
        container_xpath: Not used by this function; kept so existing
            callers keep working.
    """
    # Local import so the excerpt does not depend on extra file-level imports.
    from selenium.common.exceptions import WebDriverException

    try:
        # Click a cell inside the table first (presumably so the key presses
        # scroll the table rather than the page -- TODO confirm).
        inside_table = driver.find_element(By.XPATH,'((//div[@class="crypto-pnl-table"]/div)[3]/div/p)[1]')
        inside_table.click()
    except WebDriverException:
        # Best effort: a missing or unclickable cell must not abort the
        # scroll, but non-Selenium errors and Ctrl+C are no longer swallowed.
        pass
    while True:
        old_page = driver.page_source
        actions = ActionChains(driver)
        for _ in range(16):
            actions.send_keys(Keys.PAGE_DOWN).perform()
        # Give lazily loaded content time to arrive before comparing.
        time.sleep(5)
        new_page = driver.page_source
        # Stop once a full round of scrolling left the markup untouched.
        if new_page == old_page:
            print('new page == old page')
            break

This XPath and method worked perfectly in the past, but they stopped working after a recent update. I'm not sure if the website structure changed or if I'm missing something.

The full code is below:


import time
import pandas as pd
from scrapy import Selector
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from seleniumbase import Driver

def scroll_to_load(driver, container_xpath):
    """Scroll the page with PAGE_DOWN until its source stops changing.

    Each round sends 16 PAGE_DOWN key presses, waits 5 seconds, and compares
    ``driver.page_source`` with the snapshot taken before the round. The
    function returns after the first round that leaves the source unchanged.

    Args:
        driver: Selenium WebDriver whose current page is scrolled.
        container_xpath: Not used by this function; kept so existing
            callers keep working.

    NOTE(review): the loop has no upper bound, so a page whose markup keeps
    changing would never terminate. Also, if the table is virtualized (rows
    removed from the DOM once scrolled out of view), the final page source
    holds only the rows rendered at that moment -- confirm against the site.
    """
    # Local import: only this function needs the Selenium exception type.
    from selenium.common.exceptions import WebDriverException

    try:
        # Click a cell inside the table first (presumably so the key presses
        # scroll the table rather than the page -- TODO confirm).
        inside_table = driver.find_element(By.XPATH, '((//div[@class="crypto-pnl-table"]/div)[3]/div/p)[1]')
        inside_table.click()
    except WebDriverException:
        # Best effort: a missing or unclickable cell must not abort the
        # scroll, but unrelated programming errors are no longer hidden.
        pass

    while True:
        old_page = driver.page_source
        actions = ActionChains(driver)
        for _ in range(16):
            actions.send_keys(Keys.PAGE_DOWN).perform()
        # Give lazily loaded content time to arrive before comparing.
        time.sleep(5)
        new_page = driver.page_source
        if new_page == old_page:
            break

def get_driver(headless=None, agent=None, block_images=None):
    """Start a maximized SeleniumBase Chrome driver in undetected (UC) mode.

    This function used to build a selenium ``Options`` object (custom user
    agent, headless flag, window size, image-blocking prefs) that was never
    handed to ``Driver``, so none of those settings reached the browser.
    The settings that SeleniumBase supports are now exposed as keyword
    arguments and forwarded to ``Driver``. With the defaults (all ``None``)
    the browser starts exactly as before.

    Args:
        headless: Forwarded to ``Driver(headless=...)``.
        agent: User-Agent string forwarded to ``Driver(agent=...)``.
        block_images: Forwarded to ``Driver(block_images=...)``.

    Returns:
        The driver object created by ``seleniumbase.Driver``.
    """
    driver = Driver(
        uc=True,
        headless=headless,
        agent=agent,
        block_images=block_images,
    )
    driver.maximize_window()
    return driver

def exporter(row):
    """Append one result row to ``DexCheck.csv``, writing the header once.

    The header line is emitted only when the file does not exist yet or is
    empty, so repeated calls (and repeated runs of the script) produce a
    single header followed by data rows. The previous toggle-based logic
    skipped the header on the first row and re-emitted it on every second
    row.

    Args:
        row: Mapping of column name to scalar value for a single CSV row.
    """
    import os

    file_name = 'DexCheck.csv'
    needs_header = not os.path.isfile(file_name) or os.path.getsize(file_name) == 0
    pd.DataFrame(row, index=[0]).to_csv(file_name, index=False, mode='a', header=needs_header)

def scraper(address, driver):
    """Collect the 30-, 7- and 1-day figures for one wallet and export them.

    For each timeframe the analyzer page is opened, given a fixed settle
    time (25 seconds for the 30-day view, 15 seconds otherwise), scrolled
    with ``scroll_to_load`` and parsed from ``driver.page_source``. The
    per-timeframe results are merged into one row that is handed to
    ``exporter``.

    Args:
        address: Wallet address inserted into the analyzer URL.
        driver: WebDriver used to load and scroll the pages.
    """
    container_xpath = '//div[@class="crypto-pnl-table"]'
    data_combined = {'Wallet Address': address}

    for x in (30, 7, 1):
        url = f'https://dexcheck.ai/app/address-analyzer/{address}?chain=eth&timeframe={x}'
        driver.get(url)

        # The 30-day view gets a longer fixed wait than the shorter ones.
        if x == 30:
            settle_seconds = 25
        else:
            settle_seconds = 15
        time.sleep(settle_seconds)

        scroll_to_load(driver, container_xpath)
        page = Selector(text=driver.page_source)
        timeframe_data = ScrapeData(page, x)
        data_combined.update(timeframe_data)

    exporter(data_combined)

def ScrapeData(response, x):
    """Extract the summary figures for one timeframe from a parsed page.

    Args:
        response: Object exposing ``xpath(query)`` whose result supports
            ``get()`` and ``getall()`` (a scrapy ``Selector`` in this script).
        x: Timeframe; must be 30, 7 or 1 (any other value raises
            ``KeyError``).

    Returns:
        dict with the keys ``PNL Total <x>``, ``Trading Volume Total <x>``,
        ``Total Trades <x>`` and ``Average ROI <x>``. The average is the mean
        of the extracted profit values, or ``'N/A'`` when no values were
        found or any of them cannot be parsed as a number.
    """
    PNL_total = response.xpath('//div/p[contains(text(),"PNL")]/span/text()').get()
    Trading_vol_total_lst = response.xpath('//div/p[contains(text(),"Trading Volume(")]/span/text()').getall()
    Trading_vol_total = ''.join(Trading_vol_total_lst)
    total_trades = response.xpath('//div/p[contains(text(),"Total Trades(")]/span/text()').get()
    Realized_Profit = response.xpath('(((//div[@class="py-0.5"]/div/p)[position() mod 3=2])/text())[position() mod 2=1]').getall()

    # Parsing is the step that can actually fail (e.g. a placeholder such as
    # "-" in a cell), so it is the part guarded here; previously it ran
    # outside the try block and a single bad cell aborted the whole run.
    try:
        profits = [
            float(text.replace('$', '').replace(',', '').replace('%', ''))
            for text in Realized_Profit
        ]
    except ValueError:
        Averaged_Realized_Profit = 'N/A'
    else:
        Averaged_Realized_Profit = sum(profits) / len(profits) if profits else 'N/A'

    prefix = {30: '30', 7: '7', 1: '1'}[x]
    return {
        f'PNL Total {prefix}': PNL_total,
        f'Trading Volume Total {prefix}': Trading_vol_total,
        f'Total Trades {prefix}': total_trades,
        f'Average ROI {prefix}': Averaged_Realized_Profit,
    }

if __name__ == "__main__":
    # Scrape every wallet listed in the 'address' column of the input CSV.
    driver = get_driver()
    try:
        addresses = pd.read_csv('./walletAddress.csv')['address'].tolist()
        for address in addresses:
            scraper(address, driver)
    finally:
        # quit() ends the whole WebDriver session (browser and driver
        # process); close() only closed the current window and was skipped
        # entirely when a scrape raised.
        driver.quit()

Any guidance on this issue would be greatly appreciated. Thanks in advance!

Original Q&A

TechQA.

Why is Xpath not yielding correct number of elements in Selenium?

There are 0 answers

Related Questions in PYTHON

Related Questions in SELENIUM-WEBDRIVER

Related Questions in WEB-SCRAPING

Related Questions in XPATH

Related Questions in SELENIUMBASE

Popular Questions

Popular Tags

Trending Questions