I have this program that is scraping products from Lowes's website. I am trying to make it so that I can also see the products that are currently unavailable so can scrape those as well.
import undetected_chromedriver as uc
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
from selenium.common.exceptions import NoSuchElementException
def scrape_page_data():
WebDriverWait(driver, 5).until(EC.presence_of_all_elements_located((By.CLASS_NAME, 'pl')))
container = driver.find_element(By.CLASS_NAME, 'pl')
# scroll down to load all content on the page
for _ in range(4):
driver.execute_script("window.scrollBy(0, 2000);")
time.sleep(2)
skus = container.find_elements(By.CLASS_NAME, 'tooltip-custom')
prices = container.find_elements(By.CSS_SELECTOR, 'div.prdt-actl-pr')
description = container.find_elements(By.CSS_SELECTOR, '.titl-cnt.titl.brnd-desc')
return skus, prices, description
def pagination(url, pages=1):
prod_num = []
prod_price = []
prod_desc = []
page_num = 0
# iterate over the pages
for i in range(1, pages + 1):
# print(f"this is page {i}")
driver.get(f"{url}?offset={page_num}")
skus, prices, description = scrape_page_data()
for sku in skus:
prod_num.append(sku.text)
for price in prices:
prod_price.append(price.text)
for desc in description:
prod_desc.append(desc.text)
print(f"prod_num: {prod_num}")
print(f"prod_price: {prod_price}")
print(f"prod_desc: {prod_desc}")
print(f"prod_num: {len(prod_num)}")
print(f"prod_price: {len(prod_price)}")
print(f"prod_desc: {len(prod_desc)}")
# increment it by 24 since each page has 24 data
page_num += 24
time.sleep(1)
return prod_num, prod_price, prod_desc
def click_show_unavailable_button():
try:
unavailable_button = driver.find_element(By.CLASS_NAME, 'styles__StyledSVG-sc-1houmlx-0 hGNiQH icon icon-plus')
unavailable_button.click()
WebDriverWait(driver, 5).until(EC.invisibility_of_element((By.CLASS_NAME, 'styles__StyledSVG-sc-1houmlx-0 hGNiQH icon icon-plus')))
filter_button = driver.find_element(By.CLASS_NAME, 'CheckboxWrapper-j2pgcr-0 ikAusL')
filter_button.click()
WebDriverWait(driver, 5).until(EC.invisibility_of_element((By.CLASS_NAME, 'CheckboxWrapper-j2pgcr-0 ikAusL')))
except Exception as e:
print("Failed to click the 'Show Unavailable Products' button:", str(e))
# set the website URL and initialize the Chrome driver
website = 'https://www.lowes.com/pl/Drywall-joint-compound-Drywall-Building-supplies/4294858286'
options = Options()
# options.add_argument("--geolocation=47.8410,-122.2947") # set geolocation to Lynnwood, WA
driver = uc.Chrome()
click_show_unavailable_button()
# call the pagination function to scrape data and store it in three separate lists
prod_num, prod_price, prod_desc = pagination(website, pages=3)
# convert the three lists to a pandas dataframe and save it as a CSV file
df = pd.DataFrame({'code': prod_num, 'price': prod_price, 'brand': prod_desc})
df.to_csv('lowesjctest.csv', index=False)
print(df)
# quit the Chrome driver
driver.quit()
I have the click_show_unavailable_button function trying to click the proper buttons to filter but it does not seem to be working. Why is that? I have also tried changing the website link from https://www.lowes.com/pl/Drywall-joint-compound-Drywall-Building-supplies/4294858286 to https://www.lowes.com/pl/Drywall-joint-compound-Drywall-Building-supplies/4294858286?refinement=1 once it is filtered but this just gives me an Error 404. If anyone could help me find a solution that would be greatly appreciated!