I am trying to call the function from file 1 to file 2 (boh codes reproduced below. While File 1 script works independently, the call isn't giving me the results? I have checked that the call is going through and the function loop gets initiated. The input isn't entering the async def scrape_data function. Not sure what I am missing
Appreciate the support. thanks
File 1
import asyncio
import nest_asyncio
from pyppeteer import launch
import time
def get_KW(KW1):
for i in KW1:
nest_asyncio.apply()
# KW1 = ['vaccum cleaner', 'Kurti for women']
async def scrape_data(i, browser):
try:
context = await browser.createIncognitoBrowserContext()
page = await context.newPage()
url = 'https://www.helium10.com/tools/free/amazon-keyword-tool/'
await page.goto(url)
await page.type('input[placeholder="Enter keyword"]', i)
flag = await page.querySelector('div.selected-flag[data-value="amazon.com"]')
if flag:
await page.evaluate('(flag) => { flag.setAttribute("data-value", "amazon.in"); }', flag)
await page.evaluate('() => { document.querySelector(".btn.btn-primary.btn-block").click(); }')
# Wait for the specific element you need, use a custom function if necessary
await page.waitForSelector('div[class="sc-jaBegh dUfEDl"]', {'timeout': 10000}) # Adjust the timeout as needed
productHandles = await page.querySelectorAll('div[class="sc-dVSYCO sc-jEWLvH hPbcsG fSVlOB"]')
# Evaluate multiple elements in parallel
elements = await asyncio.gather(*[
page.evaluate('(el) => el.querySelector(".sc-fpAkvV.sc-lmOJGc.geUMvU.bktErp .sc-pRPz.kwdpAU").textContent', el)
for el in productHandles
])
# Extracting search volumes in parallel
search_volumes = await asyncio.gather(*[
page.evaluate('(el) => el.querySelector("div:nth-child(7) > div:nth-child(3) > div > div").textContent', el)
for el in productHandles
])
items = [{
"keyword": i,
"element": element,
"search_volume": volume
} for element, volume in zip(elements, search_volumes)]
print(items)
return items
except Exception as e:
print('An error occurred:', str(e))
finally:
await page.close()
await context.close()
async def main():
browser = await launch(
headless=False, # Use headless mode
defaultViewport=None,
userDataDir='./temp'
)
start_time = time.time() # Record the start time
tasks = []
for keyword in KW1:
task = asyncio.ensure_future(scrape_data(keyword, browser))
tasks.append(task)
await asyncio.gather(*tasks)
await browser.close()
end_time = time.time() # Record the end time
total_time = end_time - start_time
print(f'Total Execution Time: {total_time} seconds')
if __name__ == '__main__':
asyncio.run(main())
File 2 for calling function in file 1
from Helium10 import *
KW1=["vaccum cleaner", "Kurti for women"]
items=get_KW(KW1)
print(items)