Unable to call function from another python pyppeteer file

32 views Asked by At

I am trying to call the function from file 1 to file 2 (boh codes reproduced below. While File 1 script works independently, the call isn't giving me the results? I have checked that the call is going through and the function loop gets initiated. The input isn't entering the async def scrape_data function. Not sure what I am missing

Appreciate the support. thanks

File 1

import asyncio
import nest_asyncio
from pyppeteer import launch
import time

def get_KW(KW1):
    for i in KW1:
    nest_asyncio.apply()  

# KW1 = ['vaccum cleaner', 'Kurti for women']

    async def scrape_data(i, browser):
        try:
            context = await browser.createIncognitoBrowserContext()
            page = await context.newPage()
            
            url = 'https://www.helium10.com/tools/free/amazon-keyword-tool/'
            await page.goto(url)
            
            await page.type('input[placeholder="Enter keyword"]', i)

            flag = await page.querySelector('div.selected-flag[data-value="amazon.com"]')
            if flag:
                await page.evaluate('(flag) => { flag.setAttribute("data-value", "amazon.in"); }', flag)

            await page.evaluate('() => { document.querySelector(".btn.btn-primary.btn-block").click(); }')
            
            # Wait for the specific element you need, use a custom function if necessary
            await page.waitForSelector('div[class="sc-jaBegh dUfEDl"]', {'timeout': 10000})  # Adjust the timeout as needed

            productHandles = await page.querySelectorAll('div[class="sc-dVSYCO sc-jEWLvH hPbcsG fSVlOB"]')
            
            # Evaluate multiple elements in parallel
            elements = await asyncio.gather(*[
                page.evaluate('(el) => el.querySelector(".sc-fpAkvV.sc-lmOJGc.geUMvU.bktErp .sc-pRPz.kwdpAU").textContent', el)
                for el in productHandles
            ])
            
            # Extracting search volumes in parallel
            search_volumes = await asyncio.gather(*[
                page.evaluate('(el) => el.querySelector("div:nth-child(7) > div:nth-child(3) > div > div").textContent', el)
                for el in productHandles
            ])

            items = [{
                "keyword": i,
                "element": element,
                "search_volume": volume
            } for element, volume in zip(elements, search_volumes)]
            
            print(items)
            return items
            
        except Exception as e:
            print('An error occurred:', str(e))
        finally:
            await page.close()
            await context.close()

    async def main():
        browser = await launch(
            headless=False,  # Use headless mode
            defaultViewport=None,
            userDataDir='./temp'
        )

        start_time = time.time()  # Record the start time

        tasks = []
        for keyword in KW1:
            task = asyncio.ensure_future(scrape_data(keyword, browser))
            tasks.append(task)

        await asyncio.gather(*tasks)

        await browser.close()

        end_time = time.time()  # Record the end time
        total_time = end_time - start_time
        print(f'Total Execution Time: {total_time} seconds')

    if __name__ == '__main__':
        asyncio.run(main())

File 2 for calling function in file 1

from Helium10 import *
KW1=["vaccum cleaner", "Kurti for women"]
items=get_KW(KW1)

print(items)
0

There are 0 answers