PermissionError Multiprocessing argument pyppeteer.Page

165 views Asked by At

PermissionError Multiprocessing argument pyppeteer.Page

This version works, but it is inefficient:

    import asyncio
    from pyppeteer import launch
    from multiprocessing import Process

    async def f(x):
        """Launch a browser, open example.com and replace the <h1> text with ``x``.

        A new browser is launched on every call (``headless=False``,
        ``autoClose=False``) and its first tab is used for the navigation.
        """
        print("async def f(x,page):",x)
        browser = await launch(headless=False, autoClose=False)
        page = (await browser.pages())[0]
        await page.goto('https://example.com')
        h1 = await page.querySelector("body > div > h1")
        # Hand x to the page as an argument instead of splicing it into the
        # JavaScript source, so quotes or backslashes in x cannot break the script.
        await page.evaluate('(element, text) => element.textContent = text', h1, x)

    def p(x):
        """Process entry point: announce the job, then run ``f(x)`` to completion."""
        print("def p(x,page):",x)
        job = f(x)
        asyncio.run(job)
    
    async def main():
        """Start one worker process per label, in order, without waiting for them."""
        for label in ("1111", "2222"):
            worker = Process(target=p, args=(label,))
            worker.start()

    if __name__ == "__main__":
        # asyncio.run() creates and closes its own event loop; calling
        # asyncio.get_event_loop() when no loop is running is deprecated.
        asyncio.run(main())

Launching a separate browser for every task is too heavy when there is a lot to process, so I am trying to open many tabs in a single browser instead.

This is the code I want, but I get a PermissionError. How can I solve this?

    import asyncio
    from pyppeteer import launch
    from multiprocessing import Process

    async def f(x,page):  # navigate the given page to example.com and set its <h1> text to x
        print("async def f(x,page):",x)

        await page.goto('https://example.com')
        h1 = await page.querySelector("body > div > h1")
        await page.evaluate(f'(element) => element.textContent="{x}"', h1)  # x is spliced into the JS source inside double quotes
    
    def p(x,page):  # target of the Process created in main(); receives the page as an argument
        print("def p(x,page):",x)
        asyncio.run(f(x,page))  # run the coroutine f to completion in this process
    
    async def main():  # launch one browser and pass its first tab to a worker process
        browser = await launch(headless=False, autoClose=False)
        page = (await browser.pages())[0]

        pro = Process(target=p, args=("1111",page))  # page becomes part of the Process arguments
        pro.start()    # fails here: the traceback ends in "TypeError: cannot pickle '_thread.lock' object"

    if __name__ =="__main__":  # run main() only when executed as a script
        asyncio.get_event_loop().run_until_complete(main())  # this call triggers the DeprecationWarning quoted in the error message

Error message:

    c:\Users\mimmi\python\ttttt.py:24: DeprecationWarning: There is no current event loop
      asyncio.get_event_loop().run_until_complete(main())
    Traceback (most recent call last):
      File "c:\Users\mimmi\python\ttttt.py", line 24, in <module>
        asyncio.get_event_loop().run_until_complete(main())
      File "C:\python\python311\Lib\asyncio\base_events.py", line 650, in run_until_complete
        return future.result()
               ^^^^^^^^^^^^^^^
      File "c:\Users\mimmi\python\ttttt.py", line 21, in main
        pro.start()    
        ^^^^^^^^^^^
      File "C:\python\python311\Lib\multiprocessing\process.py", line 121, in start
        self._popen = self._Popen(self)
                  ^^^^^^^^^^^^^^^^^
      File "C:\python\python311\Lib\multiprocessing\context.py", line 224, in _Popen
        return _default_context.get_context().Process._Popen(process_obj)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
      File "C:\python\python311\Lib\multiprocessing\context.py", line 336, in _Popen
        return Popen(process_obj)
               ^^^^^^^^^^^^^^^^^^
      File "C:\python\python311\Lib\multiprocessing\popen_spawn_win32.py", line 94, in __init__
        reduction.dump(process_obj, to_child)
      File "C:\python\python311\Lib\multiprocessing\reduction.py", line 60, in dump
        ForkingPickler(file, protocol).dump(obj)
    TypeError: cannot pickle '_thread.lock' object
    Traceback (most recent call last):
      File "<string>", line 1, in <module>
      File "C:\python\python311\Lib\multiprocessing\spawn.py", line 111, in spawn_main
        new_handle = reduction.duplicate(pipe_handle,
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
      File "C:\python\python311\Lib\multiprocessing\reduction.py", line 79, in duplicate
        return _winapi.DuplicateHandle(
               ^^^^^^^^^^^^^^^^^^^^^^^^
    PermissionError: [WinError 5] Access is denied

My environment: Windows 11, Python 3.11, pyppeteer 1.0.2.

1

There is 1 answer

1
금밈미 On

I got the desired result with this code.

    queue = asyncio.Queue()
    browser = await launch(headless=False, autoClose=False)
    
    for i in range(MAX_TASK_COUNT-1): 
        await browser.newPage()       
    
    pages = await browser.pages()

    for page in pages:
        asyncio.create_task(crawlingTask(queue, page))
    
    await asyncio.create_task(queuePutter(queue, session, appendList))
    await queue.join()