My headless browser works on its own, but fails when used in a FastAPI endpoint

145 views Asked by At

I coded a class to use as a headless browser for my FastAPI app. The problem is the class works just fine when I use it directly, but it fails when used in a FastAPI endpoint.

Here's the class

from playwright.async_api import async_playwright
import logging


# Configure the root logger so records at INFO level and above are emitted.
logging.basicConfig(level=logging.INFO)
# Module-level logger; CustomBrowser uses it to report browser start-up status.
logger = logging.getLogger(__name__)

class CustomBrowser:
    """Headless Chromium browser shared by all instances through class attributes.

    The Playwright driver and the browser are started lazily by
    ``initialize_browser`` and stored on the class, so every instance reuses
    the same browser. Each page fetch runs in its own browser context.
    """

    browser = None  # Class-level attribute for Playwright browser instance
    playwright = None  # Class-level attribute for Playwright process
    _init_error = None  # Exception from the most recent failed start-up, if any

    @classmethod
    async def initialize_browser(cls):
        """
        Initialize the Playwright browser instance asynchronously.

        Does nothing beyond logging when the browser is already running.
        Start-up failures are not raised from here; they are logged with a
        full traceback and remembered so that ``get_page_content`` can report
        the real cause instead of only "Browser not initialized".

        Example:
            await CustomBrowser.initialize_browser()
        """
        try:
            if cls.browser is None:
                if cls.playwright is None:
                    # start() is the documented non-context-manager entry point.
                    cls.playwright = await async_playwright().start()
                cls.browser = await cls.playwright.chromium.launch(headless=True)
            cls._init_error = None
            logger.info('Browser successfully initialized.')
        except Exception as e:
            # Keep the failure: without it the caller only ever sees the
            # generic RuntimeError raised later by get_page_content.
            cls._init_error = e
            logger.exception('Failed to initialize browser: %s', e)

    async def get_page_content(self, url):
        """
        Browse to the given URL and return the page content.

        Args:
            url (str): URL to browse.

        Returns:
            str: Page content as HTML.

        Raises:
            RuntimeError: If the browser has not been initialized. When a
                previous ``initialize_browser`` call failed, that failure is
                attached as the exception's ``__cause__``.

        Example:
            content = await instance.get_page_content("https://example.com")
        """
        if self.browser is None:
            raise RuntimeError(
                "Browser not initialized. Call initialize_browser first."
            ) from self._init_error

        context = await self.browser.new_context()
        try:
            page = await context.new_page()
            await page.goto(url)
            return await page.content()
        finally:
            # Close the context even when navigation fails so it is not leaked.
            await context.close()

    @classmethod
    async def setup_and_get_content(cls, url):
        """
        Initialize the browser (if not already) and get content from the given URL.

        Args:
            url (str): URL to browse.

        Returns:
            str: Page content as HTML.

        Raises:
            RuntimeError: If the browser could not be initialized.

        Example:
            content = await CustomBrowser.setup_and_get_content("https://example.com")
        """
        await cls.initialize_browser()  # Initialize the browser
        instance = cls()  # Create an instance
        return await instance.get_page_content(url)  # Get and return the content


It works when used like this:

async def main():
    """Fetch https://example.com through CustomBrowser and print the HTML."""
    print(await CustomBrowser.setup_and_get_content("https://example.com"))


asyncio.run(main())

But it fails when I use it in a FastAPI endpoint:

from fastapi import FastAPI

# FastAPI application object; the /parse/ route below is registered on it.
app = FastAPI()



@app.get("/parse/")
async def read_item():
    """Return the HTML of https://example.com as fetched by CustomBrowser."""
    return await CustomBrowser.setup_and_get_content('https://example.com')

Here's the error

    raise RuntimeError("Browser not initialized. Call initialize_browser first.")
RuntimeError: Browser not initialized. Call initialize_browser first.

1

There is 1 answer

0
Mehdi Zare On

The quick fix is to run FastAPI with auto-reload disabled. I'm hoping to find a more robust solution.