I coded a class to use as a headless browser for my FastAPI app. The problem is the class works just fine when I use it directly, but it fails when used in a FastAPI endpoint.
Here's the class:
from playwright.async_api import async_playwright
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class CustomBrowser:
    """Thin wrapper around a single shared headless Chromium instance.

    The Playwright driver and the browser are stored on the class, so every
    instance (and every caller of the classmethods) shares the same browser.
    """

    browser = None  # Class-level attribute for Playwright browser instance
    playwright = None  # Class-level attribute for Playwright process

    @classmethod
    async def initialize_browser(cls):
        """Start Playwright and launch headless Chromium if not already done.

        Calling this again once the browser exists is a no-op.

        Raises:
            RuntimeError: If Playwright cannot be started or the browser
                cannot be launched. The original exception is attached as
                ``__cause__``.

        Example:
            await CustomBrowser.initialize_browser()
        """
        if cls.browser is not None:
            return
        try:
            if cls.playwright is None:
                # Start the Playwright driver process.
                cls.playwright = await async_playwright().start()
            cls.browser = await cls.playwright.chromium.launch(headless=True)
        except Exception as e:
            # Surface the failure instead of swallowing it: otherwise callers
            # only see the later "Browser not initialized" error and the real
            # cause stays hidden in the log.
            logger.exception('Failed to initialize browser: %s', e)
            raise RuntimeError(f'Failed to initialize browser: {e}') from e
        logger.info('Browser successfully initialized.')

    async def get_page_content(self, url: str) -> str:
        """Browse to the given URL and return the page content.

        A fresh browser context is opened for the request and is always
        closed afterwards, including when navigation fails.

        Args:
            url: URL to browse.

        Returns:
            Page content as HTML.

        Raises:
            RuntimeError: If the shared browser has not been initialized.

        Example:
            content = await instance.get_page_content("https://example.com")
        """
        if self.browser is None:
            raise RuntimeError("Browser not initialized. Call initialize_browser first.")
        context = await self.browser.new_context()
        try:
            page = await context.new_page()
            await page.goto(url)
            return await page.content()
        finally:
            # Release the context even if goto()/content() raised.
            await context.close()

    @classmethod
    async def setup_and_get_content(cls, url: str) -> str:
        """Initialize the browser (if needed) and return the HTML of ``url``.

        Args:
            url: URL to browse.

        Returns:
            Page content as HTML.

        Raises:
            RuntimeError: If the browser could not be initialized.

        Example:
            content = await CustomBrowser.setup_and_get_content("https://example.com")
        """
        await cls.initialize_browser()
        instance = cls()
        return await instance.get_page_content(url)
It works when used like this:
async def main():
    """Fetch https://example.com through CustomBrowser and print the HTML."""
    html = await CustomBrowser.setup_and_get_content("https://example.com")
    print(html)


asyncio.run(main())
But it fails when I use it in a FastAPI endpoint:
from fastapi import FastAPI
app = FastAPI()


@app.get("/parse/")
async def read_item():
    """Return the HTML of https://example.com fetched via CustomBrowser."""
    return await CustomBrowser.setup_and_get_content('https://example.com')
Here's the error:
raise RuntimeError("Browser not initialized. Call initialize_browser first.")
RuntimeError: Browser not initialized. Call initialize_browser first.
A quick workaround is to run FastAPI with reload disabled, but I'm hoping to find a more robust solution.