support new browser type: cdp connect (#1875)
This commit is contained in:
@@ -10,6 +10,7 @@ class Settings(BaseSettings):
|
||||
ADDITIONAL_MODULES: list[str] = []
|
||||
|
||||
BROWSER_TYPE: str = "chromium-headful"
|
||||
BROWSER_REMOTE_DEBUGGING_URL: str = "http://127.0.0.1:9222"
|
||||
MAX_SCRAPING_RETRIES: int = 0
|
||||
VIDEO_PATH: str | None = "./video"
|
||||
HAR_PATH: str | None = "./har"
|
||||
|
||||
@@ -9,6 +9,7 @@ from pathlib import Path
|
||||
from typing import Any, Awaitable, Callable, Protocol
|
||||
|
||||
import aiofiles
|
||||
import httpx
|
||||
import structlog
|
||||
from playwright.async_api import BrowserContext, ConsoleMessage, Download, Error, Page, Playwright
|
||||
from pydantic import BaseModel, PrivateAttr
|
||||
@@ -349,8 +350,55 @@ async def _create_headful_chromium(
|
||||
return browser_context, browser_artifacts, None
|
||||
|
||||
|
||||
async def _create_cdp_connection_browser(
    playwright: Playwright, proxy_location: ProxyLocation | None = None, **kwargs: dict
) -> tuple[BrowserContext, BrowserArtifacts, BrowserCleanupFunc]:
    """Attach to an already-running browser over the Chrome DevTools Protocol.

    The browser's WebSocket debugger URL is looked up by requesting
    ``{settings.BROWSER_REMOTE_DEBUGGING_URL}/json/version`` and reading the
    ``webSocketDebuggerUrl`` field of the JSON response. Playwright then
    connects to that URL. If the connected browser already exposes at least
    one context, the first one is reused; otherwise a new context is created.

    Args:
        playwright: Playwright instance used to open the CDP connection.
        proxy_location: Not used by this function.
        **kwargs: Not used by this function.

    Returns:
        A tuple of the browser context, the browser artifacts built from the
        configured HAR path, and ``None`` in the cleanup-function slot.

    Raises:
        Exception: If the remote debugging endpoint cannot be queried, its
            response cannot be parsed, or it does not contain a
            ``webSocketDebuggerUrl``. When the lookup itself failed, the
            underlying error is attached as ``__cause__``.
    """
    browser_args = BrowserContextFactory.build_browser_args()
    browser_artifacts = BrowserContextFactory.build_browser_artifacts(
        har_path=browser_args["record_har_path"],
    )

    debugging_url = settings.BROWSER_REMOTE_DEBUGGING_URL
    # Single source for the message raised by both failure paths below.
    lookup_error = f"Cannot find the webSocketDebuggerUrl from the browser remote debugging {debugging_url}"

    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(f"{debugging_url}/json/version")
            remote_browser_url = response.json().get("webSocketDebuggerUrl")
    except Exception as e:
        # Chain the original failure (connection error, invalid JSON, ...) so
        # the root cause is preserved in the traceback.
        raise Exception(lookup_error) from e

    if not remote_browser_url:
        raise Exception(lookup_error)

    LOG.info("Connecting browser CDP connection", remote_browser_url=remote_browser_url)
    browser = await playwright.chromium.connect_over_cdp(remote_browser_url)

    contexts = browser.contexts
    if contexts:
        # Use the first existing context if available. The video directory
        # and viewport from browser_args are only applied to a newly created
        # context, not to a reused one.
        LOG.info("Using existing browser context")
        browser_context = contexts[0]
    else:
        browser_context = await browser.new_context(
            record_video_dir=browser_args["record_video_dir"],
            viewport=browser_args["viewport"],
        )

    LOG.info(
        "Launched browser CDP connection",
        remote_browser_url=remote_browser_url,
    )
    # No cleanup function is returned for a CDP-attached browser.
    return browser_context, browser_artifacts, None
|
||||
|
||||
|
||||
# Register the headless Chromium creator under the "chromium-headless" browser type key.
BrowserContextFactory.register_type("chromium-headless", _create_headless_chromium)
|
||||
# Register the headful Chromium creator under the "chromium-headful" browser type key.
BrowserContextFactory.register_type("chromium-headful", _create_headful_chromium)
|
||||
# Register the CDP-connection creator (attaches to a browser via its remote
# debugging endpoint) under the "cdp-connect" browser type key.
# NOTE(review): presumably selected by setting BROWSER_TYPE to "cdp-connect" — confirm.
BrowserContextFactory.register_type("cdp-connect", _create_cdp_connection_browser)
|
||||
|
||||
|
||||
class BrowserState:
|
||||
|
||||
Reference in New Issue
Block a user