Move the code over from private repository (#3)
This commit is contained in:
152
skyvern/webeye/browser_manager.py
Normal file
152
skyvern/webeye/browser_manager.py
Normal file
@@ -0,0 +1,152 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import structlog
|
||||
from playwright.async_api import Browser, Playwright, async_playwright
|
||||
|
||||
from skyvern.exceptions import MissingBrowserState
|
||||
from skyvern.forge.sdk.schemas.tasks import ProxyLocation, Task
|
||||
from skyvern.forge.sdk.settings_manager import SettingsManager
|
||||
from skyvern.forge.sdk.workflow.models.workflow import WorkflowRun
|
||||
from skyvern.webeye.browser_factory import BrowserContextFactory, BrowserState
|
||||
|
||||
# Module-level structlog logger; BrowserManager emits its info/warning events through it.
LOG = structlog.get_logger()
|
||||
|
||||
|
||||
class BrowserManager:
    """Shared registry of browser states keyed by task ID or workflow-run ID.

    ``__new__`` always hands back the same instance and ``pages`` lives on the
    class, so every caller works against one ID -> ``BrowserState`` mapping.
    """

    instance = None
    # Maps a task_id or workflow_run_id to its BrowserState. A task that runs
    # inside a workflow run is registered under its own ID pointing at the run's
    # state (see get_or_create_for_task), so one BrowserState object can be
    # reachable through more than one key.
    pages: dict[str, BrowserState] = dict()

    def __new__(cls) -> BrowserManager:
        """Return the shared instance, creating it on first use."""
        if cls.instance is None:
            cls.instance = super().__new__(cls)
        return cls.instance

    @staticmethod
    async def _create_browser_state(
        proxy_location: ProxyLocation | None = None, url: str | None = None
    ) -> BrowserState:
        """Start Playwright and wrap a freshly created browser context in a BrowserState.

        Args:
            proxy_location: Forwarded to ``BrowserContextFactory.create_browser_context``.
            url: Forwarded to ``BrowserContextFactory.create_browser_context``.

        Returns:
            A ``BrowserState`` whose ``page`` is ``None``; callers create the page afterwards.

        Raises:
            Exception: Whatever ``create_browser_context`` raises, after the
                Playwright driver started here has been stopped.
        """
        pw = await async_playwright().start()
        try:
            browser_context, browser_artifacts = await BrowserContextFactory.create_browser_context(
                pw, proxy_location=proxy_location, url=url
            )
        except Exception:
            # Nothing else holds a reference to this driver yet, so stop it here
            # instead of leaking it when context creation fails.
            await pw.stop()
            raise
        return BrowserState(pw=pw, browser_context=browser_context, page=None, browser_artifacts=browser_artifacts)

    async def get_or_create_for_task(self, task: Task) -> BrowserState:
        """Return the browser state for ``task``, creating and registering one if needed.

        Lookup order: the task's own entry, then the entry of the workflow run
        the task belongs to (which is then also registered under the task ID),
        and finally a newly created state with a page opened for ``task.url``.
        """
        if task.task_id in self.pages:
            return self.pages[task.task_id]

        if task.workflow_run_id in self.pages:
            LOG.info(
                "Browser state for task not found. Using browser state for workflow run",
                task_id=task.task_id,
                workflow_run_id=task.workflow_run_id,
            )
            # Alias the workflow run's state under the task ID so later lookups hit directly.
            self.pages[task.task_id] = self.pages[task.workflow_run_id]
            return self.pages[task.task_id]

        LOG.info("Creating browser state for task", task_id=task.task_id)
        browser_state = await self._create_browser_state(task.proxy_location, task.url)

        # The URL here is only used when creating a new page, and not when using an existing page.
        # This will make sure browser_state.page is not None.
        await browser_state.get_or_create_page(task.url)

        self.pages[task.task_id] = browser_state
        return browser_state

    async def get_or_create_for_workflow_run(self, workflow_run: WorkflowRun, url: str | None = None) -> BrowserState:
        """Return the browser state for ``workflow_run``, creating and registering one if needed.

        Args:
            workflow_run: Run whose ``workflow_run_id`` keys the registry entry.
            url: Used only when a new state (and its first page) has to be created.
        """
        run_id = workflow_run.workflow_run_id
        if run_id in self.pages:
            return self.pages[run_id]

        LOG.info("Creating browser state for workflow run", workflow_run_id=workflow_run.workflow_run_id)
        browser_state = await self._create_browser_state(workflow_run.proxy_location, url=url)

        # The URL here is only used when creating a new page, and not when using an existing page.
        # This will make sure browser_state.page is not None.
        await browser_state.get_or_create_page(url)

        self.pages[run_id] = browser_state
        return browser_state

    def set_video_artifact_for_task(self, task: Task, artifact_id: str) -> None:
        """Record ``artifact_id`` as the video artifact of the browser state serving ``task``.

        The workflow run's entry takes precedence over the task's own entry.
        An already-set artifact ID is overwritten after logging a warning.

        Raises:
            MissingBrowserState: Neither the workflow run nor the task has a registered state.
        """
        if task.workflow_run_id and task.workflow_run_id in self.pages:
            artifacts = self.pages[task.workflow_run_id].browser_artifacts
            if artifacts.video_artifact_id:
                LOG.warning(
                    "Video artifact is already set for workflow run. Overwriting",
                    workflow_run_id=task.workflow_run_id,
                    old_artifact_id=artifacts.video_artifact_id,
                    new_artifact_id=artifact_id,
                )
            artifacts.video_artifact_id = artifact_id
            return

        if task.task_id in self.pages:
            artifacts = self.pages[task.task_id].browser_artifacts
            if artifacts.video_artifact_id:
                LOG.warning(
                    "Video artifact is already set for task. Overwriting",
                    task_id=task.task_id,
                    old_artifact_id=artifacts.video_artifact_id,
                    new_artifact_id=artifact_id,
                )
            artifacts.video_artifact_id = artifact_id
            return

        raise MissingBrowserState(task_id=task.task_id)

    @staticmethod
    def _read_artifact_file(path: str | None) -> bytes | None:
        """Return the bytes stored at ``path``, or ``None`` when there is no such file.

        ``None`` (rather than ``b""``) signals "missing" so that callers can tell
        it apart from a file that exists but is empty.
        """
        if not path:
            return None
        try:
            with open(path, "rb") as artifact_file:
                return artifact_file.read()
        except FileNotFoundError:
            # A path can be recorded before anything has been written to it.
            return None

    async def get_video_data(
        self, browser_state: BrowserState, task_id: str = "", workflow_id: str = "", workflow_run_id: str = ""
    ) -> bytes:
        """Return the recorded video bytes for ``browser_state``.

        Returns ``b""`` and logs a warning when the state is falsy, has no video
        path, or the path does not exist on disk. The ID arguments are used only
        as log context.
        """
        if browser_state:
            data = self._read_artifact_file(browser_state.browser_artifacts.video_path)
            if data is not None:
                return data
        LOG.warning(
            "Video data not found for task", task_id=task_id, workflow_id=workflow_id, workflow_run_id=workflow_run_id
        )
        return b""

    async def get_har_data(
        self, browser_state: BrowserState, task_id: str = "", workflow_id: str = "", workflow_run_id: str = ""
    ) -> bytes:
        """Return the recorded HAR bytes for ``browser_state``.

        Returns ``b""`` and logs a warning when the state is falsy, has no HAR
        path, or the path does not exist on disk. The ID arguments are used only
        as log context.
        """
        if browser_state:
            data = self._read_artifact_file(browser_state.browser_artifacts.har_path)
            if data is not None:
                return data
        LOG.warning(
            "HAR data not found for task", task_id=task_id, workflow_id=workflow_id, workflow_run_id=workflow_run_id
        )
        return b""

    @classmethod
    async def connect_to_scraping_browser(cls, pw: Playwright) -> Browser:
        """Connect to the remote browser over CDP using the ``REMOTE_BROWSER_KEY`` setting.

        Raises:
            Exception: ``REMOTE_BROWSER_KEY`` is empty.
        """
        settings = SettingsManager.get_settings()
        if not settings.REMOTE_BROWSER_KEY:
            raise Exception("REMOTE_BROWSER_KEY is empty. Cannot connect to remote browser.")
        browser = await pw.chromium.connect_over_cdp(settings.REMOTE_BROWSER_KEY)
        LOG.info("Connected to remote browser", browser_type=settings.BROWSER_TYPE)
        return browser

    @classmethod
    async def close(cls) -> None:
        """Close every registered browser state and reset the registry."""
        LOG.info("Closing BrowserManager")
        closed_state_ids: set[int] = set()
        # Iterate over a snapshot: each close() awaits, and another coroutine may
        # add or remove registry entries in the meantime.
        for browser_state in list(cls.pages.values()):
            # The same state can be registered under a task ID and its workflow
            # run ID; close each distinct object only once.
            if id(browser_state) in closed_state_ids:
                continue
            closed_state_ids.add(id(browser_state))
            await browser_state.close()
        cls.pages = dict()
        LOG.info("BrowserManger is closed")

    async def cleanup_for_task(self, task_id: str, close_browser_on_completion: bool = True) -> BrowserState | None:
        """Unregister and close the browser state stored under ``task_id``.

        Args:
            task_id: Registry key to remove.
            close_browser_on_completion: Forwarded to ``BrowserState.close``.

        Returns:
            The removed state, or ``None`` if nothing was registered for ``task_id``.
        """
        # NOTE(review): if this entry aliases a workflow run's state, that run's
        # registry entry keeps pointing at the state closed here - confirm intended.
        LOG.info("Cleaning up for task")
        browser_state_to_close = self.pages.pop(task_id, None)
        if browser_state_to_close:
            await browser_state_to_close.close(close_browser_on_completion=close_browser_on_completion)
        LOG.info("Task is cleaned up")

        return browser_state_to_close

    async def cleanup_for_workflow_run(
        self, workflow_run_id: str, close_browser_on_completion: bool = True
    ) -> BrowserState | None:
        """Unregister and close the browser state stored under ``workflow_run_id``.

        Args:
            workflow_run_id: Registry key to remove.
            close_browser_on_completion: Forwarded to ``BrowserState.close``.

        Returns:
            The removed state, or ``None`` if nothing was registered for the run.
        """
        LOG.info("Cleaning up for workflow run")
        browser_state_to_close = self.pages.pop(workflow_run_id, None)
        if browser_state_to_close:
            await browser_state_to_close.close(close_browser_on_completion=close_browser_on_completion)
        LOG.info("Workflow run is cleaned up")

        return browser_state_to_close
|
||||
Reference in New Issue
Block a user