Files
Dorod-Sky/skyvern/webeye/browser_factory.py

673 lines
26 KiB
Python
Raw Normal View History

from __future__ import annotations
import asyncio
2024-07-26 18:10:42 +08:00
import os
import pathlib
import platform
import random
import re
import shutil
import socket
import subprocess
import time
import uuid
from datetime import datetime
2024-11-06 10:15:47 +08:00
from pathlib import Path
from typing import Any, Awaitable, Callable, Protocol, cast
from urllib.parse import parse_qsl, urlparse
import psutil
import structlog
from playwright.async_api import Browser, BrowserContext, ConsoleMessage, Download, Page, Playwright
from skyvern.config import settings
2025-11-26 15:05:37 +08:00
from skyvern.constants import (
BROWSER_DOWNLOAD_TIMEOUT,
SKYVERN_DIR,
)
from skyvern.exceptions import UnknownBrowserType, UnknownErrorWhileCreatingBrowserContext
from skyvern.forge import app
from skyvern.forge.sdk.api.files import get_download_dir, make_temp_directory
from skyvern.forge.sdk.core.skyvern_context import current, ensure_context
from skyvern.schemas.runs import ProxyLocation, get_tzinfo_from_proxy
from skyvern.webeye.browser_artifacts import BrowserArtifacts, VideoArtifact
LOG = structlog.get_logger()
BrowserCleanupFunc = Callable[[], None] | None
2024-11-01 15:13:41 -07:00
def set_browser_console_log(browser_context: BrowserContext, browser_artifacts: BrowserArtifacts) -> None:
if browser_artifacts.browser_console_log_path is None:
2024-11-02 09:59:03 +08:00
log_path = f"{settings.LOG_PATH}/{datetime.utcnow().strftime('%Y-%m-%d')}/{uuid.uuid4()}.log"
2024-11-01 15:13:41 -07:00
try:
os.makedirs(os.path.dirname(log_path), exist_ok=True)
# create the empty log file
with open(log_path, "w") as _:
pass
except Exception:
LOG.warning(
"Failed to create browser log file",
log_path=log_path,
exc_info=True,
)
return
browser_artifacts.browser_console_log_path = log_path
async def browser_console_log(msg: ConsoleMessage) -> None:
current_time = datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%fZ")
key_values = " ".join([f"{key}={value}" for key, value in msg.location.items()])
format_log = f"{current_time}[{msg.type}]{msg.text} {key_values}\n"
await browser_artifacts.append_browser_console_log(format_log)
LOG.info("browser console log is saved", log_path=browser_artifacts.browser_console_log_path)
browser_context.on("console", browser_console_log)
def set_download_file_listener(
browser_context: BrowserContext, download_timeout: float | None = None, **kwargs: Any
) -> None:
2024-11-06 10:15:47 +08:00
async def listen_to_download(download: Download) -> None:
workflow_run_id = kwargs.get("workflow_run_id")
task_id = kwargs.get("task_id")
2024-11-06 10:15:47 +08:00
try:
async with asyncio.timeout(download_timeout or BROWSER_DOWNLOAD_TIMEOUT):
2024-11-06 10:15:47 +08:00
file_path = await download.path()
if file_path.suffix:
return
LOG.info(
"No file extensions, going to add file extension automatically",
workflow_run_id=workflow_run_id,
task_id=task_id,
suggested_filename=download.suggested_filename,
url=download.url,
)
suffix = Path(download.suggested_filename).suffix
if suffix:
LOG.info(
"Add extension according to suggested filename",
workflow_run_id=workflow_run_id,
task_id=task_id,
filepath=str(file_path) + suffix,
)
file_path.rename(str(file_path) + suffix)
return
parsed_url = urlparse(download.url)
parsed_qs = parse_qsl(parsed_url.query)
for key, value in parsed_qs:
if key.lower() == "filename":
suffix = Path(value).suffix
if suffix:
LOG.info(
"Add extension according to the parsed query params of download url",
workflow_run_id=workflow_run_id,
task_id=task_id,
filename=value,
)
file_path.rename(str(file_path) + suffix)
return
suffix = Path(parsed_url.path).suffix
2024-11-06 10:15:47 +08:00
if suffix:
LOG.info(
"Add extension according to download url path",
2024-11-06 10:15:47 +08:00
workflow_run_id=workflow_run_id,
task_id=task_id,
filepath=str(file_path) + suffix,
)
file_path.rename(str(file_path) + suffix)
return
# TODO: maybe should try to parse it from URL response
except asyncio.TimeoutError:
LOG.error(
"timeout to download file, going to cancel the download",
workflow_run_id=workflow_run_id,
task_id=task_id,
)
await download.cancel()
except Exception:
LOG.exception(
"Failed to add file extension name to downloaded file",
workflow_run_id=workflow_run_id,
task_id=task_id,
)
def listen_to_new_page(page: Page) -> None:
page.on("download", listen_to_download)
browser_context.on("page", listen_to_new_page)
def initialize_download_dir() -> str:
context = ensure_context()
2025-08-18 14:24:18 +08:00
return get_download_dir(
context.run_id if context and context.run_id else context.workflow_run_id or context.task_id
)
async def _apply_download_behaviour(browser: Browser) -> None:
context = ensure_context()
download_dir = get_download_dir(
context.run_id if context and context.run_id else context.workflow_run_id or context.task_id
)
cdp_session = await browser.new_browser_cdp_session()
await cdp_session.send(
"Browser.setDownloadBehavior",
{
"behavior": "allow",
"downloadPath": download_dir,
},
)
LOG.info("setDownloadBehavior applied", download_dir=download_dir)
class BrowserContextCreator(Protocol):
def __call__(
2024-12-16 11:22:51 +08:00
self, playwright: Playwright, proxy_location: ProxyLocation | None = None, **kwargs: dict[str, Any]
) -> Awaitable[tuple[BrowserContext, BrowserArtifacts, BrowserCleanupFunc]]: ...
class BrowserContextFactory:
_creators: dict[str, BrowserContextCreator] = {}
2024-06-17 11:22:36 +08:00
_validator: Callable[[Page], Awaitable[bool]] | None = None
@staticmethod
def get_subdir() -> str:
curr_context = current()
if curr_context and curr_context.task_id:
return curr_context.task_id
elif curr_context and curr_context.request_id:
return curr_context.request_id
return str(uuid.uuid4())
@staticmethod
def update_chromium_browser_preferences(user_data_dir: str, download_dir: str) -> None:
preference_dst_folder = f"{user_data_dir}/Default"
os.makedirs(preference_dst_folder, exist_ok=True)
preference_dst_file = f"{preference_dst_folder}/Preferences"
preference_template = f"{SKYVERN_DIR}/webeye/chromium_preferences.json"
preference_file_content = ""
with open(preference_template) as f:
preference_file_content = f.read()
preference_file_content = preference_file_content.replace("MASK_SAVEFILE_DEFAULT_DIRECTORY", download_dir)
preference_file_content = preference_file_content.replace("MASK_DOWNLOAD_DEFAULT_DIRECTORY", download_dir)
with open(preference_dst_file, "w") as f:
f.write(preference_file_content)
@staticmethod
def build_browser_args(
proxy_location: ProxyLocation | None = None,
cdp_port: int | None = None,
extra_http_headers: dict[str, str] | None = None,
) -> dict[str, Any]:
video_dir = f"{settings.VIDEO_PATH}/{datetime.utcnow().strftime('%Y-%m-%d')}"
har_dir = (
f"{settings.HAR_PATH}/{datetime.utcnow().strftime('%Y-%m-%d')}/{BrowserContextFactory.get_subdir()}.har"
)
extension_paths = []
if settings.EXTENSIONS and settings.EXTENSIONS_BASE_PATH:
try:
os.makedirs(settings.EXTENSIONS_BASE_PATH, exist_ok=True)
extension_paths = [str(Path(settings.EXTENSIONS_BASE_PATH) / ext) for ext in settings.EXTENSIONS]
LOG.info("Extensions paths constructed", extension_paths=extension_paths)
except Exception as e:
LOG.error("Error constructing extension paths", error=str(e))
browser_args = [
"--disable-blink-features=AutomationControlled",
"--disk-cache-size=1",
"--start-maximized",
"--kiosk-printing",
]
if cdp_port:
browser_args.append(f"--remote-debugging-port={cdp_port}")
if extension_paths:
joined_paths = ",".join(extension_paths)
browser_args.extend([f"--disable-extensions-except={joined_paths}", f"--load-extension={joined_paths}"])
LOG.info("Extensions added to browser args", extensions=joined_paths)
2024-12-16 11:22:51 +08:00
args = {
"locale": settings.BROWSER_LOCALE,
2024-08-28 01:03:22 +08:00
"color_scheme": "no-preference",
"args": browser_args,
"ignore_default_args": [
"--enable-automation",
],
"record_har_path": har_dir,
"record_video_dir": video_dir,
"viewport": {
"width": settings.BROWSER_WIDTH,
"height": settings.BROWSER_HEIGHT,
},
"extra_http_headers": extra_http_headers,
}
if settings.ENABLE_PROXY:
proxy_config = setup_proxy()
if proxy_config:
args["proxy"] = proxy_config
2024-12-16 11:22:51 +08:00
if proxy_location:
if tz_info := get_tzinfo_from_proxy(proxy_location=proxy_location):
args["timezone_id"] = tz_info.key
return args
@staticmethod
def build_browser_artifacts(
2024-08-09 10:46:52 +08:00
video_artifacts: list[VideoArtifact] | None = None,
har_path: str | None = None,
traces_dir: str | None = None,
browser_session_dir: str | None = None,
browser_console_log_path: str | None = None,
) -> BrowserArtifacts:
return BrowserArtifacts(
2024-08-09 10:46:52 +08:00
video_artifacts=video_artifacts or [],
har_path=har_path,
traces_dir=traces_dir,
browser_session_dir=browser_session_dir,
browser_console_log_path=browser_console_log_path,
)
@classmethod
def register_type(cls, browser_type: str, creator: BrowserContextCreator) -> None:
cls._creators[browser_type] = creator
@classmethod
async def create_browser_context(
cls, playwright: Playwright, **kwargs: Any
) -> tuple[BrowserContext, BrowserArtifacts, BrowserCleanupFunc]:
browser_type = settings.BROWSER_TYPE
2024-11-01 15:13:41 -07:00
browser_context: BrowserContext | None = None
try:
creator = cls._creators.get(browser_type)
if not creator:
raise UnknownBrowserType(browser_type)
2024-07-26 18:10:42 +08:00
browser_context, browser_artifacts, cleanup_func = await creator(playwright, **kwargs)
2025-11-11 09:32:49 -07:00
if settings.BROWSER_LOGS_ENABLED:
set_browser_console_log(browser_context=browser_context, browser_artifacts=browser_artifacts)
2024-11-06 10:15:47 +08:00
set_download_file_listener(browser_context=browser_context, **kwargs)
2024-12-16 11:22:51 +08:00
proxy_location: ProxyLocation | None = kwargs.get("proxy_location")
if proxy_location is not None:
context = ensure_context()
context.tz_info = get_tzinfo_from_proxy(proxy_location)
2024-07-26 18:10:42 +08:00
return browser_context, browser_artifacts, cleanup_func
except Exception as e:
2024-11-01 15:13:41 -07:00
if browser_context is not None:
# FIXME: sometimes it can't close the browser context?
LOG.error("unexpected error happens after created browser context, going to close the context")
await browser_context.close()
if isinstance(e, UnknownBrowserType):
raise e
raise UnknownErrorWhileCreatingBrowserContext(browser_type, e) from e
def setup_proxy() -> dict | None:
if not settings.HOSTED_PROXY_POOL or settings.HOSTED_PROXY_POOL.strip() == "":
LOG.warning("No proxy server value found. Continuing without using proxy...")
return None
proxy_servers = [server.strip() for server in settings.HOSTED_PROXY_POOL.split(",") if server.strip()]
if not proxy_servers:
LOG.warning("Proxy pool contains only empty values. Continuing without proxy...")
return None
valid_proxies = []
for proxy in proxy_servers:
if _is_valid_proxy_url(proxy):
valid_proxies.append(proxy)
else:
LOG.warning(f"Invalid proxy URL format: {proxy}")
if not valid_proxies:
LOG.warning("No valid proxy URLs found. Continuing without proxy...")
return None
try:
proxy_server = random.choice(valid_proxies)
proxy_creds = _get_proxy_server_creds(proxy_server)
LOG.info("Found proxy server creds, using them...")
return {
"server": proxy_server,
"username": proxy_creds.get("username", ""),
"password": proxy_creds.get("password", ""),
}
except Exception as e:
LOG.warning(f"Error setting up proxy: {e}. Continuing without proxy...")
return None
def _is_valid_proxy_url(url: str) -> bool:
PROXY_PATTERN = re.compile(r"^(http|https|socks5):\/\/([^:@]+(:[^@]*)?@)?[^\s:\/]+(:\d+)?$")
try:
parsed = urlparse(url)
if not parsed.scheme or not parsed.netloc:
return False
return bool(PROXY_PATTERN.match(url))
except Exception:
return False
def _get_proxy_server_creds(proxy: str) -> dict:
parsed_url = urlparse(proxy)
if parsed_url.username and parsed_url.password:
return {"username": parsed_url.username, "password": parsed_url.password}
LOG.warning("No credentials found in the proxy URL.")
return {}
def _get_cdp_port(kwargs: dict) -> int | None:
raw_cdp_port = kwargs.get("cdp_port")
if isinstance(raw_cdp_port, (int, str)):
try:
return int(raw_cdp_port)
except (ValueError, TypeError):
return None
return None
def _is_port_in_use(port: int) -> bool:
"""Check if a port is already in use."""
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
try:
s.bind(("localhost", port))
return False
except OSError:
return True
def _is_chrome_running() -> bool:
"""Check if Chrome is already running."""
chrome_process_names = ["chrome", "google-chrome", "google chrome"]
for proc in psutil.process_iter(["name"]):
try:
proc_name = proc.info["name"].lower()
if proc_name == "chrome_crashpad_handler":
continue
if any(chrome_name in proc_name for chrome_name in chrome_process_names):
return True
except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
pass
return False
async def _create_headless_chromium(
playwright: Playwright,
proxy_location: ProxyLocation | None = None,
extra_http_headers: dict[str, str] | None = None,
**kwargs: dict,
) -> tuple[BrowserContext, BrowserArtifacts, BrowserCleanupFunc]:
2025-10-17 13:15:24 -06:00
if browser_address := kwargs.get("browser_address"):
return await _connect_to_cdp_browser(
playwright,
remote_browser_url=str(browser_address),
extra_http_headers=extra_http_headers,
apply_download_behaviour=True,
2025-10-17 13:15:24 -06:00
)
# Check for browser_profile_id and load from storage if available
browser_profile_id = cast(str | None, kwargs.get("browser_profile_id"))
organization_id_for_profile = cast(str | None, kwargs.get("organization_id"))
user_data_dir: str | None = None
if browser_profile_id and organization_id_for_profile:
profile_dir = await app.STORAGE.retrieve_browser_profile(
organization_id=organization_id_for_profile,
profile_id=browser_profile_id,
)
if profile_dir:
user_data_dir = profile_dir
LOG.info(
"Using browser profile",
browser_profile_id=browser_profile_id,
profile_dir=profile_dir,
)
else:
LOG.warning(
"Browser profile not found, using temp directory",
browser_profile_id=browser_profile_id,
organization_id=organization_id_for_profile,
)
if not user_data_dir:
user_data_dir = make_temp_directory(prefix="skyvern_browser_")
download_dir = initialize_download_dir()
BrowserContextFactory.update_chromium_browser_preferences(
user_data_dir=user_data_dir,
download_dir=download_dir,
)
cdp_port: int | None = _get_cdp_port(kwargs)
browser_args = BrowserContextFactory.build_browser_args(
proxy_location=proxy_location, cdp_port=cdp_port, extra_http_headers=extra_http_headers
)
browser_args.update(
{
"user_data_dir": user_data_dir,
"downloads_path": download_dir,
}
)
browser_artifacts = BrowserContextFactory.build_browser_artifacts(
har_path=browser_args["record_har_path"],
browser_session_dir=user_data_dir,
)
browser_context = await playwright.chromium.launch_persistent_context(**browser_args)
return browser_context, browser_artifacts, None
async def _create_headful_chromium(
playwright: Playwright,
proxy_location: ProxyLocation | None = None,
extra_http_headers: dict[str, str] | None = None,
**kwargs: dict,
) -> tuple[BrowserContext, BrowserArtifacts, BrowserCleanupFunc]:
2025-10-17 13:15:24 -06:00
if browser_address := kwargs.get("browser_address"):
return await _connect_to_cdp_browser(
playwright,
remote_browser_url=str(browser_address),
extra_http_headers=extra_http_headers,
apply_download_behaviour=True,
2025-10-17 13:15:24 -06:00
)
# Check for browser_profile_id and load from storage if available
browser_profile_id = cast(str | None, kwargs.get("browser_profile_id"))
organization_id_for_profile = cast(str | None, kwargs.get("organization_id"))
user_data_dir: str | None = None
if browser_profile_id and organization_id_for_profile:
profile_dir = await app.STORAGE.retrieve_browser_profile(
organization_id=organization_id_for_profile,
profile_id=browser_profile_id,
)
if profile_dir:
user_data_dir = profile_dir
LOG.info(
"Using browser profile",
browser_profile_id=browser_profile_id,
profile_dir=profile_dir,
)
else:
LOG.warning(
"Browser profile not found, using temp directory",
browser_profile_id=browser_profile_id,
organization_id=organization_id_for_profile,
)
if not user_data_dir:
user_data_dir = make_temp_directory(prefix="skyvern_browser_")
download_dir = initialize_download_dir()
BrowserContextFactory.update_chromium_browser_preferences(
user_data_dir=user_data_dir,
download_dir=download_dir,
)
cdp_port: int | None = _get_cdp_port(kwargs)
browser_args = BrowserContextFactory.build_browser_args(
proxy_location=proxy_location, cdp_port=cdp_port, extra_http_headers=extra_http_headers
)
browser_args.update(
{
"user_data_dir": user_data_dir,
"downloads_path": download_dir,
"headless": False,
}
)
browser_artifacts = BrowserContextFactory.build_browser_artifacts(
har_path=browser_args["record_har_path"],
browser_session_dir=user_data_dir,
)
browser_context = await playwright.chromium.launch_persistent_context(**browser_args)
return browser_context, browser_artifacts, None
def default_user_data_dir() -> pathlib.Path:
p = platform.system()
if p == "Darwin":
return pathlib.Path("~/Library/Application Support/Google/Chrome").expanduser()
if p == "Windows":
return pathlib.Path(os.environ["LOCALAPPDATA"]) / "Google" / "Chrome" / "User Data"
# Assume Linux/Unix
return pathlib.Path("~/.config/google-chrome").expanduser()
def is_valid_chromium_user_data_dir(directory: str) -> bool:
"""Check if a directory is a valid Chromium user data directory.
A valid Chromium user data directory should:
1. Exist
2. Not be empty
3. Contain a 'Default' directory
4. Have a 'Preferences' file in the 'Default' directory
"""
if not os.path.exists(directory):
return False
default_dir = os.path.join(directory, "Default")
preferences_file = os.path.join(default_dir, "Preferences")
return os.path.isdir(directory) and os.path.isdir(default_dir) and os.path.isfile(preferences_file)
async def _create_cdp_connection_browser(
playwright: Playwright,
proxy_location: ProxyLocation | None = None,
extra_http_headers: dict[str, str] | None = None,
**kwargs: dict,
) -> tuple[BrowserContext, BrowserArtifacts, BrowserCleanupFunc]:
2025-10-17 13:15:24 -06:00
if browser_address := kwargs.get("browser_address"):
return await _connect_to_cdp_browser(
playwright,
remote_browser_url=str(browser_address),
extra_http_headers=extra_http_headers,
apply_download_behaviour=True,
2025-10-17 13:15:24 -06:00
)
browser_type = settings.BROWSER_TYPE
browser_path = settings.CHROME_EXECUTABLE_PATH
if browser_type == "cdp-connect" and browser_path:
LOG.info("Local browser path is given. Connecting to local browser with CDP", browser_path=browser_path)
# First check if the debugging port is running and can be used
if not _is_port_in_use(9222):
LOG.info("Port 9222 is not in use, starting Chrome", browser_path=browser_path)
# Check if Chrome is already running
if _is_chrome_running():
raise Exception(
"Chrome is already running. Please close all Chrome instances before starting with remote debugging."
)
# check if ./tmp/user_data_dir exists and if it's a valid Chromium user data directory
try:
if os.path.exists("./tmp/user_data_dir") and not is_valid_chromium_user_data_dir("./tmp/user_data_dir"):
LOG.info("Removing invalid user data directory")
shutil.rmtree("./tmp/user_data_dir")
shutil.copytree(default_user_data_dir(), "./tmp/user_data_dir")
elif not os.path.exists("./tmp/user_data_dir"):
LOG.info("Copying default user data directory")
shutil.copytree(default_user_data_dir(), "./tmp/user_data_dir")
else:
LOG.info("User data directory is valid")
except FileExistsError:
# If directory exists, remove it first then copy
shutil.rmtree("./tmp/user_data_dir")
shutil.copytree(default_user_data_dir(), "./tmp/user_data_dir")
browser_process = subprocess.Popen(
[
browser_path,
"--remote-debugging-port=9222",
"--no-first-run",
"--no-default-browser-check",
"--remote-debugging-address=0.0.0.0",
"--user-data-dir=./tmp/user_data_dir",
],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
# Add small delay to allow browser to start
time.sleep(1)
if browser_process.poll() is not None:
raise Exception(f"Failed to open browser. browser_path: {browser_path}")
else:
LOG.info("Port 9222 is in use, using existing browser")
2025-10-17 13:15:24 -06:00
return await _connect_to_cdp_browser(playwright, settings.BROWSER_REMOTE_DEBUGGING_URL, extra_http_headers)
async def _connect_to_cdp_browser(
playwright: Playwright,
remote_browser_url: str,
extra_http_headers: dict[str, str] | None = None,
apply_download_behaviour: bool = False,
2025-10-17 13:15:24 -06:00
) -> tuple[BrowserContext, BrowserArtifacts, BrowserCleanupFunc]:
browser_args = BrowserContextFactory.build_browser_args(extra_http_headers=extra_http_headers)
browser_artifacts = BrowserContextFactory.build_browser_artifacts(
har_path=browser_args["record_har_path"],
)
LOG.info("Connecting browser CDP connection", remote_browser_url=remote_browser_url)
browser = await playwright.chromium.connect_over_cdp(remote_browser_url)
if apply_download_behaviour:
await _apply_download_behaviour(browser)
contexts = browser.contexts
browser_context = None
if contexts:
# Use the first existing context if available
LOG.info("Using existing browser context")
browser_context = contexts[0]
else:
browser_context = await browser.new_context(
record_video_dir=browser_args["record_video_dir"],
viewport=browser_args["viewport"],
extra_http_headers=browser_args["extra_http_headers"],
)
LOG.info(
"Launched browser CDP connection",
remote_browser_url=remote_browser_url,
)
return browser_context, browser_artifacts, None
BrowserContextFactory.register_type("chromium-headless", _create_headless_chromium)
BrowserContextFactory.register_type("chromium-headful", _create_headful_chromium)
BrowserContextFactory.register_type("cdp-connect", _create_cdp_connection_browser)