Fix Chrome user data dir problem (#2503)
This commit is contained in:
@@ -40,6 +40,7 @@ def update_or_add_env_var(key: str, value: str) -> None:
|
||||
"BROWSER_ACTION_TIMEOUT_MS": "5000",
|
||||
"MAX_STEPS_PER_RUN": "50",
|
||||
"LOG_LEVEL": "INFO",
|
||||
"LITELLM_LOG": "CRITICAL",
|
||||
"DATABASE_STRING": "postgresql+psycopg://skyvern@localhost/skyvern",
|
||||
"PORT": "8000",
|
||||
"ANALYTICS_ID": "anonymous",
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
|
||||
import asyncio
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
import typer
|
||||
from rich.panel import Panel
|
||||
@@ -11,6 +10,7 @@ from rich.progress import Progress, SpinnerColumn, TextColumn
|
||||
# Import console after skyvern.cli to ensure proper initialization
|
||||
from skyvern.cli.console import console
|
||||
from skyvern.cli.init_command import init # init is used directly
|
||||
from skyvern.cli.utils import start_services
|
||||
|
||||
quickstart_app = typer.Typer(help="Quickstart command to set up and run Skyvern with one command.")
|
||||
|
||||
@@ -29,40 +29,6 @@ def check_docker() -> bool:
|
||||
return False
|
||||
|
||||
|
||||
async def start_services(server_only: bool = False) -> None:
    """Start Skyvern services in the background.

    Spawns the API server as a child process of the current interpreter
    (``-m skyvern.cli.commands run server``), sleeps two seconds, optionally
    spawns the UI the same way, prints the access hints, and then blocks
    until the spawned processes exit.

    Args:
        server_only: If True, only start the server, not the UI.

    Raises:
        typer.Exit: With exit code 1 if spawning or waiting on a service
            raises an exception. Any child that was already started and is
            still running is terminated first.
    """
    # Track the children up front so the error path knows what was spawned.
    server_process = None
    ui_process = None
    try:
        # Start server in the background
        server_process = await asyncio.create_subprocess_exec(
            sys.executable, "-m", "skyvern.cli.commands", "run", "server"
        )

        # Give server a moment to start
        await asyncio.sleep(2)

        if not server_only:
            # Start UI in the background
            ui_process = await asyncio.create_subprocess_exec(
                sys.executable, "-m", "skyvern.cli.commands", "run", "ui"
            )

        console.print("\n🎉 [bold green]Skyvern is now running![/bold green]")
        console.print("🌐 [bold]Access the UI at:[/bold] [cyan]http://localhost:8080[/cyan]")
        console.print("🔑 [bold]Your API key is in your .env file as SKYVERN_API_KEY[/bold]")

        # Wait for processes to complete (they won't unless killed)
        if ui_process is not None:
            await asyncio.gather(server_process.wait(), ui_process.wait())
        else:
            await server_process.wait()

    except Exception as e:
        # Do not leave a half-started stack behind: stop whatever is still alive.
        for process in (server_process, ui_process):
            if process is not None and process.returncode is None:
                try:
                    process.terminate()
                except ProcessLookupError:
                    # The child exited between the returncode check and the signal.
                    pass
        console.print(f"[bold red]Error starting services: {str(e)}[/bold red]")
        raise typer.Exit(1) from e
|
||||
|
||||
|
||||
@quickstart_app.callback(invoke_without_command=True)
|
||||
def quickstart(
|
||||
ctx: typer.Context,
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import asyncio
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
@@ -12,6 +13,7 @@ from mcp.server.fastmcp import FastMCP
|
||||
from rich.panel import Panel
|
||||
from rich.prompt import Confirm
|
||||
|
||||
from skyvern.cli.utils import start_services
|
||||
from skyvern.config import settings
|
||||
from skyvern.library.skyvern import Skyvern
|
||||
from skyvern.utils import detect_os
|
||||
@@ -153,6 +155,12 @@ def run_ui() -> None:
|
||||
return
|
||||
|
||||
|
||||
@run_app.command(name="all")
def run_all() -> None:
    """Run the Skyvern API server and UI server in parallel."""
    # Build the coroutine with its default arguments (server_only=False),
    # then drive it to completion on a fresh event loop.
    services = start_services()
    asyncio.run(services)
|
||||
|
||||
|
||||
@run_app.command(name="mcp")
|
||||
def run_mcp() -> None:
|
||||
"""Run the MCP server."""
|
||||
|
||||
40
skyvern/cli/utils.py
Normal file
40
skyvern/cli/utils.py
Normal file
@@ -0,0 +1,40 @@
|
||||
import asyncio
|
||||
import sys
|
||||
|
||||
import typer
|
||||
|
||||
from skyvern.cli.console import console
|
||||
|
||||
|
||||
async def start_services(server_only: bool = False) -> None:
    """Start Skyvern services in the background.

    Spawns the API server as a child process of the current interpreter
    (``-m skyvern.cli.commands run server``), sleeps two seconds, optionally
    spawns the UI the same way, prints the access hints, and then blocks
    until the spawned processes exit.

    Args:
        server_only: If True, only start the server, not the UI.

    Raises:
        typer.Exit: With exit code 1 if spawning or waiting on a service
            raises an exception. Any child that was already started and is
            still running is terminated first.
    """
    # Track the children up front so the error path knows what was spawned.
    server_process = None
    ui_process = None
    try:
        # Start server in the background
        server_process = await asyncio.create_subprocess_exec(
            sys.executable, "-m", "skyvern.cli.commands", "run", "server"
        )

        # Give server a moment to start
        await asyncio.sleep(2)

        if not server_only:
            # Start UI in the background
            ui_process = await asyncio.create_subprocess_exec(
                sys.executable, "-m", "skyvern.cli.commands", "run", "ui"
            )

        console.print("\n🎉 [bold green]Skyvern is now running![/bold green]")
        console.print("🌐 [bold]Access the UI at:[/bold] [cyan]http://localhost:8080[/cyan]")
        console.print("🔑 [bold]Your API key is in your .env file as SKYVERN_API_KEY[/bold]")

        # Wait for processes to complete (they won't unless killed)
        if ui_process is not None:
            await asyncio.gather(server_process.wait(), ui_process.wait())
        else:
            await server_process.wait()

    except Exception as e:
        # Do not leave a half-started stack behind: stop whatever is still alive.
        for process in (server_process, ui_process):
            if process is not None and process.returncode is None:
                try:
                    process.terminate()
                except ProcessLookupError:
                    # The child exited between the returncode check and the signal.
                    pass
        console.print(f"[bold red]Error starting services: {str(e)}[/bold red]")
        raise typer.Exit(1) from e
|
||||
@@ -1,6 +1,5 @@
|
||||
import asyncio
|
||||
import os
|
||||
import subprocess
|
||||
import typing
|
||||
from typing import Any
|
||||
|
||||
@@ -66,20 +65,19 @@ class Skyvern(AsyncSkyvern):
|
||||
# TODO validate browser_path
|
||||
# Supported Browsers: Google Chrome, Brave Browser, Microsoft Edge, Firefox
|
||||
if "Chrome" in browser_path or "Brave" in browser_path or "Edge" in browser_path:
|
||||
browser_process = subprocess.Popen(
|
||||
[browser_path, "--remote-debugging-port=9222"], stdout=subprocess.PIPE, stderr=subprocess.PIPE
|
||||
)
|
||||
if browser_process.poll() is not None:
|
||||
raise Exception(f"Failed to open browser. browser_path: {browser_path}")
|
||||
|
||||
self._cdp_url = "http://127.0.0.1:9222"
|
||||
settings.BROWSER_TYPE = "cdp-connect"
|
||||
settings.BROWSER_REMOTE_DEBUGGING_URL = self._cdp_url
|
||||
settings.CHROME_EXECUTABLE_PATH = browser_path
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Unsupported browser or invalid path: {browser_path}. "
|
||||
"Here's a list of supported browsers Skyvern can connect to: Google Chrome, Brave Browser, Microsoft Edge, Firefox."
|
||||
)
|
||||
elif cdp_url:
|
||||
self._cdp_url = cdp_url
|
||||
settings.BROWSER_TYPE = "cdp-connect"
|
||||
settings.BROWSER_REMOTE_DEBUGGING_URL = self._cdp_url
|
||||
elif base_url is None and api_key is None:
|
||||
if not browser_type:
|
||||
# if "BROWSER_TYPE" not in os.environ:
|
||||
|
||||
@@ -2,8 +2,11 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import pathlib
|
||||
import platform
|
||||
import random
|
||||
import re
|
||||
import shutil
|
||||
import socket
|
||||
import subprocess
|
||||
import time
|
||||
@@ -447,6 +450,34 @@ async def _create_headful_chromium(
|
||||
return browser_context, browser_artifacts, None
|
||||
|
||||
|
||||
def default_user_data_dir() -> pathlib.Path:
    """Return the default Google Chrome user data directory for this OS.

    The location is selected from ``platform.system()``:

    - ``"Darwin"``: ``~/Library/Application Support/Google/Chrome``
    - ``"Windows"``: ``Google/Chrome/User Data`` under ``%LOCALAPPDATA%``,
      or under ``~/AppData/Local`` when that variable is unset or empty
    - anything else: ``~/.config/google-chrome``

    The returned path is not checked for existence.
    """
    system = platform.system()
    if system == "Darwin":
        return pathlib.Path("~/Library/Application Support/Google/Chrome").expanduser()
    if system == "Windows":
        # LOCALAPPDATA can be missing in stripped-down environments (services,
        # sanitized subprocess env); fall back instead of raising KeyError.
        local_app_data = os.environ.get("LOCALAPPDATA")
        if local_app_data:
            base = pathlib.Path(local_app_data)
        else:
            base = pathlib.Path.home() / "AppData" / "Local"
        return base / "Google" / "Chrome" / "User Data"
    # Assume Linux/Unix
    return pathlib.Path("~/.config/google-chrome").expanduser()
|
||||
|
||||
|
||||
def is_valid_chromium_user_data_dir(directory: str) -> bool:
    """Report whether ``directory`` has the layout of a Chromium user data dir.

    The check passes only when all of the following hold:
    1. ``directory`` exists and is a directory
    2. it contains a ``Default`` sub-directory
    3. ``Default`` contains a regular file named ``Preferences``
    """
    # Guard: a missing path or a non-directory can never qualify.
    if not os.path.exists(directory):
        return False
    if not os.path.isdir(directory):
        return False

    profile_dir = os.path.join(directory, "Default")
    if not os.path.isdir(profile_dir):
        return False

    prefs_path = os.path.join(profile_dir, "Preferences")
    return os.path.isfile(prefs_path)
|
||||
|
||||
|
||||
async def _create_cdp_connection_browser(
|
||||
playwright: Playwright, proxy_location: ProxyLocation | None = None, **kwargs: dict
|
||||
) -> tuple[BrowserContext, BrowserArtifacts, BrowserCleanupFunc]:
|
||||
@@ -454,31 +485,48 @@ async def _create_cdp_connection_browser(
|
||||
browser_path = settings.CHROME_EXECUTABLE_PATH
|
||||
|
||||
if browser_type == "cdp-connect" and browser_path:
|
||||
# First check if Chrome is already running
|
||||
if _is_chrome_running():
|
||||
raise Exception(
|
||||
"Chrome is already running. Please close all Chrome instances before starting with remote debugging."
|
||||
LOG.info("Local browser path is given. Connecting to local browser with CDP", browser_path=browser_path)
|
||||
# First check if the debugging port is running and can be used
|
||||
if not _is_port_in_use(9222):
|
||||
LOG.info("Port 9222 is not in use, starting Chrome", browser_path=browser_path)
|
||||
# Check if Chrome is already running
|
||||
if _is_chrome_running():
|
||||
raise Exception(
|
||||
"Chrome is already running. Please close all Chrome instances before starting with remote debugging."
|
||||
)
|
||||
# check if ./tmp/user_data_dir exists and if it's a valid Chromium user data directory
|
||||
try:
|
||||
if os.path.exists("./tmp/user_data_dir") and not is_valid_chromium_user_data_dir("./tmp/user_data_dir"):
|
||||
LOG.info("Removing invalid user data directory")
|
||||
shutil.rmtree("./tmp/user_data_dir")
|
||||
shutil.copytree(default_user_data_dir(), "./tmp/user_data_dir")
|
||||
elif not os.path.exists("./tmp/user_data_dir"):
|
||||
LOG.info("Copying default user data directory")
|
||||
shutil.copytree(default_user_data_dir(), "./tmp/user_data_dir")
|
||||
else:
|
||||
LOG.info("User data directory is valid")
|
||||
except FileExistsError:
|
||||
# If directory exists, remove it first then copy
|
||||
shutil.rmtree("./tmp/user_data_dir")
|
||||
shutil.copytree(default_user_data_dir(), "./tmp/user_data_dir")
|
||||
browser_process = subprocess.Popen(
|
||||
[
|
||||
browser_path,
|
||||
"--remote-debugging-port=9222",
|
||||
"--no-first-run",
|
||||
"--no-default-browser-check",
|
||||
"--remote-debugging-address=0.0.0.0",
|
||||
"--user-data-dir=./tmp/user_data_dir",
|
||||
],
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
)
|
||||
|
||||
# Then check if the debugging port is already in use
|
||||
if _is_port_in_use(9222):
|
||||
raise Exception("Port 9222 is already in use. Another process may be using this port.")
|
||||
|
||||
browser_process = subprocess.Popen(
|
||||
[
|
||||
browser_path,
|
||||
"--remote-debugging-port=9222",
|
||||
"--no-first-run",
|
||||
"--no-default-browser-check",
|
||||
"--remote-debugging-address=0.0.0.0",
|
||||
],
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
)
|
||||
# Add small delay to allow browser to start
|
||||
time.sleep(2)
|
||||
if browser_process.poll() is not None:
|
||||
raise Exception(f"Failed to open browser. browser_path: {browser_path}")
|
||||
# Add small delay to allow browser to start
|
||||
time.sleep(1)
|
||||
if browser_process.poll() is not None:
|
||||
raise Exception(f"Failed to open browser. browser_path: {browser_path}")
|
||||
else:
|
||||
LOG.info("Port 9222 is in use, using existing browser")
|
||||
|
||||
browser_args = BrowserContextFactory.build_browser_args()
|
||||
|
||||
|
||||
@@ -545,7 +545,8 @@ async def scrape_web_unsafe(
|
||||
try:
|
||||
skyvern_frame = await SkyvernFrame.create_instance(frame=page)
|
||||
html = await skyvern_frame.get_content()
|
||||
window_dimension = Resolution(width=page.viewport_size["width"], height=page.viewport_size["height"])
|
||||
if page.viewport_size:
|
||||
window_dimension = Resolution(width=page.viewport_size["width"], height=page.viewport_size["height"])
|
||||
except Exception:
|
||||
LOG.error(
|
||||
"Failed out to get HTML content",
|
||||
|
||||
Reference in New Issue
Block a user