From cf08ca951e6e6a44842b8656fc64d6bcc2834e4e Mon Sep 17 00:00:00 2001 From: Shuchang Zheng Date: Wed, 28 May 2025 22:41:06 -0700 Subject: [PATCH] Fix chrome user data dir problem (#2503) --- README.md | 58 +++++++++++++--- fern/getting-started/quickstart.mdx | 8 ++- fern/running-tasks/run-tasks.mdx | 104 +++++++++++++++++++++++++++- pyproject.toml | 2 +- skyvern/cli/llm_setup.py | 1 + skyvern/cli/quickstart.py | 36 +--------- skyvern/cli/run_commands.py | 8 +++ skyvern/cli/utils.py | 40 +++++++++++ skyvern/library/skyvern.py | 12 ++-- skyvern/webeye/browser_factory.py | 96 ++++++++++++++++++------- skyvern/webeye/scraper/scraper.py | 3 +- 11 files changed, 283 insertions(+), 85 deletions(-) create mode 100644 skyvern/cli/utils.py diff --git a/README.md b/README.md index 56492878..a31a1cd8 100644 --- a/README.md +++ b/README.md @@ -67,9 +67,9 @@ skyvern = Skyvern() task = await skyvern.run_task(prompt="Find the top post on hackernews today") print(task) ``` -A browser will pop up. Skyvern will start executing the task in the browser and close the it when the task is done. You will be able to review the task from http://localhost:8080/history +Skyvern starts running the task in a browser that pops up and closes it when the task is done. 
You will be able to review the task from http://localhost:8080/history -You can also run a task autonomously on Skyvern Cloud: +You can also run a task on Skyvern Cloud: ```python from skyvern import Skyvern @@ -78,28 +78,65 @@ task = await skyvern.run_task(prompt="Find the top post on hackernews today") print(task) ``` -Or any hosted Skyvern service: +Or your local Skyvern service from step 2: ```python -skyvern = Skyvern(base_url="http://localhost:8000", api_key="SKYVERN API KEY") +skyvern = Skyvern(base_url="http://localhost:8000", api_key="LOCAL SKYVERN API KEY") task = await skyvern.run_task(prompt="Find the top post on hackernews today") print(task) ``` Check out more features to use for Skyvern task in our [official doc](https://docs.skyvern.com/running-tasks/run-tasks). Here are a couple of interesting examples: -#### Let Skyvern control your own browser -Firstly, add two variables to your .env file: +#### Control your own browser (Chrome) +> ⚠️ WARNING: Since [Chrome 136](https://developer.chrome.com/blog/remote-debugging-port), Chrome refuses any CDP connection to the browser using the default user_data_dir. In order to use your browser data, Skyvern copies your default user_data_dir to `./tmp/user_data_dir` the first time it connects to your local browser. ⚠️ + +1. Just With Python Code +```python +from skyvern import Skyvern + +# The path to your Chrome browser. This example path is for Mac. +browser_path = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" +skyvern = Skyvern( + base_url="http://localhost:8000", + api_key="YOUR_API_KEY", + browser_path=browser_path, +) +task = await skyvern.run_task( + prompt="Find the top post on hackernews today", +) ``` -# This is the path to your local chromium-compatible browser. We're using Google Chrome in Mac as an example + +2. With Skyvern Service + +Add two variables to your .env file: +```bash +# The path to your Chrome browser. This example path is for Mac. 
CHROME_EXECUTABLE_PATH="/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" BROWSER_TYPE=cdp-connect ``` -Secondly, make sure you quit your browser (Skyvern will restart it) and run the task: +Restart Skyvern service `skyvern run all` and run the task through UI or code: ```python from skyvern import Skyvern -skyvern = Skyvern() -task = await skyvern.run_task(prompt="Find the top post on hackernews today") +skyvern = Skyvern( + base_url="http://localhost:8000", + api_key="YOUR_API_KEY", +) +task = await skyvern.run_task( + prompt="Find the top post on hackernews today", +) +``` + +#### Run Skyvern with any remote browser +Grab the cdp connection url and pass it to Skyvern + +```python +from skyvern import Skyvern + +skyvern = Skyvern(cdp_url="your cdp connection url") +task = await skyvern.run_task( + prompt="Find the top post on hackernews today", +) ``` #### Get consistent output schema from your run @@ -130,7 +167,6 @@ task = await skyvern.run_task( ) ``` - ### Helpful commands to debug issues **Launch the Skyvern Server Separately** diff --git a/fern/getting-started/quickstart.mdx b/fern/getting-started/quickstart.mdx index 993435ef..2a12a183 100644 --- a/fern/getting-started/quickstart.mdx +++ b/fern/getting-started/quickstart.mdx @@ -38,9 +38,9 @@ print(task) More API & SDK information can be found in the [API Reference](/api-reference) section. ### Run Task Locally -You can also run browser tasks locally in your Python code, though it takes a bit more effort to set up the environment: +You can also run browser tasks locally with Python code, with a little bit of set up: -1. **Configure Skyvern** Run the setup wizard which will guide you through the configuration process, including Skyvern [MCP](/integrations/mcp) integration. This will generate a `.env` as the configuration settings file. +1. **Configure Skyvern** Run the setup wizard which will guide you through the configuration process. 
This will generate a `.env` as the configuration settings file. ```bash skyvern init ``` @@ -51,7 +51,9 @@ You can also run browser tasks locally in your Python code, though it takes a bi skyvern = Skyvern() - task = await skyvern.run_task(prompt="Find the top post on hackernews today") + task = await skyvern.run_task( + prompt="Find the top post on hackernews today", + ) print(task.model_dump()) ``` A local browser will pop up. Skyvern will start executing the task in the browser and close the browser when the task is done. diff --git a/fern/running-tasks/run-tasks.mdx b/fern/running-tasks/run-tasks.mdx index e2cf1f69..3615cb1a 100644 --- a/fern/running-tasks/run-tasks.mdx +++ b/fern/running-tasks/run-tasks.mdx @@ -1,9 +1,8 @@ --- -title: Task Features +title: Run Tasks slug: running-tasks/run-tasks --- -## Run A Task - [Quickstart](/getting-started/quickstart) to run a task. - [Run Task API](/api-reference/api-reference/agent/run-task) @@ -13,6 +12,8 @@ Every feature in this page is enabled through API & SDK. Some features are enabl Configure advanced settings in the UI +## Parameters + ### [Engine](/api-reference/api-reference/agent/run-task#request.body.engine) Parameter: `engine` @@ -75,4 +76,101 @@ Parameter: `browser_session_id` You can set a browser session for a task. Having a browser session persist the real-time state of the browser, so that the next run can continue from where the previous run left off. -See the [Browser Sessions](/browser-sessions/introduction) section to see how to create a browser session. \ No newline at end of file +See the [Browser Sessions](/browser-sessions/introduction) section to see how to create a browser session. + +## Use Cases +### Control Your Own Browser (Chrome) +Since [Chrome 136](https://developer.chrome.com/blog/remote-debugging-port), Chrome refuses any CDP connect to the browser using the default user_data_dir. 
In order to use your browser data, Skyvern copies your default user_data_dir to `./tmp/user_data_dir` the first time it connects to your local browser. + +**Just With Python Code** +```python +from skyvern import Skyvern + +# The path to your Chrome browser. This example path is for Mac. +browser_path = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" +skyvern = Skyvern( + base_url="http://localhost:8000", + api_key="YOUR_API_KEY", + browser_path=browser_path, +) +task = await skyvern.run_task( + prompt="Find the top post on hackernews today", +) +``` + +**With Skyvern Service** +```bash +# The path to your Chrome browser. This example path is for Mac. +CHROME_EXECUTABLE_PATH="/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" +BROWSER_TYPE=cdp-connect +``` + +Restart your Skyvern service `skyvern run server` and run the task through the UI or code: +```python +from skyvern import Skyvern + +skyvern = Skyvern( + base_url="http://localhost:8000", + api_key="YOUR_API_KEY", +) +task = await skyvern.run_task( + prompt="Find the top post on hackernews today", +) +``` + +### Get Consistent Output Schema +You can get a consistent output schema by adding the `data_extraction_schema` parameter to your task. 
+For example, if you want to get the title, URL, and points of the top post on Hacker News today, you can add the following to your task: +```python +from skyvern import Skyvern + +skyvern = Skyvern() +task = await skyvern.run_task( + prompt="Find the top post on hackernews today", + data_extraction_schema={ + "type": "object", + "properties": { + "title": { + "type": "string", + "description": "The title of the top post" + }, + "url": { + "type": "string", + "description": "The URL of the top post" + }, + "points": { + "type": "integer", + "description": "Number of points the post has received" + } + } + } +) +``` + +### Wait Until Task Is Done +When you are sending a run task request to the Skyvern service, you can set `wait_for_completion` to `True` and wait until the task is done. +```python +from skyvern import Skyvern + +skyvern = Skyvern() +task = await skyvern.run_task( + prompt="Find the top post on hackernews today", + # the request will be hanging until the task is done + wait_for_completion=True, +) +print(task.output) +``` + +### Send Run Result To Your Webhook +Instead of waiting, you can also set the `webhook_url` in the run task request and get the result in your webhook whenever it's done. +```python +from skyvern import Skyvern + +skyvern = Skyvern() +task = await skyvern.run_task( + prompt="Find the top post on hackernews today", + webhook_url="https://your-webhook-url.com", +) +``` +You can also use the [GET RUN API](/api-reference/api-reference/agent/get-run) to get the current status of the task. 
\ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 4673419d..45f5f0ee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "skyvern" -version = "0.1.88" +version = "0.1.89" description = "" authors = ["Skyvern AI "] readme = "README.md" diff --git a/skyvern/cli/llm_setup.py b/skyvern/cli/llm_setup.py index 52c28b0c..64b39a50 100644 --- a/skyvern/cli/llm_setup.py +++ b/skyvern/cli/llm_setup.py @@ -40,6 +40,7 @@ def update_or_add_env_var(key: str, value: str) -> None: "BROWSER_ACTION_TIMEOUT_MS": "5000", "MAX_STEPS_PER_RUN": "50", "LOG_LEVEL": "INFO", + "LITELLM_LOG": "CRITICAL", "DATABASE_STRING": "postgresql+psycopg://skyvern@localhost/skyvern", "PORT": "8000", "ANALYTICS_ID": "anonymous", diff --git a/skyvern/cli/quickstart.py b/skyvern/cli/quickstart.py index efa9fe1b..e026cba2 100644 --- a/skyvern/cli/quickstart.py +++ b/skyvern/cli/quickstart.py @@ -2,7 +2,6 @@ import asyncio import subprocess -import sys import typer from rich.panel import Panel @@ -11,6 +10,7 @@ from rich.progress import Progress, SpinnerColumn, TextColumn # Import console after skyvern.cli to ensure proper initialization from skyvern.cli.console import console from skyvern.cli.init_command import init # init is used directly +from skyvern.cli.utils import start_services quickstart_app = typer.Typer(help="Quickstart command to set up and run Skyvern with one command.") @@ -29,40 +29,6 @@ def check_docker() -> bool: return False -async def start_services(server_only: bool = False) -> None: - """Start Skyvern services in the background. - - Args: - server_only: If True, only start the server, not the UI. 
- """ - try: - # Start server in the background - server_process = await asyncio.create_subprocess_exec( - sys.executable, "-m", "skyvern.cli.commands", "run", "server" - ) - - # Give server a moment to start - await asyncio.sleep(2) - - if not server_only: - # Start UI in the background - ui_process = await asyncio.create_subprocess_exec(sys.executable, "-m", "skyvern.cli.commands", "run", "ui") - - console.print("\nšŸŽ‰ [bold green]Skyvern is now running![/bold green]") - console.print("🌐 [bold]Access the UI at:[/bold] [cyan]http://localhost:8080[/cyan]") - console.print("šŸ”‘ [bold]Your API key is in your .env file as SKYVERN_API_KEY[/bold]") - - # Wait for processes to complete (they won't unless killed) - if not server_only: - await asyncio.gather(server_process.wait(), ui_process.wait()) - else: - await server_process.wait() - - except Exception as e: - console.print(f"[bold red]Error starting services: {str(e)}[/bold red]") - raise typer.Exit(1) - - @quickstart_app.callback(invoke_without_command=True) def quickstart( ctx: typer.Context, diff --git a/skyvern/cli/run_commands.py b/skyvern/cli/run_commands.py index fa54fec8..5abdd0ac 100644 --- a/skyvern/cli/run_commands.py +++ b/skyvern/cli/run_commands.py @@ -1,3 +1,4 @@ +import asyncio import os import shutil import subprocess @@ -12,6 +13,7 @@ from mcp.server.fastmcp import FastMCP from rich.panel import Panel from rich.prompt import Confirm +from skyvern.cli.utils import start_services from skyvern.config import settings from skyvern.library.skyvern import Skyvern from skyvern.utils import detect_os @@ -153,6 +155,12 @@ def run_ui() -> None: return +@run_app.command(name="all") +def run_all() -> None: + """Run the Skyvern API server and UI server in parallel.""" + asyncio.run(start_services()) + + @run_app.command(name="mcp") def run_mcp() -> None: """Run the MCP server.""" diff --git a/skyvern/cli/utils.py b/skyvern/cli/utils.py new file mode 100644 index 00000000..546285a4 --- /dev/null +++ 
b/skyvern/cli/utils.py @@ -0,0 +1,40 @@ +import asyncio +import sys + +import typer + +from skyvern.cli.console import console + + +async def start_services(server_only: bool = False) -> None: + """Start Skyvern services in the background. + + Args: + server_only: If True, only start the server, not the UI. + """ + try: + # Start server in the background + server_process = await asyncio.create_subprocess_exec( + sys.executable, "-m", "skyvern.cli.commands", "run", "server" + ) + + # Give server a moment to start + await asyncio.sleep(2) + + if not server_only: + # Start UI in the background + ui_process = await asyncio.create_subprocess_exec(sys.executable, "-m", "skyvern.cli.commands", "run", "ui") + + console.print("\nšŸŽ‰ [bold green]Skyvern is now running![/bold green]") + console.print("🌐 [bold]Access the UI at:[/bold] [cyan]http://localhost:8080[/cyan]") + console.print("šŸ”‘ [bold]Your API key is in your .env file as SKYVERN_API_KEY[/bold]") + + # Wait for processes to complete (they won't unless killed) + if not server_only: + await asyncio.gather(server_process.wait(), ui_process.wait()) + else: + await server_process.wait() + + except Exception as e: + console.print(f"[bold red]Error starting services: {str(e)}[/bold red]") + raise typer.Exit(1) diff --git a/skyvern/library/skyvern.py b/skyvern/library/skyvern.py index c1cad06a..c8e7fbbf 100644 --- a/skyvern/library/skyvern.py +++ b/skyvern/library/skyvern.py @@ -1,6 +1,5 @@ import asyncio import os -import subprocess import typing from typing import Any @@ -66,20 +65,19 @@ class Skyvern(AsyncSkyvern): # TODO validate browser_path # Supported Browsers: Google Chrome, Brave Browser, Microsoft Edge, Firefox if "Chrome" in browser_path or "Brave" in browser_path or "Edge" in browser_path: - browser_process = subprocess.Popen( - [browser_path, "--remote-debugging-port=9222"], stdout=subprocess.PIPE, stderr=subprocess.PIPE - ) - if browser_process.poll() is not None: - raise Exception(f"Failed to open browser. 
browser_path: {browser_path}") - self._cdp_url = "http://127.0.0.1:9222" settings.BROWSER_TYPE = "cdp-connect" settings.BROWSER_REMOTE_DEBUGGING_URL = self._cdp_url + settings.CHROME_EXECUTABLE_PATH = browser_path else: raise ValueError( f"Unsupported browser or invalid path: {browser_path}. " "Here's a list of supported browsers Skyvern can connect to: Google Chrome, Brave Browser, Microsoft Edge, Firefox." ) + elif cdp_url: + self._cdp_url = cdp_url + settings.BROWSER_TYPE = "cdp-connect" + settings.BROWSER_REMOTE_DEBUGGING_URL = self._cdp_url elif base_url is None and api_key is None: if not browser_type: # if "BROWSER_TYPE" not in os.environ: diff --git a/skyvern/webeye/browser_factory.py b/skyvern/webeye/browser_factory.py index ba7f3f4b..bba67b13 100644 --- a/skyvern/webeye/browser_factory.py +++ b/skyvern/webeye/browser_factory.py @@ -2,8 +2,11 @@ from __future__ import annotations import asyncio import os +import pathlib +import platform import random import re +import shutil import socket import subprocess import time @@ -447,6 +450,34 @@ async def _create_headful_chromium( return browser_context, browser_artifacts, None +def default_user_data_dir() -> pathlib.Path: + p = platform.system() + if p == "Darwin": + return pathlib.Path("~/Library/Application Support/Google/Chrome").expanduser() + if p == "Windows": + return pathlib.Path(os.environ["LOCALAPPDATA"]) / "Google" / "Chrome" / "User Data" + # Assume Linux/Unix + return pathlib.Path("~/.config/google-chrome").expanduser() + + +def is_valid_chromium_user_data_dir(directory: str) -> bool: + """Check if a directory is a valid Chromium user data directory. + + A valid Chromium user data directory should: + 1. Exist + 2. Not be empty + 3. Contain a 'Default' directory + 4. 
Have a 'Preferences' file in the 'Default' directory + """ + if not os.path.exists(directory): + return False + + default_dir = os.path.join(directory, "Default") + preferences_file = os.path.join(default_dir, "Preferences") + + return os.path.isdir(directory) and os.path.isdir(default_dir) and os.path.isfile(preferences_file) + + async def _create_cdp_connection_browser( playwright: Playwright, proxy_location: ProxyLocation | None = None, **kwargs: dict ) -> tuple[BrowserContext, BrowserArtifacts, BrowserCleanupFunc]: @@ -454,31 +485,48 @@ async def _create_cdp_connection_browser( browser_path = settings.CHROME_EXECUTABLE_PATH if browser_type == "cdp-connect" and browser_path: - # First check if Chrome is already running - if _is_chrome_running(): - raise Exception( - "Chrome is already running. Please close all Chrome instances before starting with remote debugging." + LOG.info("Local browser path is given. Connecting to local browser with CDP", browser_path=browser_path) + # First check if the debugging port is running and can be used + if not _is_port_in_use(9222): + LOG.info("Port 9222 is not in use, starting Chrome", browser_path=browser_path) + # Check if Chrome is already running + if _is_chrome_running(): + raise Exception( + "Chrome is already running. Please close all Chrome instances before starting with remote debugging." 
+ ) + # check if ./tmp/user_data_dir exists and if it's a valid Chromium user data directory + try: + if os.path.exists("./tmp/user_data_dir") and not is_valid_chromium_user_data_dir("./tmp/user_data_dir"): + LOG.info("Removing invalid user data directory") + shutil.rmtree("./tmp/user_data_dir") + shutil.copytree(default_user_data_dir(), "./tmp/user_data_dir") + elif not os.path.exists("./tmp/user_data_dir"): + LOG.info("Copying default user data directory") + shutil.copytree(default_user_data_dir(), "./tmp/user_data_dir") + else: + LOG.info("User data directory is valid") + except FileExistsError: + # If directory exists, remove it first then copy + shutil.rmtree("./tmp/user_data_dir") + shutil.copytree(default_user_data_dir(), "./tmp/user_data_dir") + browser_process = subprocess.Popen( + [ + browser_path, + "--remote-debugging-port=9222", + "--no-first-run", + "--no-default-browser-check", + "--remote-debugging-address=0.0.0.0", + "--user-data-dir=./tmp/user_data_dir", + ], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, ) - - # Then check if the debugging port is already in use - if _is_port_in_use(9222): - raise Exception("Port 9222 is already in use. Another process may be using this port.") - - browser_process = subprocess.Popen( - [ - browser_path, - "--remote-debugging-port=9222", - "--no-first-run", - "--no-default-browser-check", - "--remote-debugging-address=0.0.0.0", - ], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - # Add small delay to allow browser to start - time.sleep(2) - if browser_process.poll() is not None: - raise Exception(f"Failed to open browser. browser_path: {browser_path}") + # Add small delay to allow browser to start + time.sleep(1) + if browser_process.poll() is not None: + raise Exception(f"Failed to open browser. 
browser_path: {browser_path}") + else: + LOG.info("Port 9222 is in use, using existing browser") browser_args = BrowserContextFactory.build_browser_args() diff --git a/skyvern/webeye/scraper/scraper.py b/skyvern/webeye/scraper/scraper.py index 1163d68d..fd885557 100644 --- a/skyvern/webeye/scraper/scraper.py +++ b/skyvern/webeye/scraper/scraper.py @@ -545,7 +545,8 @@ async def scrape_web_unsafe( try: skyvern_frame = await SkyvernFrame.create_instance(frame=page) html = await skyvern_frame.get_content() - window_dimension = Resolution(width=page.viewport_size["width"], height=page.viewport_size["height"]) + if page.viewport_size: + window_dimension = Resolution(width=page.viewport_size["width"], height=page.viewport_size["height"]) except Exception: LOG.error( "Failed out to get HTML content",