Fix chrome user data dir problem (#2503)
This commit is contained in:
58
README.md
58
README.md
@@ -67,9 +67,9 @@ skyvern = Skyvern()
|
|||||||
task = await skyvern.run_task(prompt="Find the top post on hackernews today")
|
task = await skyvern.run_task(prompt="Find the top post on hackernews today")
|
||||||
print(task)
|
print(task)
|
||||||
```
|
```
|
||||||
A browser will pop up. Skyvern will start executing the task in the browser and close the it when the task is done. You will be able to review the task from http://localhost:8080/history
|
Skyvern starts running the task in a browser that pops up and closes it when the task is done. You will be able to review the task from http://localhost:8080/history
|
||||||
|
|
||||||
You can also run a task autonomously on Skyvern Cloud:
|
You can also run a task on Skyvern Cloud:
|
||||||
```python
|
```python
|
||||||
from skyvern import Skyvern
|
from skyvern import Skyvern
|
||||||
|
|
||||||
@@ -78,28 +78,65 @@ task = await skyvern.run_task(prompt="Find the top post on hackernews today")
|
|||||||
print(task)
|
print(task)
|
||||||
```
|
```
|
||||||
|
|
||||||
Or any hosted Skyvern service:
|
Or your local Skyvern service from step 2:
|
||||||
```python
|
```python
|
||||||
skyvern = Skyvern(base_url="http://localhost:8000", api_key="SKYVERN API KEY")
|
skyvern = Skyvern(base_url="http://localhost:8000", api_key="LOCAL SKYVERN API KEY")
|
||||||
task = await skyvern.run_task(prompt="Find the top post on hackernews today")
|
task = await skyvern.run_task(prompt="Find the top post on hackernews today")
|
||||||
print(task)
|
print(task)
|
||||||
```
|
```
|
||||||
|
|
||||||
Check out more features to use for Skyvern task in our [official doc](https://docs.skyvern.com/running-tasks/run-tasks). Here are a couple of interesting examples:
|
Check out more features to use for Skyvern task in our [official doc](https://docs.skyvern.com/running-tasks/run-tasks). Here are a couple of interesting examples:
|
||||||
#### Let Skyvern control your own browser
|
#### Control your own browser (Chrome)
|
||||||
Firstly, add two variables to your .env file:
|
> ⚠️ WARNING: Since [Chrome 136](https://developer.chrome.com/blog/remote-debugging-port), Chrome refuses any CDP connect to the browser using the default user_data_dir. In order to use your browser data, Skyvern copies your default user_data_dir to `./tmp/user_data_dir` the first time connecting to your local browser. ⚠️
|
||||||
|
|
||||||
|
1. Just With Python Code
|
||||||
|
```python
|
||||||
|
from skyvern import Skyvern
|
||||||
|
|
||||||
|
# The path to your Chrome browser. This example path is for Mac.
|
||||||
|
browser_path = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
|
||||||
|
skyvern = Skyvern(
|
||||||
|
base_url="http://localhost:8000",
|
||||||
|
api_key="YOUR_API_KEY",
|
||||||
|
browser_path=browser_path,
|
||||||
|
)
|
||||||
|
task = await skyvern.run_task(
|
||||||
|
prompt="Find the top post on hackernews today",
|
||||||
|
)
|
||||||
```
|
```
|
||||||
# This is the path to your local chromium-compatible browser. We're using Google Chrome in Mac as an example
|
|
||||||
|
2. With Skyvern Service
|
||||||
|
|
||||||
|
Add two variables to your .env file:
|
||||||
|
```bash
|
||||||
|
# The path to your Chrome browser. This example path is for Mac.
|
||||||
CHROME_EXECUTABLE_PATH="/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
|
CHROME_EXECUTABLE_PATH="/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
|
||||||
BROWSER_TYPE=cdp-connect
|
BROWSER_TYPE=cdp-connect
|
||||||
```
|
```
|
||||||
|
|
||||||
Secondly, make sure you quit your browser (Skyvern will restart it) and run the task:
|
Restart the Skyvern service with `skyvern run all` and run the task through the UI or code:
|
||||||
```python
|
```python
|
||||||
from skyvern import Skyvern
|
from skyvern import Skyvern
|
||||||
|
|
||||||
skyvern = Skyvern()
|
skyvern = Skyvern(
|
||||||
task = await skyvern.run_task(prompt="Find the top post on hackernews today")
|
base_url="http://localhost:8000",
|
||||||
|
api_key="YOUR_API_KEY",
|
||||||
|
)
|
||||||
|
task = await skyvern.run_task(
|
||||||
|
prompt="Find the top post on hackernews today",
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Run Skyvern with any remote browser
|
||||||
|
Grab the CDP connection URL and pass it to Skyvern:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from skyvern import Skyvern
|
||||||
|
|
||||||
|
skyvern = Skyvern(cdp_url="your cdp connection url")
|
||||||
|
task = await skyvern.run_task(
|
||||||
|
prompt="Find the top post on hackernews today",
|
||||||
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Get consistent output schema from your run
|
#### Get consistent output schema from your run
|
||||||
@@ -130,7 +167,6 @@ task = await skyvern.run_task(
|
|||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
### Helpful commands to debug issues
|
### Helpful commands to debug issues
|
||||||
|
|
||||||
**Launch the Skyvern Server Separately**
|
**Launch the Skyvern Server Separately**
|
||||||
|
|||||||
@@ -38,9 +38,9 @@ print(task)
|
|||||||
More API & SDK information can be found in the [API Reference](/api-reference) section.
|
More API & SDK information can be found in the [API Reference](/api-reference) section.
|
||||||
|
|
||||||
### Run Task Locally
|
### Run Task Locally
|
||||||
You can also run browser tasks locally in your Python code, though it takes a bit more effort to set up the environment:
|
You can also run browser tasks locally with Python code, with a little bit of setup:
|
||||||
|
|
||||||
1. **Configure Skyvern** Run the setup wizard which will guide you through the configuration process, including Skyvern [MCP](/integrations/mcp) integration. This will generate a `.env` as the configuration settings file.
|
1. **Configure Skyvern** Run the setup wizard which will guide you through the configuration process. This will generate a `.env` as the configuration settings file.
|
||||||
```bash
|
```bash
|
||||||
skyvern init
|
skyvern init
|
||||||
```
|
```
|
||||||
@@ -51,7 +51,9 @@ You can also run browser tasks locally in your Python code, though it takes a bi
|
|||||||
|
|
||||||
skyvern = Skyvern()
|
skyvern = Skyvern()
|
||||||
|
|
||||||
task = await skyvern.run_task(prompt="Find the top post on hackernews today")
|
task = await skyvern.run_task(
|
||||||
|
prompt="Find the top post on hackernews today",
|
||||||
|
)
|
||||||
print(task.model_dump())
|
print(task.model_dump())
|
||||||
```
|
```
|
||||||
A local browser will pop up. Skyvern will start executing the task in the browser and close the browser when the task is done.
|
A local browser will pop up. Skyvern will start executing the task in the browser and close the browser when the task is done.
|
||||||
|
|||||||
@@ -1,9 +1,8 @@
|
|||||||
---
|
---
|
||||||
title: Task Features
|
title: Run Tasks
|
||||||
slug: running-tasks/run-tasks
|
slug: running-tasks/run-tasks
|
||||||
---
|
---
|
||||||
|
|
||||||
## Run A Task
|
|
||||||
- [Quickstart](/getting-started/quickstart) to run a task.
|
- [Quickstart](/getting-started/quickstart) to run a task.
|
||||||
- [Run Task API](/api-reference/api-reference/agent/run-task)
|
- [Run Task API](/api-reference/api-reference/agent/run-task)
|
||||||
|
|
||||||
@@ -13,6 +12,8 @@ Every feature in this page is enabled through API & SDK. Some features are enabl
|
|||||||
<img src="../images/run_tasks/ui_run_task.png" alt="Configure advanced settings in the UI" width="400" />
|
<img src="../images/run_tasks/ui_run_task.png" alt="Configure advanced settings in the UI" width="400" />
|
||||||
</Frame>
|
</Frame>
|
||||||
|
|
||||||
|
## Parameters
|
||||||
|
|
||||||
### [Engine](/api-reference/api-reference/agent/run-task#request.body.engine)
|
### [Engine](/api-reference/api-reference/agent/run-task#request.body.engine)
|
||||||
|
|
||||||
Parameter: `engine`
|
Parameter: `engine`
|
||||||
@@ -75,4 +76,101 @@ Parameter: `browser_session_id`
|
|||||||
|
|
||||||
You can set a browser session for a task. Having a browser session persist the real-time state of the browser, so that the next run can continue from where the previous run left off.
|
You can set a browser session for a task. Having a browser session persist the real-time state of the browser, so that the next run can continue from where the previous run left off.
|
||||||
|
|
||||||
See the [Browser Sessions](/browser-sessions/introduction) section to see how to create a browser session.
|
See the [Browser Sessions](/browser-sessions/introduction) section to see how to create a browser session.
|
||||||
|
|
||||||
|
## Use Cases
|
||||||
|
### Control Your Own Browser (Chrome)
|
||||||
|
<Warning>Since [Chrome 136](https://developer.chrome.com/blog/remote-debugging-port), Chrome refuses any CDP connect to the browser using the default user_data_dir. In order to use your browser data, Skyvern copies your default user_data_dir to `./tmp/user_data_dir` the first time connecting to your local browser.</Warning>
|
||||||
|
|
||||||
|
**Just With Python Code**
|
||||||
|
```python
|
||||||
|
from skyvern import Skyvern
|
||||||
|
|
||||||
|
# The path to your Chrome browser. This example path is for Mac.
|
||||||
|
browser_path = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
|
||||||
|
skyvern = Skyvern(
|
||||||
|
base_url="http://localhost:8000",
|
||||||
|
api_key="YOUR_API_KEY",
|
||||||
|
browser_path=browser_path,
|
||||||
|
)
|
||||||
|
task = await skyvern.run_task(
|
||||||
|
prompt="Find the top post on hackernews today",
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
**With Skyvern Service**
|
||||||
|
```bash
|
||||||
|
# The path to your Chrome browser. This example path is for Mac.
|
||||||
|
CHROME_EXECUTABLE_PATH="/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
|
||||||
|
BROWSER_TYPE=cdp-connect
|
||||||
|
```
|
||||||
|
|
||||||
|
Restart your Skyvern service with `skyvern run server` and run the task through the UI or code:
|
||||||
|
```python
|
||||||
|
from skyvern import Skyvern
|
||||||
|
|
||||||
|
skyvern = Skyvern(
|
||||||
|
base_url="http://localhost:8000",
|
||||||
|
api_key="YOUR_API_KEY",
|
||||||
|
)
|
||||||
|
task = await skyvern.run_task(
|
||||||
|
prompt="Find the top post on hackernews today",
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Get Consistent Output Schema
|
||||||
|
You can do it by adding the `data_extraction_schema` parameter to your task.
|
||||||
|
|
||||||
|
For example, if you want to get the title, URL, and points of the top post on Hacker News today, you can add the following to your task:
|
||||||
|
```python
|
||||||
|
from skyvern import Skyvern
|
||||||
|
|
||||||
|
skyvern = Skyvern()
|
||||||
|
task = await skyvern.run_task(
|
||||||
|
prompt="Find the top post on hackernews today",
|
||||||
|
data_extraction_schema={
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"title": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "The title of the top post"
|
||||||
|
},
|
||||||
|
"url": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "The URL of the top post"
|
||||||
|
},
|
||||||
|
"points": {
|
||||||
|
"type": "integer",
|
||||||
|
"description": "Number of points the post has received"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Wait Until Task Is Done
|
||||||
|
When you send a run task request to the Skyvern service, you can set `wait_for_completion` to `True` and wait until the task is done.
|
||||||
|
```python
|
||||||
|
from skyvern import Skyvern
|
||||||
|
|
||||||
|
skyvern = Skyvern()
|
||||||
|
task = await skyvern.run_task(
|
||||||
|
prompt="Find the top post on hackernews today",
|
||||||
|
# the request will be hanging until the task is done
|
||||||
|
wait_for_completion=True,
|
||||||
|
)
|
||||||
|
print(task.output)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Send Run Result To Your Webhook
|
||||||
|
Instead of waiting, you can also set the `webhook_url` in the run task request and get the result in your webhook whenever it's done.
|
||||||
|
```python
|
||||||
|
from skyvern import Skyvern
|
||||||
|
|
||||||
|
skyvern = Skyvern()
|
||||||
|
task = await skyvern.run_task(
|
||||||
|
prompt="Find the top post on hackernews today",
|
||||||
|
webhook_url="https://your-webhook-url.com",
|
||||||
|
)
|
||||||
|
```
|
||||||
|
You can also use the [GET RUN API](/api-reference/api-reference/agent/get-run) to get the current status of the task.
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "skyvern"
|
name = "skyvern"
|
||||||
version = "0.1.88"
|
version = "0.1.89"
|
||||||
description = ""
|
description = ""
|
||||||
authors = ["Skyvern AI <info@skyvern.com>"]
|
authors = ["Skyvern AI <info@skyvern.com>"]
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
|
|||||||
@@ -40,6 +40,7 @@ def update_or_add_env_var(key: str, value: str) -> None:
|
|||||||
"BROWSER_ACTION_TIMEOUT_MS": "5000",
|
"BROWSER_ACTION_TIMEOUT_MS": "5000",
|
||||||
"MAX_STEPS_PER_RUN": "50",
|
"MAX_STEPS_PER_RUN": "50",
|
||||||
"LOG_LEVEL": "INFO",
|
"LOG_LEVEL": "INFO",
|
||||||
|
"LITELLM_LOG": "CRITICAL",
|
||||||
"DATABASE_STRING": "postgresql+psycopg://skyvern@localhost/skyvern",
|
"DATABASE_STRING": "postgresql+psycopg://skyvern@localhost/skyvern",
|
||||||
"PORT": "8000",
|
"PORT": "8000",
|
||||||
"ANALYTICS_ID": "anonymous",
|
"ANALYTICS_ID": "anonymous",
|
||||||
|
|||||||
@@ -2,7 +2,6 @@
|
|||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
|
||||||
|
|
||||||
import typer
|
import typer
|
||||||
from rich.panel import Panel
|
from rich.panel import Panel
|
||||||
@@ -11,6 +10,7 @@ from rich.progress import Progress, SpinnerColumn, TextColumn
|
|||||||
# Import console after skyvern.cli to ensure proper initialization
|
# Import console after skyvern.cli to ensure proper initialization
|
||||||
from skyvern.cli.console import console
|
from skyvern.cli.console import console
|
||||||
from skyvern.cli.init_command import init # init is used directly
|
from skyvern.cli.init_command import init # init is used directly
|
||||||
|
from skyvern.cli.utils import start_services
|
||||||
|
|
||||||
quickstart_app = typer.Typer(help="Quickstart command to set up and run Skyvern with one command.")
|
quickstart_app = typer.Typer(help="Quickstart command to set up and run Skyvern with one command.")
|
||||||
|
|
||||||
@@ -29,40 +29,6 @@ def check_docker() -> bool:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
async def start_services(server_only: bool = False) -> None:
|
|
||||||
"""Start Skyvern services in the background.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
server_only: If True, only start the server, not the UI.
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
# Start server in the background
|
|
||||||
server_process = await asyncio.create_subprocess_exec(
|
|
||||||
sys.executable, "-m", "skyvern.cli.commands", "run", "server"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Give server a moment to start
|
|
||||||
await asyncio.sleep(2)
|
|
||||||
|
|
||||||
if not server_only:
|
|
||||||
# Start UI in the background
|
|
||||||
ui_process = await asyncio.create_subprocess_exec(sys.executable, "-m", "skyvern.cli.commands", "run", "ui")
|
|
||||||
|
|
||||||
console.print("\n🎉 [bold green]Skyvern is now running![/bold green]")
|
|
||||||
console.print("🌐 [bold]Access the UI at:[/bold] [cyan]http://localhost:8080[/cyan]")
|
|
||||||
console.print("🔑 [bold]Your API key is in your .env file as SKYVERN_API_KEY[/bold]")
|
|
||||||
|
|
||||||
# Wait for processes to complete (they won't unless killed)
|
|
||||||
if not server_only:
|
|
||||||
await asyncio.gather(server_process.wait(), ui_process.wait())
|
|
||||||
else:
|
|
||||||
await server_process.wait()
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
console.print(f"[bold red]Error starting services: {str(e)}[/bold red]")
|
|
||||||
raise typer.Exit(1)
|
|
||||||
|
|
||||||
|
|
||||||
@quickstart_app.callback(invoke_without_command=True)
|
@quickstart_app.callback(invoke_without_command=True)
|
||||||
def quickstart(
|
def quickstart(
|
||||||
ctx: typer.Context,
|
ctx: typer.Context,
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
import asyncio
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
import subprocess
|
import subprocess
|
||||||
@@ -12,6 +13,7 @@ from mcp.server.fastmcp import FastMCP
|
|||||||
from rich.panel import Panel
|
from rich.panel import Panel
|
||||||
from rich.prompt import Confirm
|
from rich.prompt import Confirm
|
||||||
|
|
||||||
|
from skyvern.cli.utils import start_services
|
||||||
from skyvern.config import settings
|
from skyvern.config import settings
|
||||||
from skyvern.library.skyvern import Skyvern
|
from skyvern.library.skyvern import Skyvern
|
||||||
from skyvern.utils import detect_os
|
from skyvern.utils import detect_os
|
||||||
@@ -153,6 +155,12 @@ def run_ui() -> None:
|
|||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
|
@run_app.command(name="all")
def run_all() -> None:
    """Run the Skyvern API server and UI server in parallel."""
    # NOTE: the docstring above is left untouched on purpose; Typer normally
    # surfaces a command's docstring as its help text.
    # Calling start_services() with no arguments keeps its default
    # server_only=False, i.e. the UI is launched alongside the API server.
    services = start_services()
    asyncio.run(services)
|
||||||
|
|
||||||
|
|
||||||
@run_app.command(name="mcp")
|
@run_app.command(name="mcp")
|
||||||
def run_mcp() -> None:
|
def run_mcp() -> None:
|
||||||
"""Run the MCP server."""
|
"""Run the MCP server."""
|
||||||
|
|||||||
40
skyvern/cli/utils.py
Normal file
40
skyvern/cli/utils.py
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
import asyncio
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import typer
|
||||||
|
|
||||||
|
from skyvern.cli.console import console
|
||||||
|
|
||||||
|
|
||||||
|
async def start_services(server_only: bool = False) -> None:
    """Spawn the Skyvern server (and optionally the UI) and wait on them.

    Each service is launched as a child process by re-invoking the current
    Python interpreter with ``-m skyvern.cli.commands run <service>``. After
    the children are started and the banner is printed, this coroutine blocks
    until every spawned process has exited.

    Args:
        server_only: When True, only the API server is started and the UI
            process is skipped.

    Raises:
        typer.Exit: With exit code 1 if any step above raises an exception.
    """
    # Shared prefix of the command line used for every child service.
    run_command = (sys.executable, "-m", "skyvern.cli.commands", "run")

    try:
        # The API server always comes up first.
        children = [await asyncio.create_subprocess_exec(*run_command, "server")]

        # Fixed grace period before anything else is launched.
        await asyncio.sleep(2)

        if not server_only:
            children.append(await asyncio.create_subprocess_exec(*run_command, "ui"))

        for banner_line in (
            "\n🎉 [bold green]Skyvern is now running![/bold green]",
            "🌐 [bold]Access the UI at:[/bold] [cyan]http://localhost:8080[/cyan]",
            "🔑 [bold]Your API key is in your .env file as SKYVERN_API_KEY[/bold]",
        ):
            console.print(banner_line)

        # The children are long-running services; this only returns once they
        # have all terminated (normally because they were killed).
        await asyncio.gather(*(child.wait() for child in children))
    except Exception as e:
        console.print(f"[bold red]Error starting services: {str(e)}[/bold red]")
        raise typer.Exit(1)
|
||||||
@@ -1,6 +1,5 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import os
|
import os
|
||||||
import subprocess
|
|
||||||
import typing
|
import typing
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
@@ -66,20 +65,19 @@ class Skyvern(AsyncSkyvern):
|
|||||||
# TODO validate browser_path
|
# TODO validate browser_path
|
||||||
# Supported Browsers: Google Chrome, Brave Browser, Microsoft Edge, Firefox
|
# Supported Browsers: Google Chrome, Brave Browser, Microsoft Edge, Firefox
|
||||||
if "Chrome" in browser_path or "Brave" in browser_path or "Edge" in browser_path:
|
if "Chrome" in browser_path or "Brave" in browser_path or "Edge" in browser_path:
|
||||||
browser_process = subprocess.Popen(
|
|
||||||
[browser_path, "--remote-debugging-port=9222"], stdout=subprocess.PIPE, stderr=subprocess.PIPE
|
|
||||||
)
|
|
||||||
if browser_process.poll() is not None:
|
|
||||||
raise Exception(f"Failed to open browser. browser_path: {browser_path}")
|
|
||||||
|
|
||||||
self._cdp_url = "http://127.0.0.1:9222"
|
self._cdp_url = "http://127.0.0.1:9222"
|
||||||
settings.BROWSER_TYPE = "cdp-connect"
|
settings.BROWSER_TYPE = "cdp-connect"
|
||||||
settings.BROWSER_REMOTE_DEBUGGING_URL = self._cdp_url
|
settings.BROWSER_REMOTE_DEBUGGING_URL = self._cdp_url
|
||||||
|
settings.CHROME_EXECUTABLE_PATH = browser_path
|
||||||
else:
|
else:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"Unsupported browser or invalid path: {browser_path}. "
|
f"Unsupported browser or invalid path: {browser_path}. "
|
||||||
"Here's a list of supported browsers Skyvern can connect to: Google Chrome, Brave Browser, Microsoft Edge, Firefox."
|
"Here's a list of supported browsers Skyvern can connect to: Google Chrome, Brave Browser, Microsoft Edge, Firefox."
|
||||||
)
|
)
|
||||||
|
elif cdp_url:
|
||||||
|
self._cdp_url = cdp_url
|
||||||
|
settings.BROWSER_TYPE = "cdp-connect"
|
||||||
|
settings.BROWSER_REMOTE_DEBUGGING_URL = self._cdp_url
|
||||||
elif base_url is None and api_key is None:
|
elif base_url is None and api_key is None:
|
||||||
if not browser_type:
|
if not browser_type:
|
||||||
# if "BROWSER_TYPE" not in os.environ:
|
# if "BROWSER_TYPE" not in os.environ:
|
||||||
|
|||||||
@@ -2,8 +2,11 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import os
|
import os
|
||||||
|
import pathlib
|
||||||
|
import platform
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
|
import shutil
|
||||||
import socket
|
import socket
|
||||||
import subprocess
|
import subprocess
|
||||||
import time
|
import time
|
||||||
@@ -447,6 +450,34 @@ async def _create_headful_chromium(
|
|||||||
return browser_context, browser_artifacts, None
|
return browser_context, browser_artifacts, None
|
||||||
|
|
||||||
|
|
||||||
|
def default_user_data_dir() -> pathlib.Path:
    """Return the default Google Chrome user data directory for this OS.

    The operating system is taken from ``platform.system()``:

    * ``"Windows"``: ``%LOCALAPPDATA%/Google/Chrome/User Data``
      (raises ``KeyError`` if ``LOCALAPPDATA`` is not set).
    * ``"Darwin"``: ``~/Library/Application Support/Google/Chrome``.
    * anything else: treated as Linux/Unix, ``~/.config/google-chrome``.

    The path is only computed; nothing checks that it exists on disk.
    """
    system_name = platform.system()

    if system_name == "Windows":
        local_app_data = pathlib.Path(os.environ["LOCALAPPDATA"])
        return local_app_data / "Google" / "Chrome" / "User Data"

    if system_name == "Darwin":
        home_relative = "~/Library/Application Support/Google/Chrome"
    else:
        # Every other platform falls back to the Linux/Unix location.
        home_relative = "~/.config/google-chrome"

    return pathlib.Path(home_relative).expanduser()
|
||||||
|
|
||||||
|
|
||||||
|
def is_valid_chromium_user_data_dir(directory: str) -> bool:
    """Tell whether ``directory`` looks like a Chromium user data directory.

    The directory is accepted only when all of the following hold:

    * ``directory`` exists and is a directory,
    * it contains a ``Default`` sub-directory,
    * ``Default`` contains a regular file named ``Preferences``.

    Args:
        directory: Path of the candidate user data directory.

    Returns:
        True when every check above passes, False otherwise.
    """
    # A missing path and a non-directory path are rejected the same way.
    if not os.path.isdir(directory):
        return False

    profile_dir = os.path.join(directory, "Default")
    if not os.path.isdir(profile_dir):
        return False

    return os.path.isfile(os.path.join(profile_dir, "Preferences"))
|
||||||
|
|
||||||
|
|
||||||
async def _create_cdp_connection_browser(
|
async def _create_cdp_connection_browser(
|
||||||
playwright: Playwright, proxy_location: ProxyLocation | None = None, **kwargs: dict
|
playwright: Playwright, proxy_location: ProxyLocation | None = None, **kwargs: dict
|
||||||
) -> tuple[BrowserContext, BrowserArtifacts, BrowserCleanupFunc]:
|
) -> tuple[BrowserContext, BrowserArtifacts, BrowserCleanupFunc]:
|
||||||
@@ -454,31 +485,48 @@ async def _create_cdp_connection_browser(
|
|||||||
browser_path = settings.CHROME_EXECUTABLE_PATH
|
browser_path = settings.CHROME_EXECUTABLE_PATH
|
||||||
|
|
||||||
if browser_type == "cdp-connect" and browser_path:
|
if browser_type == "cdp-connect" and browser_path:
|
||||||
# First check if Chrome is already running
|
LOG.info("Local browser path is given. Connecting to local browser with CDP", browser_path=browser_path)
|
||||||
if _is_chrome_running():
|
# First check if the debugging port is running and can be used
|
||||||
raise Exception(
|
if not _is_port_in_use(9222):
|
||||||
"Chrome is already running. Please close all Chrome instances before starting with remote debugging."
|
LOG.info("Port 9222 is not in use, starting Chrome", browser_path=browser_path)
|
||||||
|
# Check if Chrome is already running
|
||||||
|
if _is_chrome_running():
|
||||||
|
raise Exception(
|
||||||
|
"Chrome is already running. Please close all Chrome instances before starting with remote debugging."
|
||||||
|
)
|
||||||
|
# check if ./tmp/user_data_dir exists and if it's a valid Chromium user data directory
|
||||||
|
try:
|
||||||
|
if os.path.exists("./tmp/user_data_dir") and not is_valid_chromium_user_data_dir("./tmp/user_data_dir"):
|
||||||
|
LOG.info("Removing invalid user data directory")
|
||||||
|
shutil.rmtree("./tmp/user_data_dir")
|
||||||
|
shutil.copytree(default_user_data_dir(), "./tmp/user_data_dir")
|
||||||
|
elif not os.path.exists("./tmp/user_data_dir"):
|
||||||
|
LOG.info("Copying default user data directory")
|
||||||
|
shutil.copytree(default_user_data_dir(), "./tmp/user_data_dir")
|
||||||
|
else:
|
||||||
|
LOG.info("User data directory is valid")
|
||||||
|
except FileExistsError:
|
||||||
|
# If directory exists, remove it first then copy
|
||||||
|
shutil.rmtree("./tmp/user_data_dir")
|
||||||
|
shutil.copytree(default_user_data_dir(), "./tmp/user_data_dir")
|
||||||
|
browser_process = subprocess.Popen(
|
||||||
|
[
|
||||||
|
browser_path,
|
||||||
|
"--remote-debugging-port=9222",
|
||||||
|
"--no-first-run",
|
||||||
|
"--no-default-browser-check",
|
||||||
|
"--remote-debugging-address=0.0.0.0",
|
||||||
|
"--user-data-dir=./tmp/user_data_dir",
|
||||||
|
],
|
||||||
|
stdout=subprocess.PIPE,
|
||||||
|
stderr=subprocess.PIPE,
|
||||||
)
|
)
|
||||||
|
# Add small delay to allow browser to start
|
||||||
# Then check if the debugging port is already in use
|
time.sleep(1)
|
||||||
if _is_port_in_use(9222):
|
if browser_process.poll() is not None:
|
||||||
raise Exception("Port 9222 is already in use. Another process may be using this port.")
|
raise Exception(f"Failed to open browser. browser_path: {browser_path}")
|
||||||
|
else:
|
||||||
browser_process = subprocess.Popen(
|
LOG.info("Port 9222 is in use, using existing browser")
|
||||||
[
|
|
||||||
browser_path,
|
|
||||||
"--remote-debugging-port=9222",
|
|
||||||
"--no-first-run",
|
|
||||||
"--no-default-browser-check",
|
|
||||||
"--remote-debugging-address=0.0.0.0",
|
|
||||||
],
|
|
||||||
stdout=subprocess.PIPE,
|
|
||||||
stderr=subprocess.PIPE,
|
|
||||||
)
|
|
||||||
# Add small delay to allow browser to start
|
|
||||||
time.sleep(2)
|
|
||||||
if browser_process.poll() is not None:
|
|
||||||
raise Exception(f"Failed to open browser. browser_path: {browser_path}")
|
|
||||||
|
|
||||||
browser_args = BrowserContextFactory.build_browser_args()
|
browser_args = BrowserContextFactory.build_browser_args()
|
||||||
|
|
||||||
|
|||||||
@@ -545,7 +545,8 @@ async def scrape_web_unsafe(
|
|||||||
try:
|
try:
|
||||||
skyvern_frame = await SkyvernFrame.create_instance(frame=page)
|
skyvern_frame = await SkyvernFrame.create_instance(frame=page)
|
||||||
html = await skyvern_frame.get_content()
|
html = await skyvern_frame.get_content()
|
||||||
window_dimension = Resolution(width=page.viewport_size["width"], height=page.viewport_size["height"])
|
if page.viewport_size:
|
||||||
|
window_dimension = Resolution(width=page.viewport_size["width"], height=page.viewport_size["height"])
|
||||||
except Exception:
|
except Exception:
|
||||||
LOG.error(
|
LOG.error(
|
||||||
"Failed out to get HTML content",
|
"Failed out to get HTML content",
|
||||||
|
|||||||
Reference in New Issue
Block a user