Added Ollama & Openrouter & Groq & improved cdp browser (#2283)
This commit is contained in:
committed by
GitHub
parent
0540e65d06
commit
c3072d7572
@@ -24,6 +24,7 @@ services:
|
|||||||
# comment out if you want to externally call skyvern API
|
# comment out if you want to externally call skyvern API
|
||||||
ports:
|
ports:
|
||||||
- 8000:8000
|
- 8000:8000
|
||||||
|
- 9222:9222 # for cdp browser forwarding
|
||||||
volumes:
|
volumes:
|
||||||
- ./artifacts:/data/artifacts
|
- ./artifacts:/data/artifacts
|
||||||
- ./videos:/data/videos
|
- ./videos:/data/videos
|
||||||
@@ -36,9 +37,26 @@ services:
|
|||||||
environment:
|
environment:
|
||||||
- DATABASE_STRING=postgresql+psycopg://skyvern:skyvern@postgres:5432/skyvern
|
- DATABASE_STRING=postgresql+psycopg://skyvern:skyvern@postgres:5432/skyvern
|
||||||
- BROWSER_TYPE=chromium-headful
|
- BROWSER_TYPE=chromium-headful
|
||||||
- ENABLE_OPENAI=true
|
# - BROWSER_TYPE=cdp-connect
|
||||||
- LLM_KEY=OPENAI_GPT4O
|
# Use this command to start Chrome with remote debugging:
|
||||||
- OPENAI_API_KEY=<your_openai_key>
|
# "C:\Program Files\Google\Chrome\Application\chrome.exe" --remote-debugging-port=9222 --user-data-dir="C:\chrome-cdp-profile" --no-first-run --no-default-browser-check
|
||||||
|
# /Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --remote-debugging-port=9222 --user-data-dir="/Users/yourusername/chrome-cdp-profile" --no-first-run --no-default-browser-check
|
||||||
|
# - BROWSER_REMOTE_DEBUGGING_URL=http://host.docker.internal:9222/
|
||||||
|
|
||||||
|
# =========================
|
||||||
|
# LLM Settings
|
||||||
|
# =========================
|
||||||
|
# OpenAI Support:
|
||||||
|
# If you want to use OpenAI as your LLM provider, uncomment the following lines and fill in your OpenAI API key.
|
||||||
|
# - ENABLE_OPENAI=true
|
||||||
|
# - LLM_KEY=OPENAI_GPT4O
|
||||||
|
# - OPENAI_API_KEY=<your_openai_key>
|
||||||
|
# Gemini Support:
|
||||||
|
# Gemini is a new LLM provider that is currently in beta. You can use it by uncommenting the following lines and filling in your Gemini API key.
|
||||||
|
- LLM_KEY=GEMINI
|
||||||
|
- ENABLE_GEMINI=true
|
||||||
|
- GEMINI_API_KEY=YOUR_GEMINI_KEY
|
||||||
|
- LLM_KEY=GEMINI_2.5_PRO_PREVIEW_03_25
|
||||||
# If you want to use other LLM provider, like azure and anthropic:
|
# If you want to use other LLM provider, like azure and anthropic:
|
||||||
# - ENABLE_ANTHROPIC=true
|
# - ENABLE_ANTHROPIC=true
|
||||||
# - LLM_KEY=ANTHROPIC_CLAUDE3.5_SONNET
|
# - LLM_KEY=ANTHROPIC_CLAUDE3.5_SONNET
|
||||||
@@ -72,7 +90,26 @@ services:
|
|||||||
# - AWS_REGION=us-west-2 # Replace this with a different AWS region, if you desire
|
# - AWS_REGION=us-west-2 # Replace this with a different AWS region, if you desire
|
||||||
# - AWS_ACCESS_KEY_ID=FILL_ME_IN_PLEASE
|
# - AWS_ACCESS_KEY_ID=FILL_ME_IN_PLEASE
|
||||||
# - AWS_SECRET_ACCESS_KEY=FILL_ME_IN_PLEASE
|
# - AWS_SECRET_ACCESS_KEY=FILL_ME_IN_PLEASE
|
||||||
#
|
# Ollama Support:
|
||||||
|
# Ollama is an LLM provider that runs models locally on your own machine.
|
||||||
|
# - LLM_KEY=OLLAMA
|
||||||
|
# - ENABLE_OLLAMA=true
|
||||||
|
# - OLLAMA_MODEL=qwen2.5:7b-instruct
|
||||||
|
# - OLLAMA_SERVER_URL=http://host.docker.internal:11434
|
||||||
|
# Open Router Support:
|
||||||
|
# - ENABLE_OPENROUTER=true
|
||||||
|
# - LLM_KEY=OPENROUTER
|
||||||
|
# - OPENROUTER_API_KEY=<your_openrouter_api_key>
|
||||||
|
# - OPENROUTER_MODEL=mistralai/mistral-small-3.1-24b-instruct
|
||||||
|
# Groq Support:
|
||||||
|
# - ENABLE_GROQ=true
|
||||||
|
# - LLM_KEY=GROQ
|
||||||
|
# - GROQ_API_KEY=<your_groq_api_key>
|
||||||
|
# - GROQ_MODEL=llama-3.1-8b-instant
|
||||||
|
|
||||||
|
# Maximum tokens to use: (only set for OpenRouter and Ollama)
|
||||||
|
# - LLM_CONFIG_MAX_TOKENS=128000
|
||||||
|
|
||||||
# Bitwarden Settings
|
# Bitwarden Settings
|
||||||
# If you are looking to integrate Skyvern with a password manager (eg Bitwarden), you can use the following environment variables.
|
# If you are looking to integrate Skyvern with a password manager (eg Bitwarden), you can use the following environment variables.
|
||||||
# - BITWARDEN_SERVER=http://localhost # OPTIONAL IF YOU ARE SELF HOSTING BITWARDEN
|
# - BITWARDEN_SERVER=http://localhost # OPTIONAL IF YOU ARE SELF HOSTING BITWARDEN
|
||||||
@@ -80,7 +117,7 @@ services:
|
|||||||
# - BITWARDEN_CLIENT_ID=FILL_ME_IN_PLEASE
|
# - BITWARDEN_CLIENT_ID=FILL_ME_IN_PLEASE
|
||||||
# - BITWARDEN_CLIENT_SECRET=FILL_ME_IN_PLEASE
|
# - BITWARDEN_CLIENT_SECRET=FILL_ME_IN_PLEASE
|
||||||
# - BITWARDEN_MASTER_PASSWORD=FILL_ME_IN_PLEASE
|
# - BITWARDEN_MASTER_PASSWORD=FILL_ME_IN_PLEASE
|
||||||
|
|
||||||
depends_on:
|
depends_on:
|
||||||
postgres:
|
postgres:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
|
|||||||
@@ -7,6 +7,8 @@ import time
|
|||||||
import uuid
|
import uuid
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
import requests
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
import typer
|
import typer
|
||||||
import uvicorn
|
import uvicorn
|
||||||
@@ -472,6 +474,82 @@ def setup_browser_config() -> tuple[str, Optional[str], Optional[str]]:
|
|||||||
print("\nTo use CDP connection, Chrome must be running with remote debugging enabled.")
|
print("\nTo use CDP connection, Chrome must be running with remote debugging enabled.")
|
||||||
print("Example: chrome --remote-debugging-port=9222")
|
print("Example: chrome --remote-debugging-port=9222")
|
||||||
print("Default debugging URL: http://localhost:9222")
|
print("Default debugging URL: http://localhost:9222")
|
||||||
|
|
||||||
|
default_port = "9222"
|
||||||
|
if remote_debugging_url is None:
|
||||||
|
remote_debugging_url = "http://localhost:9222"
|
||||||
|
elif ":" in remote_debugging_url.split("/")[-1]:
|
||||||
|
default_port = remote_debugging_url.split(":")[-1].split("/")[0]
|
||||||
|
|
||||||
|
parsed_url = urlparse(remote_debugging_url)
|
||||||
|
version_url = f"{parsed_url.scheme}://{parsed_url.netloc}/json/version"
|
||||||
|
|
||||||
|
print(f"\nChecking if Chrome is already running with remote debugging on port {default_port}...")
|
||||||
|
try:
|
||||||
|
response = requests.get(version_url, timeout=2)
|
||||||
|
if response.status_code == 200:
|
||||||
|
try:
|
||||||
|
browser_info = response.json()
|
||||||
|
print(f"Chrome is already running with remote debugging!")
|
||||||
|
if "Browser" in browser_info:
|
||||||
|
print(f"Browser: {browser_info['Browser']}")
|
||||||
|
if "webSocketDebuggerUrl" in browser_info:
|
||||||
|
print(f"WebSocket URL: {browser_info['webSocketDebuggerUrl']}")
|
||||||
|
print(f"Connected to {remote_debugging_url}")
|
||||||
|
return selected_browser, browser_location, remote_debugging_url
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
print("Port is in use, but doesn't appear to be Chrome with remote debugging.")
|
||||||
|
except requests.RequestException:
|
||||||
|
print(f"No Chrome instance detected on {remote_debugging_url}")
|
||||||
|
|
||||||
|
print("\nExecuting Chrome with remote debugging enabled:")
|
||||||
|
|
||||||
|
if host_system == "darwin" or host_system == "linux":
|
||||||
|
chrome_cmd = f'{browser_location} --remote-debugging-port={default_port} --user-data-dir="$HOME/chrome-cdp-profile" --no-first-run --no-default-browser-check'
|
||||||
|
print(f" {chrome_cmd}")
|
||||||
|
elif host_system == "windows" or host_system == "wsl":
|
||||||
|
chrome_cmd = f'"{browser_location}" --remote-debugging-port={default_port} --user-data-dir="C:\\chrome-cdp-profile" --no-first-run --no-default-browser-check'
|
||||||
|
print(f" {chrome_cmd}")
|
||||||
|
else:
|
||||||
|
print("Unsupported OS for Chrome configuration. Please set it up manually.")
|
||||||
|
|
||||||
|
# Ask user if they want to execute the command
|
||||||
|
execute_browser = input("\nWould you like to start Chrome with remote debugging now? (y/n) [y]: ").strip().lower()
|
||||||
|
if not execute_browser or execute_browser == "y":
|
||||||
|
print(f"Starting Chrome with remote debugging on port {default_port}...")
|
||||||
|
try:
|
||||||
|
# Execute in background - different approach per OS
|
||||||
|
if host_system in ["darwin", "linux"]:
|
||||||
|
subprocess.Popen(f"nohup {chrome_cmd} > /dev/null 2>&1 &", shell=True)
|
||||||
|
elif host_system == "windows":
|
||||||
|
subprocess.Popen(f"start {chrome_cmd}", shell=True)
|
||||||
|
elif host_system == "wsl":
|
||||||
|
subprocess.Popen(f"cmd.exe /c start {chrome_cmd}", shell=True)
|
||||||
|
|
||||||
|
print(f"Chrome started successfully. Connecting to {remote_debugging_url}")
|
||||||
|
|
||||||
|
print("Waiting for Chrome to initialize...")
|
||||||
|
time.sleep(2)
|
||||||
|
|
||||||
|
try:
|
||||||
|
verification_response = requests.get(version_url, timeout=5)
|
||||||
|
if verification_response.status_code == 200:
|
||||||
|
try:
|
||||||
|
browser_info = verification_response.json()
|
||||||
|
print("Connection verified! Chrome is running with remote debugging.")
|
||||||
|
if "Browser" in browser_info:
|
||||||
|
print(f"Browser: {browser_info['Browser']}")
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
print("Warning: Response from Chrome debugging port is not valid JSON.")
|
||||||
|
else:
|
||||||
|
print(f"Warning: Chrome responded with status code {verification_response.status_code}")
|
||||||
|
except requests.RequestException as e:
|
||||||
|
print(f"Warning: Could not verify Chrome is running properly: {e}")
|
||||||
|
print("You may need to check Chrome manually or try a different port.")
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error starting Chrome: {e}")
|
||||||
|
print("Please start Chrome manually using the command above.")
|
||||||
|
|
||||||
remote_debugging_url = input("Enter remote debugging URL (press Enter for default): ").strip()
|
remote_debugging_url = input("Enter remote debugging URL (press Enter for default): ").strip()
|
||||||
if not remote_debugging_url:
|
if not remote_debugging_url:
|
||||||
remote_debugging_url = "http://localhost:9222"
|
remote_debugging_url = "http://localhost:9222"
|
||||||
|
|||||||
@@ -214,6 +214,23 @@ class Settings(BaseSettings):
|
|||||||
NOVITA_API_KEY: str | None = None
|
NOVITA_API_KEY: str | None = None
|
||||||
NOVITA_API_VERSION: str = "v3"
|
NOVITA_API_VERSION: str = "v3"
|
||||||
|
|
||||||
|
# OLLAMA
|
||||||
|
ENABLE_OLLAMA: bool = False
|
||||||
|
OLLAMA_SERVER_URL: str | None = None
|
||||||
|
OLLAMA_MODEL: str | None = None
|
||||||
|
|
||||||
|
# OPENROUTER
|
||||||
|
ENABLE_OPENROUTER: bool = False
|
||||||
|
OPENROUTER_API_KEY: str | None = None
|
||||||
|
OPENROUTER_MODEL: str | None = None
|
||||||
|
OPENROUTER_API_BASE: str = "https://api.openrouter.ai/v1"
|
||||||
|
|
||||||
|
# GROQ
|
||||||
|
ENABLE_GROQ: bool = False
|
||||||
|
GROQ_API_KEY: str | None = None
|
||||||
|
GROQ_MODEL: str | None = None
|
||||||
|
GROQ_API_BASE: str = "https://api.groq.com/openai/v1"
|
||||||
|
|
||||||
# TOTP Settings
|
# TOTP Settings
|
||||||
TOTP_LIFESPAN_MINUTES: int = 10
|
TOTP_LIFESPAN_MINUTES: int = 10
|
||||||
VERIFICATION_CODE_INITIAL_WAIT_TIME_SECS: int = 40
|
VERIFICATION_CODE_INITIAL_WAIT_TIME_SECS: int = 40
|
||||||
|
|||||||
@@ -804,7 +804,67 @@ if settings.ENABLE_VERTEX_AI:
|
|||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if settings.ENABLE_OLLAMA:
|
||||||
|
# Register Ollama model configured in settings
|
||||||
|
if settings.OLLAMA_MODEL:
|
||||||
|
model_name = settings.OLLAMA_MODEL
|
||||||
|
LLMConfigRegistry.register_config(
|
||||||
|
"OLLAMA",
|
||||||
|
LLMConfig(
|
||||||
|
f"ollama/{model_name}",
|
||||||
|
["OLLAMA_SERVER_URL", "OLLAMA_MODEL"],
|
||||||
|
supports_vision=False, # Ollama does not support vision yet
|
||||||
|
add_assistant_prefix=False,
|
||||||
|
max_completion_tokens=settings.LLM_CONFIG_MAX_TOKENS,
|
||||||
|
litellm_params=LiteLLMParams(
|
||||||
|
api_base=settings.OLLAMA_SERVER_URL,
|
||||||
|
api_key=None,
|
||||||
|
api_version=None,
|
||||||
|
model_info={"model_name": f"ollama/{model_name}"},
|
||||||
|
),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
if settings.ENABLE_OPENROUTER:
|
||||||
|
# Register OpenRouter model configured in settings
|
||||||
|
if settings.OPENROUTER_MODEL:
|
||||||
|
model_name = settings.OPENROUTER_MODEL
|
||||||
|
LLMConfigRegistry.register_config(
|
||||||
|
"OPENROUTER",
|
||||||
|
LLMConfig(
|
||||||
|
f"openrouter/{model_name}",
|
||||||
|
["OPENROUTER_API_KEY", "OPENROUTER_MODEL"],
|
||||||
|
supports_vision=settings.LLM_CONFIG_SUPPORT_VISION,
|
||||||
|
add_assistant_prefix=False,
|
||||||
|
max_completion_tokens=settings.LLM_CONFIG_MAX_TOKENS,
|
||||||
|
litellm_params=LiteLLMParams(
|
||||||
|
api_key=settings.OPENROUTER_API_KEY,
|
||||||
|
api_base=settings.OPENROUTER_API_BASE,
|
||||||
|
api_version=None,
|
||||||
|
model_info={"model_name": f"openrouter/{model_name}"},
|
||||||
|
),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
if settings.ENABLE_GROQ:
|
||||||
|
# Register Groq model configured in settings
|
||||||
|
if settings.GROQ_MODEL:
|
||||||
|
model_name = settings.GROQ_MODEL
|
||||||
|
LLMConfigRegistry.register_config(
|
||||||
|
"GROQ",
|
||||||
|
LLMConfig(
|
||||||
|
f"groq/{model_name}",
|
||||||
|
["GROQ_API_KEY", "GROQ_MODEL"],
|
||||||
|
supports_vision=settings.LLM_CONFIG_SUPPORT_VISION,
|
||||||
|
add_assistant_prefix=False,
|
||||||
|
max_completion_tokens=settings.LLM_CONFIG_MAX_TOKENS,
|
||||||
|
litellm_params=LiteLLMParams(
|
||||||
|
api_key=settings.GROQ_API_KEY,
|
||||||
|
api_version=None,
|
||||||
|
api_base=settings.GROQ_API_BASE,
|
||||||
|
model_info={"model_name": f"groq/{model_name}"},
|
||||||
|
),
|
||||||
|
),
|
||||||
|
)
|
||||||
# Add support for dynamically configuring OpenAI-compatible LLM models
|
# Add support for dynamically configuring OpenAI-compatible LLM models
|
||||||
# Based on liteLLM's support for OpenAI-compatible APIs
|
# Based on liteLLM's support for OpenAI-compatible APIs
|
||||||
# See documentation: https://docs.litellm.ai/docs/providers/openai_compatible
|
# See documentation: https://docs.litellm.ai/docs/providers/openai_compatible
|
||||||
|
|||||||
@@ -465,10 +465,18 @@ async def _create_cdp_connection_browser(
|
|||||||
raise Exception("Port 9222 is already in use. Another process may be using this port.")
|
raise Exception("Port 9222 is already in use. Another process may be using this port.")
|
||||||
|
|
||||||
browser_process = subprocess.Popen(
|
browser_process = subprocess.Popen(
|
||||||
[browser_path, "--remote-debugging-port=9222"], stdout=subprocess.PIPE, stderr=subprocess.PIPE
|
[
|
||||||
|
browser_path,
|
||||||
|
"--remote-debugging-port=9222",
|
||||||
|
"--no-first-run",
|
||||||
|
"--no-default-browser-check",
|
||||||
|
"--remote-debugging-address=0.0.0.0",
|
||||||
|
],
|
||||||
|
stdout=subprocess.PIPE,
|
||||||
|
stderr=subprocess.PIPE
|
||||||
)
|
)
|
||||||
# Add small delay to allow browser to start
|
# Add small delay to allow browser to start
|
||||||
time.sleep(1)
|
time.sleep(2)
|
||||||
if browser_process.poll() is not None:
|
if browser_process.poll() is not None:
|
||||||
raise Exception(f"Failed to open browser. browser_path: {browser_path}")
|
raise Exception(f"Failed to open browser. browser_path: {browser_path}")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user