Added Ollama & Openrouter & Groq & improved cdp browser (#2283)
This commit is contained in:
committed by
GitHub
parent
0540e65d06
commit
c3072d7572
@@ -24,6 +24,7 @@ services:
|
||||
# comment out if you want to externally call skyvern API
|
||||
ports:
|
||||
- 8000:8000
|
||||
- 9222:9222 # for cdp browser forwarding
|
||||
volumes:
|
||||
- ./artifacts:/data/artifacts
|
||||
- ./videos:/data/videos
|
||||
@@ -36,9 +37,26 @@ services:
|
||||
environment:
|
||||
- DATABASE_STRING=postgresql+psycopg://skyvern:skyvern@postgres:5432/skyvern
|
||||
- BROWSER_TYPE=chromium-headful
|
||||
- ENABLE_OPENAI=true
|
||||
- LLM_KEY=OPENAI_GPT4O
|
||||
- OPENAI_API_KEY=<your_openai_key>
|
||||
# - BROWSER_TYPE=cdp-connect
|
||||
# Use this command to start Chrome with remote debugging:
|
||||
# "C:\Program Files\Google\Chrome\Application\chrome.exe" --remote-debugging-port=9222 --user-data-dir="C:\chrome-cdp-profile" --no-first-run --no-default-browser-check
|
||||
# /Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --remote-debugging-port=9222 --user-data-dir="/Users/yourusername/chrome-cdp-profile" --no-first-run --no-default-browser-check
|
||||
# - BROWSER_REMOTE_DEBUGGING_URL=http://host.docker.internal:9222/
|
||||
|
||||
# =========================
|
||||
# LLM Settings
|
||||
# =========================
|
||||
# OpenAI Support:
|
||||
# If you want to use OpenAI as your LLM provider, uncomment the following lines and fill in your OpenAI API key.
|
||||
# - ENABLE_OPENAI=true
|
||||
# - LLM_KEY=OPENAI_GPT4O
|
||||
# - OPENAI_API_KEY=<your_openai_key>
|
||||
# Gemini Support:
|
||||
# Gemini is a new LLM provider that is currently in beta. You can use it by uncommenting the following lines and filling in your Gemini API key.
|
||||
- LLM_KEY=GEMINI
|
||||
- ENABLE_GEMINI=true
|
||||
- GEMINI_API_KEY=YOUR_GEMINI_KEY
|
||||
- LLM_KEY=GEMINI_2.5_PRO_PREVIEW_03_25
|
||||
# If you want to use another LLM provider, such as Azure or Anthropic:
|
||||
# - ENABLE_ANTHROPIC=true
|
||||
# - LLM_KEY=ANTHROPIC_CLAUDE3.5_SONNET
|
||||
@@ -72,7 +90,26 @@ services:
|
||||
# - AWS_REGION=us-west-2 # Replace this with a different AWS region, if you desire
|
||||
# - AWS_ACCESS_KEY_ID=FILL_ME_IN_PLEASE
|
||||
# - AWS_SECRET_ACCESS_KEY=FILL_ME_IN_PLEASE
|
||||
#
|
||||
# Ollama Support:
|
||||
# Ollama is a local LLM provider that can be used to run models locally on your machine.
|
||||
# - LLM_KEY=OLLAMA
|
||||
# - ENABLE_OLLAMA=true
|
||||
# - OLLAMA_MODEL=qwen2.5:7b-instruct
|
||||
# - OLLAMA_SERVER_URL=http://host.docker.internal:11434
|
||||
# Open Router Support:
|
||||
# - ENABLE_OPENROUTER=true
|
||||
# - LLM_KEY=OPENROUTER
|
||||
# - OPENROUTER_API_KEY=<your_openrouter_api_key>
|
||||
# - OPENROUTER_MODEL=mistralai/mistral-small-3.1-24b-instruct
|
||||
# Groq Support:
|
||||
# - ENABLE_GROQ=true
|
||||
# - LLM_KEY=GROQ
|
||||
# - GROQ_API_KEY=<your_groq_api_key>
|
||||
# - GROQ_MODEL=llama-3.1-8b-instant
|
||||
|
||||
# Maximum tokens to use: (only set for OpenRouter and Ollama)
|
||||
# - LLM_CONFIG_MAX_TOKENS=128000
|
||||
|
||||
# Bitwarden Settings
|
||||
# If you are looking to integrate Skyvern with a password manager (eg Bitwarden), you can use the following environment variables.
|
||||
# - BITWARDEN_SERVER=http://localhost # OPTIONAL IF YOU ARE SELF HOSTING BITWARDEN
|
||||
@@ -80,7 +117,7 @@ services:
|
||||
# - BITWARDEN_CLIENT_ID=FILL_ME_IN_PLEASE
|
||||
# - BITWARDEN_CLIENT_SECRET=FILL_ME_IN_PLEASE
|
||||
# - BITWARDEN_MASTER_PASSWORD=FILL_ME_IN_PLEASE
|
||||
|
||||
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
|
||||
@@ -7,6 +7,8 @@ import time
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
import requests
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import typer
|
||||
import uvicorn
|
||||
@@ -472,6 +474,82 @@ def setup_browser_config() -> tuple[str, Optional[str], Optional[str]]:
|
||||
print("\nTo use CDP connection, Chrome must be running with remote debugging enabled.")
|
||||
print("Example: chrome --remote-debugging-port=9222")
|
||||
print("Default debugging URL: http://localhost:9222")
|
||||
|
||||
default_port = "9222"
|
||||
if remote_debugging_url is None:
|
||||
remote_debugging_url = "http://localhost:9222"
|
||||
elif ":" in remote_debugging_url.split("/")[-1]:
|
||||
default_port = remote_debugging_url.split(":")[-1].split("/")[0]
|
||||
|
||||
parsed_url = urlparse(remote_debugging_url)
|
||||
version_url = f"{parsed_url.scheme}://{parsed_url.netloc}/json/version"
|
||||
|
||||
print(f"\nChecking if Chrome is already running with remote debugging on port {default_port}...")
|
||||
try:
|
||||
response = requests.get(version_url, timeout=2)
|
||||
if response.status_code == 200:
|
||||
try:
|
||||
browser_info = response.json()
|
||||
print(f"Chrome is already running with remote debugging!")
|
||||
if "Browser" in browser_info:
|
||||
print(f"Browser: {browser_info['Browser']}")
|
||||
if "webSocketDebuggerUrl" in browser_info:
|
||||
print(f"WebSocket URL: {browser_info['webSocketDebuggerUrl']}")
|
||||
print(f"Connected to {remote_debugging_url}")
|
||||
return selected_browser, browser_location, remote_debugging_url
|
||||
except json.JSONDecodeError:
|
||||
print("Port is in use, but doesn't appear to be Chrome with remote debugging.")
|
||||
except requests.RequestException:
|
||||
print(f"No Chrome instance detected on {remote_debugging_url}")
|
||||
|
||||
print("\nExecuting Chrome with remote debugging enabled:")
|
||||
|
||||
if host_system == "darwin" or host_system == "linux":
|
||||
chrome_cmd = f'{browser_location} --remote-debugging-port={default_port} --user-data-dir="$HOME/chrome-cdp-profile" --no-first-run --no-default-browser-check'
|
||||
print(f" {chrome_cmd}")
|
||||
elif host_system == "windows" or host_system == "wsl":
|
||||
chrome_cmd = f'"{browser_location}" --remote-debugging-port={default_port} --user-data-dir="C:\\chrome-cdp-profile" --no-first-run --no-default-browser-check'
|
||||
print(f" {chrome_cmd}")
|
||||
else:
|
||||
print("Unsupported OS for Chrome configuration. Please set it up manually.")
|
||||
|
||||
# Ask user if they want to execute the command
|
||||
execute_browser = input("\nWould you like to start Chrome with remote debugging now? (y/n) [y]: ").strip().lower()
|
||||
if not execute_browser or execute_browser == "y":
|
||||
print(f"Starting Chrome with remote debugging on port {default_port}...")
|
||||
try:
|
||||
# Execute in background - different approach per OS
|
||||
if host_system in ["darwin", "linux"]:
|
||||
subprocess.Popen(f"nohup {chrome_cmd} > /dev/null 2>&1 &", shell=True)
|
||||
elif host_system == "windows":
|
||||
subprocess.Popen(f"start {chrome_cmd}", shell=True)
|
||||
elif host_system == "wsl":
|
||||
subprocess.Popen(f"cmd.exe /c start {chrome_cmd}", shell=True)
|
||||
|
||||
print(f"Chrome started successfully. Connecting to {remote_debugging_url}")
|
||||
|
||||
print("Waiting for Chrome to initialize...")
|
||||
time.sleep(2)
|
||||
|
||||
try:
|
||||
verification_response = requests.get(version_url, timeout=5)
|
||||
if verification_response.status_code == 200:
|
||||
try:
|
||||
browser_info = verification_response.json()
|
||||
print("Connection verified! Chrome is running with remote debugging.")
|
||||
if "Browser" in browser_info:
|
||||
print(f"Browser: {browser_info['Browser']}")
|
||||
except json.JSONDecodeError:
|
||||
print("Warning: Response from Chrome debugging port is not valid JSON.")
|
||||
else:
|
||||
print(f"Warning: Chrome responded with status code {verification_response.status_code}")
|
||||
except requests.RequestException as e:
|
||||
print(f"Warning: Could not verify Chrome is running properly: {e}")
|
||||
print("You may need to check Chrome manually or try a different port.")
|
||||
except Exception as e:
|
||||
print(f"Error starting Chrome: {e}")
|
||||
print("Please start Chrome manually using the command above.")
|
||||
|
||||
remote_debugging_url = input("Enter remote debugging URL (press Enter for default): ").strip()
|
||||
if not remote_debugging_url:
|
||||
remote_debugging_url = "http://localhost:9222"
|
||||
|
||||
@@ -214,6 +214,23 @@ class Settings(BaseSettings):
|
||||
NOVITA_API_KEY: str | None = None
|
||||
NOVITA_API_VERSION: str = "v3"
|
||||
|
||||
# OLLAMA
|
||||
ENABLE_OLLAMA: bool = False
|
||||
OLLAMA_SERVER_URL: str | None = None
|
||||
OLLAMA_MODEL: str | None = None
|
||||
|
||||
# OPENROUTER
|
||||
ENABLE_OPENROUTER: bool = False
|
||||
OPENROUTER_API_KEY: str | None = None
|
||||
OPENROUTER_MODEL: str | None = None
|
||||
# Base URL of OpenRouter's OpenAI-compatible API (documented endpoint root is openrouter.ai/api/v1).
OPENROUTER_API_BASE: str = "https://openrouter.ai/api/v1"
|
||||
|
||||
# GROQ
|
||||
ENABLE_GROQ: bool = False
|
||||
GROQ_API_KEY: str | None = None
|
||||
GROQ_MODEL: str | None = None
|
||||
GROQ_API_BASE: str = "https://api.groq.com/openai/v1"
|
||||
|
||||
# TOTP Settings
|
||||
TOTP_LIFESPAN_MINUTES: int = 10
|
||||
VERIFICATION_CODE_INITIAL_WAIT_TIME_SECS: int = 40
|
||||
|
||||
@@ -804,7 +804,67 @@ if settings.ENABLE_VERTEX_AI:
|
||||
),
|
||||
)
|
||||
|
||||
if settings.ENABLE_OLLAMA:
|
||||
# Register Ollama model configured in settings
|
||||
if settings.OLLAMA_MODEL:
|
||||
model_name = settings.OLLAMA_MODEL
|
||||
LLMConfigRegistry.register_config(
|
||||
"OLLAMA",
|
||||
LLMConfig(
|
||||
f"ollama/{model_name}",
|
||||
["OLLAMA_SERVER_URL", "OLLAMA_MODEL"],
|
||||
supports_vision=False, # Ollama does not support vision yet
|
||||
add_assistant_prefix=False,
|
||||
max_completion_tokens=settings.LLM_CONFIG_MAX_TOKENS,
|
||||
litellm_params=LiteLLMParams(
|
||||
api_base=settings.OLLAMA_SERVER_URL,
|
||||
api_key=None,
|
||||
api_version=None,
|
||||
model_info={"model_name": f"ollama/{model_name}"},
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
if settings.ENABLE_OPENROUTER:
|
||||
# Register OpenRouter model configured in settings
|
||||
if settings.OPENROUTER_MODEL:
|
||||
model_name = settings.OPENROUTER_MODEL
|
||||
LLMConfigRegistry.register_config(
|
||||
"OPENROUTER",
|
||||
LLMConfig(
|
||||
f"openrouter/{model_name}",
|
||||
["OPENROUTER_API_KEY", "OPENROUTER_MODEL"],
|
||||
supports_vision=settings.LLM_CONFIG_SUPPORT_VISION,
|
||||
add_assistant_prefix=False,
|
||||
max_completion_tokens=settings.LLM_CONFIG_MAX_TOKENS,
|
||||
litellm_params=LiteLLMParams(
|
||||
api_key=settings.OPENROUTER_API_KEY,
|
||||
api_base=settings.OPENROUTER_API_BASE,
|
||||
api_version=None,
|
||||
model_info={"model_name": f"openrouter/{model_name}"},
|
||||
),
|
||||
),
|
||||
)
|
||||
if settings.ENABLE_GROQ:
|
||||
# Register Groq model configured in settings
|
||||
if settings.GROQ_MODEL:
|
||||
model_name = settings.GROQ_MODEL
|
||||
LLMConfigRegistry.register_config(
|
||||
"GROQ",
|
||||
LLMConfig(
|
||||
f"groq/{model_name}",
|
||||
["GROQ_API_KEY", "GROQ_MODEL"],
|
||||
supports_vision=settings.LLM_CONFIG_SUPPORT_VISION,
|
||||
add_assistant_prefix=False,
|
||||
max_completion_tokens=settings.LLM_CONFIG_MAX_TOKENS,
|
||||
litellm_params=LiteLLMParams(
|
||||
api_key=settings.GROQ_API_KEY,
|
||||
api_version=None,
|
||||
api_base=settings.GROQ_API_BASE,
|
||||
model_info={"model_name": f"groq/{model_name}"},
|
||||
),
|
||||
),
|
||||
)
|
||||
# Add support for dynamically configuring OpenAI-compatible LLM models
|
||||
# Based on liteLLM's support for OpenAI-compatible APIs
|
||||
# See documentation: https://docs.litellm.ai/docs/providers/openai_compatible
|
||||
|
||||
@@ -465,10 +465,18 @@ async def _create_cdp_connection_browser(
|
||||
raise Exception("Port 9222 is already in use. Another process may be using this port.")
|
||||
|
||||
browser_process = subprocess.Popen(
|
||||
[browser_path, "--remote-debugging-port=9222"], stdout=subprocess.PIPE, stderr=subprocess.PIPE
|
||||
[
|
||||
browser_path,
|
||||
"--remote-debugging-port=9222",
|
||||
"--no-first-run",
|
||||
"--no-default-browser-check",
|
||||
"--remote-debugging-address=0.0.0.0",
|
||||
],
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE
|
||||
)
|
||||
# Add small delay to allow browser to start
|
||||
time.sleep(1)
|
||||
time.sleep(2)
|
||||
if browser_process.poll() is not None:
|
||||
raise Exception(f"Failed to open browser. browser_path: {browser_path}")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user