Added Ollama & Openrouter & Groq & improved cdp browser (#2283)

This commit is contained in:
Prakash Maheshwaran
2025-05-05 03:03:23 -04:00
committed by GitHub
parent 0540e65d06
commit c3072d7572
5 changed files with 207 additions and 7 deletions

View File

@@ -24,6 +24,7 @@ services:
# comment out if you want to externally call skyvern API # comment out if you want to externally call skyvern API
ports: ports:
- 8000:8000 - 8000:8000
- 9222:9222 # for cdp browser forwarding
volumes: volumes:
- ./artifacts:/data/artifacts - ./artifacts:/data/artifacts
- ./videos:/data/videos - ./videos:/data/videos
@@ -36,9 +37,26 @@ services:
environment: environment:
- DATABASE_STRING=postgresql+psycopg://skyvern:skyvern@postgres:5432/skyvern - DATABASE_STRING=postgresql+psycopg://skyvern:skyvern@postgres:5432/skyvern
- BROWSER_TYPE=chromium-headful - BROWSER_TYPE=chromium-headful
- ENABLE_OPENAI=true # - BROWSER_TYPE=cdp-connect
- LLM_KEY=OPENAI_GPT4O # Use this command to start Chrome with remote debugging:
- OPENAI_API_KEY=<your_openai_key> # "C:\Program Files\Google\Chrome\Application\chrome.exe" --remote-debugging-port=9222 --user-data-dir="C:\chrome-cdp-profile" --no-first-run --no-default-browser-check
# /Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --remote-debugging-port=9222 --user-data-dir="/Users/yourusername/chrome-cdp-profile" --no-first-run --no-default-browser-check
# - BROWSER_REMOTE_DEBUGGING_URL=http://host.docker.internal:9222/
# =========================
# LLM Settings
# =========================
# OpenAI Support:
# If you want to use OpenAI as your LLM provider, uncomment the following lines and fill in your OpenAI API key.
# - ENABLE_OPENAI=true
# - LLM_KEY=OPENAI_GPT4O
# - OPENAI_API_KEY=<your_openai_key>
# Gemini Support:
# Gemini is a new LLM provider that is currently in beta. You can use it by uncommenting the following lines and filling in your Gemini API key.
# - ENABLE_GEMINI=true
# - LLM_KEY=GEMINI_2.5_PRO_PREVIEW_03_25
# - GEMINI_API_KEY=<your_gemini_api_key>
# If you want to use other LLM provider, like azure and anthropic: # If you want to use other LLM provider, like azure and anthropic:
# - ENABLE_ANTHROPIC=true # - ENABLE_ANTHROPIC=true
# - LLM_KEY=ANTHROPIC_CLAUDE3.5_SONNET # - LLM_KEY=ANTHROPIC_CLAUDE3.5_SONNET
@@ -72,7 +90,26 @@ services:
# - AWS_REGION=us-west-2 # Replace this with a different AWS region, if you desire # - AWS_REGION=us-west-2 # Replace this with a different AWS region, if you desire
# - AWS_ACCESS_KEY_ID=FILL_ME_IN_PLEASE # - AWS_ACCESS_KEY_ID=FILL_ME_IN_PLEASE
# - AWS_SECRET_ACCESS_KEY=FILL_ME_IN_PLEASE # - AWS_SECRET_ACCESS_KEY=FILL_ME_IN_PLEASE
# Ollama Support:
# Ollama is a local LLM provider that runs models on your own machine.
# - LLM_KEY=OLLAMA
# - ENABLE_OLLAMA=true
# - OLLAMA_MODEL=qwen2.5:7b-instruct
# - OLLAMA_SERVER_URL=http://host.docker.internal:11434
# Open Router Support:
# - ENABLE_OPENROUTER=true
# - LLM_KEY=OPENROUTER
# - OPENROUTER_API_KEY=<your_openrouter_api_key>
# - OPENROUTER_MODEL=mistralai/mistral-small-3.1-24b-instruct
# Groq Support:
# - ENABLE_GROQ=true
# - LLM_KEY=GROQ
# - GROQ_API_KEY=<your_groq_api_key>
# - GROQ_MODEL=llama-3.1-8b-instant
# Maximum tokens to use: (only set for OpenRouter and Ollama)
# - LLM_CONFIG_MAX_TOKENS=128000
# Bitwarden Settings # Bitwarden Settings
# If you are looking to integrate Skyvern with a password manager (eg Bitwarden), you can use the following environment variables. # If you are looking to integrate Skyvern with a password manager (eg Bitwarden), you can use the following environment variables.
# - BITWARDEN_SERVER=http://localhost # OPTIONAL IF YOU ARE SELF HOSTING BITWARDEN # - BITWARDEN_SERVER=http://localhost # OPTIONAL IF YOU ARE SELF HOSTING BITWARDEN
@@ -80,7 +117,7 @@ services:
# - BITWARDEN_CLIENT_ID=FILL_ME_IN_PLEASE # - BITWARDEN_CLIENT_ID=FILL_ME_IN_PLEASE
# - BITWARDEN_CLIENT_SECRET=FILL_ME_IN_PLEASE # - BITWARDEN_CLIENT_SECRET=FILL_ME_IN_PLEASE
# - BITWARDEN_MASTER_PASSWORD=FILL_ME_IN_PLEASE # - BITWARDEN_MASTER_PASSWORD=FILL_ME_IN_PLEASE
depends_on: depends_on:
postgres: postgres:
condition: service_healthy condition: service_healthy

View File

@@ -7,6 +7,8 @@ import time
import uuid import uuid
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
import requests
from urllib.parse import urlparse
import typer import typer
import uvicorn import uvicorn
@@ -472,6 +474,82 @@ def setup_browser_config() -> tuple[str, Optional[str], Optional[str]]:
print("\nTo use CDP connection, Chrome must be running with remote debugging enabled.") print("\nTo use CDP connection, Chrome must be running with remote debugging enabled.")
print("Example: chrome --remote-debugging-port=9222") print("Example: chrome --remote-debugging-port=9222")
print("Default debugging URL: http://localhost:9222") print("Default debugging URL: http://localhost:9222")
default_port = "9222"
if remote_debugging_url is None:
remote_debugging_url = "http://localhost:9222"
elif ":" in remote_debugging_url.split("/")[-1]:
default_port = remote_debugging_url.split(":")[-1].split("/")[0]
parsed_url = urlparse(remote_debugging_url)
version_url = f"{parsed_url.scheme}://{parsed_url.netloc}/json/version"
print(f"\nChecking if Chrome is already running with remote debugging on port {default_port}...")
try:
response = requests.get(version_url, timeout=2)
if response.status_code == 200:
try:
browser_info = response.json()
print(f"Chrome is already running with remote debugging!")
if "Browser" in browser_info:
print(f"Browser: {browser_info['Browser']}")
if "webSocketDebuggerUrl" in browser_info:
print(f"WebSocket URL: {browser_info['webSocketDebuggerUrl']}")
print(f"Connected to {remote_debugging_url}")
return selected_browser, browser_location, remote_debugging_url
except json.JSONDecodeError:
print("Port is in use, but doesn't appear to be Chrome with remote debugging.")
except requests.RequestException:
print(f"No Chrome instance detected on {remote_debugging_url}")
print("\nExecuting Chrome with remote debugging enabled:")
if host_system == "darwin" or host_system == "linux":
chrome_cmd = f'{browser_location} --remote-debugging-port={default_port} --user-data-dir="$HOME/chrome-cdp-profile" --no-first-run --no-default-browser-check'
print(f" {chrome_cmd}")
elif host_system == "windows" or host_system == "wsl":
chrome_cmd = f'"{browser_location}" --remote-debugging-port={default_port} --user-data-dir="C:\\chrome-cdp-profile" --no-first-run --no-default-browser-check'
print(f" {chrome_cmd}")
else:
print("Unsupported OS for Chrome configuration. Please set it up manually.")
# Ask user if they want to execute the command
execute_browser = input("\nWould you like to start Chrome with remote debugging now? (y/n) [y]: ").strip().lower()
if not execute_browser or execute_browser == "y":
print(f"Starting Chrome with remote debugging on port {default_port}...")
try:
# Execute in background - different approach per OS
if host_system in ["darwin", "linux"]:
subprocess.Popen(f"nohup {chrome_cmd} > /dev/null 2>&1 &", shell=True)
elif host_system == "windows":
subprocess.Popen(f"start {chrome_cmd}", shell=True)
elif host_system == "wsl":
subprocess.Popen(f"cmd.exe /c start {chrome_cmd}", shell=True)
print(f"Chrome started successfully. Connecting to {remote_debugging_url}")
print("Waiting for Chrome to initialize...")
time.sleep(2)
try:
verification_response = requests.get(version_url, timeout=5)
if verification_response.status_code == 200:
try:
browser_info = verification_response.json()
print("Connection verified! Chrome is running with remote debugging.")
if "Browser" in browser_info:
print(f"Browser: {browser_info['Browser']}")
except json.JSONDecodeError:
print("Warning: Response from Chrome debugging port is not valid JSON.")
else:
print(f"Warning: Chrome responded with status code {verification_response.status_code}")
except requests.RequestException as e:
print(f"Warning: Could not verify Chrome is running properly: {e}")
print("You may need to check Chrome manually or try a different port.")
except Exception as e:
print(f"Error starting Chrome: {e}")
print("Please start Chrome manually using the command above.")
remote_debugging_url = input("Enter remote debugging URL (press Enter for default): ").strip() remote_debugging_url = input("Enter remote debugging URL (press Enter for default): ").strip()
if not remote_debugging_url: if not remote_debugging_url:
remote_debugging_url = "http://localhost:9222" remote_debugging_url = "http://localhost:9222"

View File

@@ -214,6 +214,23 @@ class Settings(BaseSettings):
NOVITA_API_KEY: str | None = None NOVITA_API_KEY: str | None = None
NOVITA_API_VERSION: str = "v3" NOVITA_API_VERSION: str = "v3"
# OLLAMA
# Local LLM provider; both URL and model must be set for the OLLAMA config to register.
ENABLE_OLLAMA: bool = False
OLLAMA_SERVER_URL: str | None = None  # e.g. http://host.docker.internal:11434 when Skyvern runs in Docker
OLLAMA_MODEL: str | None = None  # model tag as known to the Ollama server, e.g. "qwen2.5:7b-instruct"
# OPENROUTER
ENABLE_OPENROUTER: bool = False
OPENROUTER_API_KEY: str | None = None
OPENROUTER_MODEL: str | None = None  # provider-qualified model id, e.g. "mistralai/mistral-small-3.1-24b-instruct"
# NOTE(review): OpenRouter's documented API base is https://openrouter.ai/api/v1,
# not api.openrouter.ai — verify this default against the OpenRouter docs.
OPENROUTER_API_BASE: str = "https://api.openrouter.ai/v1"
# GROQ
ENABLE_GROQ: bool = False
GROQ_API_KEY: str | None = None
GROQ_MODEL: str | None = None  # e.g. "llama-3.1-8b-instant"
GROQ_API_BASE: str = "https://api.groq.com/openai/v1"  # Groq's OpenAI-compatible endpoint
# TOTP Settings # TOTP Settings
TOTP_LIFESPAN_MINUTES: int = 10 TOTP_LIFESPAN_MINUTES: int = 10
VERIFICATION_CODE_INITIAL_WAIT_TIME_SECS: int = 40 VERIFICATION_CODE_INITIAL_WAIT_TIME_SECS: int = 40

View File

@@ -804,7 +804,67 @@ if settings.ENABLE_VERTEX_AI:
), ),
) )
# Register the Ollama / OpenRouter / Groq providers with the LLM config registry.
# Each registration is gated on both the ENABLE_* flag and the corresponding
# *_MODEL setting being present; a flag set without a model is silently skipped.
if settings.ENABLE_OLLAMA:
    # Register Ollama model configured in settings
    if settings.OLLAMA_MODEL:
        model_name = settings.OLLAMA_MODEL
        LLMConfigRegistry.register_config(
            "OLLAMA",
            LLMConfig(
                # "ollama/" prefix routes the call through litellm's Ollama provider
                f"ollama/{model_name}",
                ["OLLAMA_SERVER_URL", "OLLAMA_MODEL"],
                supports_vision=False,  # Ollama does not support vision yet
                add_assistant_prefix=False,
                max_completion_tokens=settings.LLM_CONFIG_MAX_TOKENS,
                litellm_params=LiteLLMParams(
                    api_base=settings.OLLAMA_SERVER_URL,
                    # Ollama is unauthenticated locally, so no key/version is sent
                    api_key=None,
                    api_version=None,
                    model_info={"model_name": f"ollama/{model_name}"},
                ),
            ),
        )
if settings.ENABLE_OPENROUTER:
    # Register OpenRouter model configured in settings
    if settings.OPENROUTER_MODEL:
        model_name = settings.OPENROUTER_MODEL
        LLMConfigRegistry.register_config(
            "OPENROUTER",
            LLMConfig(
                # "openrouter/" prefix routes the call through litellm's OpenRouter provider
                f"openrouter/{model_name}",
                ["OPENROUTER_API_KEY", "OPENROUTER_MODEL"],
                # vision support is user-controlled; depends on the chosen OpenRouter model
                supports_vision=settings.LLM_CONFIG_SUPPORT_VISION,
                add_assistant_prefix=False,
                max_completion_tokens=settings.LLM_CONFIG_MAX_TOKENS,
                litellm_params=LiteLLMParams(
                    api_key=settings.OPENROUTER_API_KEY,
                    api_base=settings.OPENROUTER_API_BASE,
                    api_version=None,
                    model_info={"model_name": f"openrouter/{model_name}"},
                ),
            ),
        )
if settings.ENABLE_GROQ:
    # Register Groq model configured in settings
    if settings.GROQ_MODEL:
        model_name = settings.GROQ_MODEL
        LLMConfigRegistry.register_config(
            "GROQ",
            LLMConfig(
                # "groq/" prefix routes the call through litellm's Groq provider
                f"groq/{model_name}",
                ["GROQ_API_KEY", "GROQ_MODEL"],
                # vision support is user-controlled; depends on the chosen Groq model
                supports_vision=settings.LLM_CONFIG_SUPPORT_VISION,
                add_assistant_prefix=False,
                max_completion_tokens=settings.LLM_CONFIG_MAX_TOKENS,
                litellm_params=LiteLLMParams(
                    api_key=settings.GROQ_API_KEY,
                    api_version=None,
                    api_base=settings.GROQ_API_BASE,
                    model_info={"model_name": f"groq/{model_name}"},
                ),
            ),
        )
# Add support for dynamically configuring OpenAI-compatible LLM models # Add support for dynamically configuring OpenAI-compatible LLM models
# Based on liteLLM's support for OpenAI-compatible APIs # Based on liteLLM's support for OpenAI-compatible APIs
# See documentation: https://docs.litellm.ai/docs/providers/openai_compatible # See documentation: https://docs.litellm.ai/docs/providers/openai_compatible

View File

@@ -465,10 +465,18 @@ async def _create_cdp_connection_browser(
raise Exception("Port 9222 is already in use. Another process may be using this port.") raise Exception("Port 9222 is already in use. Another process may be using this port.")
browser_process = subprocess.Popen( browser_process = subprocess.Popen(
[browser_path, "--remote-debugging-port=9222"], stdout=subprocess.PIPE, stderr=subprocess.PIPE [
browser_path,
"--remote-debugging-port=9222",
"--no-first-run",
"--no-default-browser-check",
"--remote-debugging-address=0.0.0.0",
],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE
) )
# Add small delay to allow browser to start # Add small delay to allow browser to start
time.sleep(1) time.sleep(2)
if browser_process.poll() is not None: if browser_process.poll() is not None:
raise Exception(f"Failed to open browser. browser_path: {browser_path}") raise Exception(f"Failed to open browser. browser_path: {browser_path}")