Extract shared core from MCP tools, add CLI browser commands (#4768)

2026-02-17 11:24:56 -08:00
parent aacc612365
commit 7c5be8fefe
14 changed files with 1304 additions and 113 deletions
--- a/skyvern/cli/core/browser_ops.py
+++ b/skyvern/cli/core/browser_ops.py
@@ -0,0 +1,87 @@
+"""Shared browser operations for MCP tools and CLI commands.
+
+Each function: validate inputs -> call SDK -> return typed result.
+Session resolution and output formatting are caller responsibilities.
+"""
+
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass
+from typing import Any
+
+from .guards import GuardError
+
+
+@dataclass
+class NavigateResult:
+    """Result of a navigation: the URL and title read from the page afterwards."""
+
+    url: str  # filled from `page.url` by do_navigate once `goto` has returned
+    title: str  # filled from the awaited `page.title()` by do_navigate
+
+
+@dataclass
+class ScreenshotResult:
+    """Screenshot payload together with the full-page flag it was requested with."""
+
+    data: bytes  # raw bytes returned by the page/element screenshot call
+    full_page: bool = False  # the caller's `full_page` argument, echoed back by do_screenshot
+
+
+@dataclass
+class ActResult:
+    """Result of a prompt-driven page action."""
+
+    prompt: str  # the prompt that was handed to `page.act`
+    completed: bool = True  # do_act always sets True after `page.act` returns without raising
+
+
+@dataclass
+class ExtractResult:
+    """Wrapper around the value produced by a page extraction."""
+
+    extracted: Any = None  # whatever `page.extract` returned; its type is not constrained here
+
+
+def parse_extract_schema(schema: str | dict[str, Any] | None) -> dict[str, Any] | None:
+    """Parse and validate an extraction schema payload.
+
+    Args:
+        schema: ``None`` (no schema), an already-parsed dict, or a JSON string.
+
+    Returns:
+        ``None`` when no schema was supplied, the dict itself when one was
+        passed in, otherwise the value decoded from the JSON string.
+
+    Raises:
+        GuardError: If the value cannot be decoded by ``json.loads`` (invalid
+            JSON text or an unsupported input type). The underlying decode
+            error is attached as ``__cause__``.
+    """
+    # Both "no schema" and "already a dict" are passed through untouched.
+    if schema is None or isinstance(schema, dict):
+        return schema
+
+    try:
+        # NOTE(review): the decoded value is returned as-is; it is not checked
+        # to be a JSON object, so non-dict JSON (e.g. a list) passes through.
+        return json.loads(schema)
+    except (json.JSONDecodeError, TypeError) as e:
+        # Chain explicitly so the decode failure is reported as the direct
+        # cause instead of as an unrelated error raised during handling.
+        raise GuardError(f"Invalid JSON schema: {e}", "Provide schema as a valid JSON string") from e
+
+
+async def do_navigate(
+    page: Any,
+    url: str,
+    timeout: int = 30000,
+    wait_until: str | None = None,
+) -> NavigateResult:
+    """Navigate ``page`` to ``url`` and report the page's URL and title.
+
+    Args:
+        page: Object exposing an awaitable ``goto``, a ``url`` attribute and an
+            awaitable ``title()`` (presumably a Playwright-style page; confirm
+            against callers).
+        url: Address forwarded to ``page.goto``.
+        timeout: Forwarded unchanged to ``page.goto`` (presumably milliseconds;
+            TODO confirm).
+        wait_until: Forwarded unchanged to ``page.goto``; not validated here.
+
+    Returns:
+        NavigateResult built from ``page.url`` and ``page.title()`` as read
+        after ``goto`` returned.
+    """
+    await page.goto(url, timeout=timeout, wait_until=wait_until)
+    # Read the URL first, then await the title.
+    landed_url = page.url
+    page_title = await page.title()
+    return NavigateResult(url=landed_url, title=page_title)
+
+
+async def do_screenshot(
+    page: Any,
+    full_page: bool = False,
+    selector: str | None = None,
+) -> ScreenshotResult:
+    """Capture a screenshot of the page, or of one element when ``selector`` is set.
+
+    Args:
+        page: Object exposing ``locator(selector)`` and an awaitable
+            ``screenshot``.
+        full_page: Passed to ``page.screenshot`` for page captures. It is not
+            applied to element captures, but is still echoed in the result.
+        selector: When truthy, the element located by it is captured instead
+            of the page.
+
+    Returns:
+        ScreenshotResult holding the captured bytes and the ``full_page`` flag
+        exactly as it was passed in.
+    """
+    if not selector:
+        image = await page.screenshot(full_page=full_page)
+        return ScreenshotResult(data=image, full_page=full_page)
+
+    target = page.locator(selector)
+    image = await target.screenshot()
+    return ScreenshotResult(data=image, full_page=full_page)
+
+
+async def do_act(page: Any, prompt: str) -> ActResult:
+    """Run ``page.act(prompt)`` and report it as completed.
+
+    Any exception raised by ``page.act`` propagates to the caller; a result is
+    only produced when the call returns normally.
+    """
+    await page.act(prompt)
+    outcome = ActResult(prompt=prompt, completed=True)
+    return outcome
+
+
+async def do_extract(
+    page: Any,
+    prompt: str,
+    schema: str | dict[str, Any] | None = None,
+) -> ExtractResult:
+    """Extract data from ``page`` using ``prompt`` and an optional schema.
+
+    Args:
+        page: Object exposing an awaitable ``extract(prompt=..., schema=...)``.
+        prompt: Extraction prompt forwarded to ``page.extract``.
+        schema: ``None``, a dict, or a JSON string; normalised through
+            ``parse_extract_schema`` before the page is called.
+
+    Returns:
+        ExtractResult wrapping whatever ``page.extract`` returned.
+
+    Raises:
+        GuardError: Propagated from ``parse_extract_schema`` when the schema
+            string cannot be decoded; the page is not called in that case.
+    """
+    # Normalise the schema first so a bad schema fails before any page call.
+    schema_payload = parse_extract_schema(schema)
+    payload = await page.extract(prompt=prompt, schema=schema_payload)
+    return ExtractResult(extracted=payload)
--- a/skyvern/cli/core/guards.py
+++ b/skyvern/cli/core/guards.py
@@ -0,0 +1,81 @@
+"""Shared input validation guards for both MCP and CLI surfaces."""
+
+from __future__ import annotations
+
+import re
+
+# Case-insensitive, word-bounded match for credential-like terms in prompt text:
+# password/passphrase/passcode (optionally plural), secret, credential, pin,
+# "pin code"/"pincode", pwd and passwd.
+# NOTE(review): the bare word "pin" matches on its own, and the plural
+# "credentials" does not match (the trailing \b fails before the "s") — confirm
+# both are intended.
+PASSWORD_PATTERN = re.compile(
+    r"\bpass(?:word|phrase|code)s?\b|\bsecret\b|\bcredential\b|\bpin\s*(?:code)?\b|\bpwd\b|\bpasswd\b",
+    re.IGNORECASE,
+)
+
+# Case-insensitive match for JS that refers to a password-typed input
+# (`type=password`, a `.type ==`/`===` comparison against a quoted "password",
+# or `input[type=password]`) followed later on the same line by `.value =`.
+# `.` does not cross newlines here, and `.value\s*=` also matches the start of
+# a `.value ==` comparison.
+JS_PASSWORD_PATTERN = re.compile(
+    r"""(?:type\s*=\s*['"]?password|\.type\s*===?\s*['"]password|input\[type=password\]).*?\.value\s*=""",
+    re.IGNORECASE,
+)
+
+# Hint text attached to the GuardError raised by the two password guards below.
+CREDENTIAL_HINT = (
+    "Use skyvern_login with a stored credential to authenticate. "
+    "Create credentials via CLI: skyvern credentials add. "
+    "Never pass passwords through tool calls."
+)
+
+# Accepted values for the corresponding validators in this module.
+VALID_WAIT_UNTIL = ("load", "domcontentloaded", "networkidle", "commit")
+VALID_BUTTONS = ("left", "right", "middle")
+# Not referenced by any validator visible in this module; presumably consumed elsewhere.
+VALID_ELEMENT_STATES = ("visible", "hidden", "attached", "detached")
+
+
+class GuardError(Exception):
+    """Raised when an input guard blocks an operation.
+
+    Attributes:
+        hint: Optional remediation text for the caller to surface alongside
+            the error message; empty string when none was given.
+    """
+
+    def __init__(self, message: str, hint: str = "") -> None:
+        """Store ``hint`` and initialise the exception with ``message`` only."""
+        self.hint = hint
+        super().__init__(message)
+
+
+def check_password_prompt(text: str) -> None:
+    """Reject prompt text that mentions password/credential terms.
+
+    Returns ``None`` when ``PASSWORD_PATTERN`` finds nothing in ``text``.
+
+    Raises:
+        GuardError: If the pattern matches anywhere in ``text``; the error
+            carries ``CREDENTIAL_HINT`` as its hint.
+    """
+    if PASSWORD_PATTERN.search(text) is None:
+        return
+    raise GuardError(
+        "Cannot perform password/credential actions — credentials must not be passed through tool calls",
+        CREDENTIAL_HINT,
+    )
+
+
+def check_js_password(expression: str) -> None:
+    """Reject JS expressions that assign to a password field's value.
+
+    Returns ``None`` when ``JS_PASSWORD_PATTERN`` finds nothing in
+    ``expression``.
+
+    Raises:
+        GuardError: If the pattern matches anywhere in ``expression``; the
+            error carries ``CREDENTIAL_HINT`` as its hint.
+    """
+    if JS_PASSWORD_PATTERN.search(expression) is None:
+        return
+    raise GuardError(
+        "Cannot set password field values via JavaScript — credentials must not be passed through tool calls",
+        CREDENTIAL_HINT,
+    )
+
+
+def validate_wait_until(value: str | None) -> None:
+    """Check that ``value`` is ``None`` or one of ``VALID_WAIT_UNTIL``.
+
+    Raises:
+        GuardError: If ``value`` is set and is not an accepted option.
+    """
+    if value is None or value in VALID_WAIT_UNTIL:
+        return
+    raise GuardError(
+        f"Invalid wait_until: {value}",
+        "Use load, domcontentloaded, networkidle, or commit",
+    )
+
+
+def validate_button(value: str | None) -> None:
+    """Check that ``value`` is ``None`` or one of ``VALID_BUTTONS``.
+
+    Raises:
+        GuardError: If ``value`` is set and is not an accepted option.
+    """
+    if value is None or value in VALID_BUTTONS:
+        return
+    raise GuardError(f"Invalid button: {value}", "Use left, right, or middle")
+
+
+def resolve_ai_mode(
+    selector: str | None,
+    intent: str | None,
+) -> tuple[str | None, str | None]:
+    """Determine AI mode from selector/intent combination.
+
+    Both arguments are judged by truthiness, so an empty string counts as
+    "not provided".
+
+    Returns:
+        ``(ai_mode, error_code)``:
+
+        * intent only          -> ``("proactive", None)``
+        * intent and selector  -> ``("fallback", None)``
+        * selector only        -> ``(None, None)``
+        * neither              -> ``(None, "INVALID_INPUT")``; the call should fail.
+    """
+    has_selector = bool(selector)
+
+    if intent:
+        mode = "fallback" if has_selector else "proactive"
+        return mode, None
+
+    # No intent from here on: a selector alone is fine, nothing at all is an error.
+    if has_selector:
+        return None, None
+    return None, "INVALID_INPUT"
--- a/skyvern/cli/core/session_ops.py
+++ b/skyvern/cli/core/session_ops.py
@@ -0,0 +1,74 @@
+"""Shared session operations for MCP tools and CLI commands."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any
+
+from skyvern.schemas.runs import ProxyLocation
+
+
+@dataclass
+class SessionCreateResult:
+    """Summary of a browser session created by do_session_create."""
+
+    session_id: str | None  # cloud browser's `browser_session_id`; None for local browsers
+    local: bool = False  # True only when a local browser was launched
+    headless: bool = False  # headless flag used for the local launch; left False for cloud sessions
+    timeout_minutes: int | None = None  # timeout passed to the cloud launch; left None for local browsers
+
+
+@dataclass
+class SessionCloseResult:
+    """Summary of a browser session close request."""
+
+    session_id: str | None  # ID of the session that was asked to close
+    closed: bool = True  # do_session_close leaves this at True once the close call returns
+
+
+@dataclass
+class SessionInfo:
+    """Flattened view of one browser session as produced by do_session_list."""
+
+    session_id: str  # the session's `browser_session_id`
+    status: str | None  # copied from the session's `status`
+    started_at: str | None  # `started_at.isoformat()`, or None when the session has no start time
+    timeout: int | None  # copied from the session's `timeout` (unit not established here)
+    runnable_id: str | None = None  # copied from the session's `runnable_id`
+    available: bool = False  # True when there is no runnable_id and a browser_address is present
+
+
+async def do_session_create(
+    skyvern: Any,
+    timeout: int = 60,
+    proxy_location: str | None = None,
+    local: bool = False,
+    headless: bool = False,
+) -> tuple[Any, SessionCreateResult]:
+    """Create a browser session and return ``(browser, result)``.
+
+    Args:
+        skyvern: Client exposing awaitable ``launch_local_browser`` and
+            ``launch_cloud_browser``.
+        timeout: Forwarded to ``launch_cloud_browser`` and echoed back as
+            ``timeout_minutes``; unused for local browsers.
+        proxy_location: When truthy, converted with ``ProxyLocation(...)`` and
+            forwarded to the cloud launch; unused for local browsers.
+        local: Launch a local browser instead of a cloud one.
+        headless: Forwarded to ``launch_local_browser``; unused for cloud
+            browsers.
+
+    Returns:
+        The launched browser object and a SessionCreateResult describing it.
+    """
+    if not local:
+        # Cloud path: only build a ProxyLocation when a location was supplied.
+        region = None
+        if proxy_location:
+            region = ProxyLocation(proxy_location)
+        cloud_browser = await skyvern.launch_cloud_browser(timeout=timeout, proxy_location=region)
+        cloud_summary = SessionCreateResult(
+            session_id=cloud_browser.browser_session_id,
+            timeout_minutes=timeout,
+        )
+        return cloud_browser, cloud_summary
+
+    # Local path: there is no remote session, hence no session ID.
+    local_browser = await skyvern.launch_local_browser(headless=headless)
+    local_summary = SessionCreateResult(session_id=None, local=True, headless=headless)
+    return local_browser, local_summary
+
+
+async def do_session_close(skyvern: Any, session_id: str) -> SessionCloseResult:
+    """Close the browser session identified by ``session_id``.
+
+    Awaits ``skyvern.close_browser_session(session_id)``; any exception it
+    raises propagates. On normal return the result echoes the session ID.
+    """
+    await skyvern.close_browser_session(session_id)
+    summary = SessionCloseResult(session_id=session_id)
+    return summary
+
+
+async def do_session_list(skyvern: Any) -> list[SessionInfo]:
+    """List all browser sessions as SessionInfo records.
+
+    Awaits ``skyvern.get_browser_sessions()`` and converts each returned
+    session, preserving order.
+    """
+    infos: list[SessionInfo] = []
+    for session in await skyvern.get_browser_sessions():
+        started = session.started_at.isoformat() if session.started_at else None
+        # Available means: not attached to a runnable and a browser address is present.
+        is_free = session.runnable_id is None and session.browser_address is not None
+        infos.append(
+            SessionInfo(
+                session_id=session.browser_session_id,
+                status=session.status,
+                started_at=started,
+                timeout=session.timeout,
+                runnable_id=session.runnable_id,
+                available=is_free,
+            )
+        )
+    return infos