SkyvernBrowserPage extends SkyvernPage (#3921)

2025-11-07 10:58:22 -07:00
parent 926a5da13e
commit a9c3d692ff
6 changed files with 304 additions and 492 deletions
--- a/skyvern/core/script_generations/real_skyvern_page_ai.py
+++ b/skyvern/core/script_generations/real_skyvern_page_ai.py
@@ -134,11 +134,11 @@ class RealSkyvernPageAi(SkyvernPageAi):

    async def ai_click(
        self,
-        selector: str,
+        selector: str | None,
        intention: str,
        data: str | dict[str, Any] | None = None,
        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
-    ) -> str:
+    ) -> str | None:
        """Click an element using AI to locate it based on intention."""
        try:
            # Build the element tree of the current page for the prompt
@@ -193,7 +193,7 @@ class RealSkyvernPageAi(SkyvernPageAi):
    async def ai_input_text(
        self,
        selector: str | None,
-        value: str,
+        value: str | None,
        intention: str,
        data: str | dict[str, Any] | None = None,
        totp_identifier: str | None = None,
@@ -419,8 +419,8 @@ class RealSkyvernPageAi(SkyvernPageAi):

    async def ai_select_option(
        self,
-        selector: str,
-        value: str,
+        selector: str | None,
+        value: str | None,
        intention: str,
        data: str | dict[str, Any] | None = None,
        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
--- a/skyvern/core/script_generations/skyvern_page.py
+++ b/skyvern/core/script_generations/skyvern_page.py
@@ -4,7 +4,7 @@ import asyncio
 import copy
 from dataclasses import dataclass
 from enum import StrEnum
-from typing import Any, Callable, Literal
+from typing import Any, Callable, Literal, overload

 import structlog
 from playwright.async_api import Page
@@ -95,42 +95,94 @@ class SkyvernPage:
        await self.page.goto(url, timeout=timeout)

    ######### Public Interfaces #########
-    @action_wrap(ActionType.CLICK)
+
+    @overload
    async def click(
        self,
        selector: str,
+        *,
        prompt: str | None = None,
        ai: str | None = "fallback",
        data: str | dict[str, Any] | None = None,
        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
        intention: str | None = None,  # backward compatibility
-    ) -> str:
-        """Click an element identified by ``selector``.
+        **kwargs: Any,
+    ) -> str | None: ...

-        When ``prompt`` and ``data`` are provided a new click action is
-        generated via the ``single-click-action`` prompt.  The model returns a
-        fresh "xpath=..." selector based on the current DOM and the updated data for this run.
-        The browser then clicks the element using this newly generated xpath selector.
+    @overload
+    async def click(
+        self,
+        *,
+        prompt: str,
+        ai: str | None = "fallback",
+        data: str | dict[str, Any] | None = None,
+        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
+        intention: str | None = None,  # backward compatibility
+        **kwargs: Any,
+    ) -> str | None: ...

-        If the prompt generation or parsing fails for any reason we fall back to
-        clicking the originally supplied ``selector``.
+    @action_wrap(ActionType.CLICK)
+    async def click(
+        self,
+        selector: str | None = None,
+        *,
+        prompt: str | None = None,
+        ai: str | None = "fallback",
+        data: str | dict[str, Any] | None = None,
+        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
+        intention: str | None = None,  # backward compatibility
+        **kwargs: Any,
+    ) -> str | None:
+        """Click an element using a CSS selector, AI-powered prompt matching, or both.
+
+        This method supports three modes:
+        - **Selector-based**: Click the element matching the CSS selector
+        - **AI-powered**: Use natural language to describe which element to click
+        - **Fallback mode** (default): Try the selector first, fall back to AI if it fails
+
+        Args:
+            selector: CSS selector for the target element.
+            prompt: Natural language description of which element to click.
+            ai: AI behavior mode. Defaults to "fallback" which tries selector first, then AI.
+            data: Additional context data for AI processing.
+            timeout: Maximum time to wait for the click action in milliseconds.
+
+        Returns:
+            The selector string that was successfully used to click the element, or None.
+
+        Examples:
+            ```python
+            # Click using a CSS selector
+            await page.click("#open-invoice-button")
+
+            # Click using AI with natural language
+            await page.click(prompt="Click on the 'Open Invoice' button")
+
+            # Try selector first, fall back to AI if selector fails
+            await page.click("#open-invoice-button", prompt="Click on the 'Open Invoice' button")
+            ```
        """
        # Backward compatibility
        if intention is not None and prompt is None:
            prompt = intention

+        if not selector and not prompt:
+            raise ValueError("Missing input: pass a selector and/or a prompt.")
+
        context = skyvern_context.current()
        if context and context.ai_mode_override:
            ai = context.ai_mode_override
        if ai == "fallback":
            # try to click the element with the original selector first
            error_to_raise = None
-            try:
-                locator = self.page.locator(selector)
-                await locator.click(timeout=timeout)
-                return selector
-            except Exception as e:
-                error_to_raise = e
+            if selector:
+                try:
+                    locator = self.page.locator(selector)
+                    await locator.click(timeout=timeout, **kwargs)
+                    return selector
+                except Exception as e:
+                    error_to_raise = e
+                    selector = None

            # if the original selector doesn't work, try to click the element with the ai generated selector
            if prompt:
@@ -152,30 +204,104 @@ class SkyvernPage:
                    data=data,
                    timeout=timeout,
                )
-        locator = self.page.locator(selector)
-        await locator.click(timeout=timeout)
+
+        if selector:
+            locator = self.page.locator(selector)
+            await locator.click(timeout=timeout, **kwargs)  # kwargs are click options, same as the fallback branch
+
        return selector

+    @overload
+    async def fill(
+        self,
+        selector: str,
+        value: str,
+        *,
+        prompt: str | None = None,
+        ai: str | None = "fallback",
+        data: str | dict[str, Any] | None = None,
+        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
+        totp_identifier: str | None = None,
+        totp_url: str | None = None,
+        intention: str | None = None,  # backward compatibility
+    ) -> str: ...
+
+    @overload
+    async def fill(
+        self,
+        *,
+        prompt: str,
+        value: str | None = None,
+        selector: str | None = None,
+        ai: str | None = "fallback",
+        data: str | dict[str, Any] | None = None,
+        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
+        totp_identifier: str | None = None,
+        totp_url: str | None = None,
+        intention: str | None = None,  # backward compatibility
+    ) -> str: ...
+
    @action_wrap(ActionType.INPUT_TEXT)
    async def fill(
        self,
-        selector: str | None,
-        value: str,
-        ai: str | None = "fallback",
+        selector: str | None = None,
+        value: str | None = None,
+        *,
        prompt: str | None = None,
+        ai: str | None = "fallback",
        data: str | dict[str, Any] | None = None,
        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
        totp_identifier: str | None = None,
        totp_url: str | None = None,
        intention: str | None = None,  # backward compatibility
    ) -> str:
+        """Fill an input field using a CSS selector, AI-powered prompt matching, or both.
+
+        This method supports three modes:
+        - **Selector-based**: Fill the input field with a value using CSS selector
+        - **AI-powered**: Use natural language prompt (AI extracts value from prompt)
+        - **Fallback mode** (default): Try the selector first, fall back to AI if it fails
+
+        Args:
+            selector: CSS selector for the target input element.
+            value: The text value to input into the field.
+            prompt: Natural language description of which field to fill and what value.
+            ai: AI behavior mode. Defaults to "fallback" which tries selector first, then AI.
+            data: Additional context data for AI processing.
+            timeout: Maximum time to wait for the fill action in milliseconds.
+            totp_identifier: TOTP identifier for time-based one-time password fields.
+            totp_url: URL to fetch TOTP codes from for authentication.
+
+        Returns:
+            The value that was successfully filled into the field.
+
+        Examples:
+            ```python
+            # Fill using selector and value (both positional)
+            await page.fill("#email-input", "user@example.com")
+
+            # Fill using AI with natural language (prompt only)
+            await page.fill(prompt="Fill 'user@example.com' in the email address field")
+
+            # Try selector first, fall back to AI if selector fails
+            await page.fill(
+                "#email-input",
+                "user@example.com",
+                prompt="Fill the email address with user@example.com"
+            )
+            ```
+        """
+
        # Backward compatibility
        if intention is not None and prompt is None:
            prompt = intention

+        if not selector and not prompt:
+            raise ValueError("Missing input: pass a selector and/or a prompt.")
+
        return await self._input_text(
            selector=selector,
-            value=value,
+            value=value or "",
            ai=ai,
            intention=prompt,
            data=data,
@@ -201,6 +327,9 @@ class SkyvernPage:
        if intention is not None and prompt is None:
            prompt = intention

+        if not selector and not prompt:
+            raise ValueError("Missing input: pass a selector and/or a prompt.")
+
        return await self._input_text(
            selector=selector,
            value=value,
@@ -225,7 +354,7 @@ class SkyvernPage:
    ) -> str:
        """Input text into an element identified by ``selector``.

-        When ``prompt`` and ``data`` are provided a new input text action is
+        When ``intention`` and ``data`` are provided a new input text action is
        generated via the `script-generation-input-text-generation` prompt.  The model returns a
        fresh text based on the current DOM and the updated data for this run.
        The browser then inputs the text using this newly generated text.
@@ -248,6 +377,7 @@ class SkyvernPage:
                    return value
                except Exception as e:
                    error_to_raise = e
+                    selector = None

            if intention:
                return await self._ai.ai_input_text(
@@ -296,6 +426,9 @@ class SkyvernPage:
        if intention is not None and prompt is None:
            prompt = intention

+        if not selector and not prompt:
+            raise ValueError("Missing input: pass a selector and/or a prompt.")
+
        context = skyvern_context.current()
        if context and context.ai_mode_override:
            ai = context.ai_mode_override
@@ -308,6 +441,7 @@ class SkyvernPage:
                    await locator.set_input_files(file_path)
                except Exception as e:
                    error_to_raise = e
+                    selector = None

            if prompt:
                return await self._ai.ai_upload_file(
@@ -338,34 +472,104 @@ class SkyvernPage:
        await locator.set_input_files(file_path, timeout=timeout)
        return files

-    @action_wrap(ActionType.SELECT_OPTION)
+    @overload
    async def select_option(
        self,
        selector: str,
        value: str | None = None,
-        label: str | None = None,
-        ai: str | None = "fallback",
+        *,
        prompt: str | None = None,
+        ai: str | None = "fallback",
        data: str | dict[str, Any] | None = None,
        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
        intention: str | None = None,  # backward compatibility
-    ) -> str:
+        **kwargs: Any,
+    ) -> str | None: ...
+
+    @overload
+    async def select_option(
+        self,
+        *,
+        prompt: str,
+        value: str | None = None,
+        selector: str | None = None,
+        ai: str | None = "fallback",
+        data: str | dict[str, Any] | None = None,
+        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
+        intention: str | None = None,  # backward compatibility
+        **kwargs: Any,
+    ) -> str | None: ...
+
+    @action_wrap(ActionType.SELECT_OPTION)
+    async def select_option(
+        self,
+        selector: str | None = None,
+        value: str | None = None,
+        *,
+        prompt: str | None = None,
+        ai: str | None = "fallback",
+        data: str | dict[str, Any] | None = None,
+        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
+        intention: str | None = None,  # backward compatibility
+        **kwargs: Any,
+    ) -> str | None:
+        """Select an option from a dropdown using a CSS selector, AI-powered prompt matching, or both.
+
+        This method supports three modes:
+        - **Selector-based**: Select the option with a value using CSS selector
+        - **AI-powered**: Use natural language prompt (AI extracts value from prompt)
+        - **Fallback mode** (default): Try the selector first, fall back to AI if it fails
+
+        Args:
+            selector: CSS selector for the target select/dropdown element.
+            value: The option value to select.
+            prompt: Natural language description of which option to select.
+            ai: AI behavior mode. Defaults to "fallback" which tries selector first, then AI.
+            data: Additional context data for AI processing.
+            timeout: Maximum time to wait for the select action in milliseconds.
+
+        Returns:
+            The value that was successfully selected.
+
+        Examples:
+            ```python
+            # Select using selector and value (both positional)
+            await page.select_option("#country", "us")
+
+            # Select using AI with natural language (prompt only)
+            await page.select_option(prompt="Select 'United States' from the country dropdown")
+
+            # Try selector first, fall back to AI if selector fails
+            await page.select_option(
+                "#country",
+                "us",
+                prompt="Select United States from country"
+            )
+            ```
+        """
+
        # Backward compatibility
        if intention is not None and prompt is None:
            prompt = intention

+        if not selector and not prompt:
+            raise ValueError("Missing input: pass a selector and/or a prompt.")
+
        context = skyvern_context.current()
        if context and context.ai_mode_override:
            ai = context.ai_mode_override
        value = value or ""
        if ai == "fallback":
            error_to_raise = None
-            try:
-                locator = self.page.locator(selector)
-                await locator.select_option(value, timeout=timeout)
-                return value
-            except Exception as e:
-                error_to_raise = e
+            if selector:
+                try:
+                    locator = self.page.locator(selector)
+                    await locator.select_option(value, timeout=timeout, **kwargs)
+                    return value
+                except Exception as e:
+                    error_to_raise = e
+                    selector = None
+
            if prompt:
                return await self._ai.ai_select_option(
                    selector=selector,
@@ -386,8 +590,9 @@ class SkyvernPage:
                data=data,
                timeout=timeout,
            )
-        locator = self.page.locator(selector)
-        await locator.select_option(value, timeout=timeout)
+        if selector:
+            locator = self.page.locator(selector)
+            await locator.select_option(value, timeout=timeout, **kwargs)
        return value

    @action_wrap(ActionType.WAIT)
@@ -445,6 +650,35 @@ class SkyvernPage:
        intention: str | None = None,
        data: str | dict[str, Any] | None = None,
    ) -> dict[str, Any] | list | str | None:
+        """Extract structured data from the page using AI.
+
+        Args:
+            prompt: Natural language description of what data to extract.
+            schema: JSON Schema defining the structure of data to extract.
+            error_code_mapping: Mapping of error codes to custom error messages.
+            intention: Additional context about the extraction intent.
+            data: Additional context data for AI processing.
+
+        Returns:
+            Extracted data matching the provided schema, or None if extraction fails.
+
+        Examples:
+            ```python
+            # Extract structured data with JSON Schema
+            result = await page.extract(
+                prompt="Extract product information",
+                schema={
+                    "type": "object",
+                    "properties": {
+                        "name": {"type": "string", "description": "Product name"},
+                        "price": {"type": "number", "description": "Product price"}
+                    },
+                    "required": ["name", "price"]
+                }
+            )
+            # Returns: {"name": "...", "price": 29.99}
+            ```
+        """
        return await self._ai.ai_extract(prompt, schema, error_code_mapping, intention, data)

    @action_wrap(ActionType.VERIFICATION_CODE)
--- a/skyvern/core/script_generations/skyvern_page_ai.py
+++ b/skyvern/core/script_generations/skyvern_page_ai.py
@@ -10,18 +10,18 @@ class SkyvernPageAi(Protocol):

    async def ai_click(
        self,
-        selector: str,
+        selector: str | None,
        intention: str,
        data: str | dict[str, Any] | None = None,
        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
-    ) -> str:
+    ) -> str | None:
        """Click an element using AI to locate it based on intention."""
        ...

    async def ai_input_text(
        self,
        selector: str | None,
-        value: str,
+        value: str | None,
        intention: str,
        data: str | dict[str, Any] | None = None,
        totp_identifier: str | None = None,
@@ -44,8 +44,8 @@ class SkyvernPageAi(Protocol):

    async def ai_select_option(
        self,
-        selector: str,
-        value: str,
+        selector: str | None,
+        value: str | None,
        intention: str,
        data: str | dict[str, Any] | None = None,
        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
--- a/skyvern/forge/sdk/schemas/sdk_actions.py
+++ b/skyvern/forge/sdk/schemas/sdk_actions.py
@@ -28,7 +28,7 @@ class ClickAction(SdkActionBase):
    """Click action parameters."""

    type: Literal["ai_click"] = "ai_click"
-    selector: str = Field(default="", description="CSS selector for the element")
+    selector: str | None = Field(default="", description="CSS selector for the element")
    intention: str = Field(default="", description="The intention or goal of the click")
    data: str | dict[str, Any] | None = Field(None, description="Additional context data")
    timeout: float = Field(default=settings.BROWSER_ACTION_TIMEOUT_MS, description="Timeout in milliseconds")
@@ -39,7 +39,7 @@ class InputTextAction(SdkActionBase):

    type: Literal["ai_input_text"] = "ai_input_text"
    selector: str | None = Field(default="", description="CSS selector for the element")
-    value: str = Field(default="", description="Value to input")
+    value: str | None = Field(default="", description="Value to input")
    intention: str = Field(default="", description="The intention or goal of the input")
    data: str | dict[str, Any] | None = Field(None, description="Additional context data")
    totp_identifier: str | None = Field(None, description="TOTP identifier for input_text actions")
@@ -51,8 +51,8 @@ class SelectOptionAction(SdkActionBase):
    """Select option action parameters."""

    type: Literal["ai_select_option"] = "ai_select_option"
-    selector: str = Field(default="", description="CSS selector for the element")
-    value: str = Field(default="", description="Value to select")
+    selector: str | None = Field(default="", description="CSS selector for the element")
+    value: str | None = Field(default="", description="Value to select")
    intention: str = Field(default="", description="The intention or goal of the selection")
    data: str | dict[str, Any] | None = Field(None, description="Additional context data")
    timeout: float = Field(default=settings.BROWSER_ACTION_TIMEOUT_MS, description="Timeout in milliseconds")
--- a/skyvern/library/skyvern_browser_page.py
+++ b/skyvern/library/skyvern_browser_page.py
@@ -1,15 +1,14 @@
 import asyncio
-from typing import TYPE_CHECKING, Any, Pattern, overload
+from typing import TYPE_CHECKING, Any, Pattern

 from playwright.async_api import Page

 from skyvern.client import GetRunResponse
 from skyvern.client.types.workflow_run_response import WorkflowRunResponse
-from skyvern.config import settings
+from skyvern.core.script_generations.skyvern_page import SkyvernPage
 from skyvern.library.constants import DEFAULT_AGENT_HEARTBEAT_INTERVAL, DEFAULT_AGENT_TIMEOUT
 from skyvern.library.skyvern_browser_page_ai import SdkSkyvernPageAi
 from skyvern.library.skyvern_locator import SkyvernLocator
-from skyvern.webeye.actions import handler_utils

 if TYPE_CHECKING:
    from skyvern.library.skyvern_browser import SkyvernBrowser
@@ -206,7 +205,7 @@ class SkyvernPageRun:
        return url


-class SkyvernBrowserPage:
+class SkyvernBrowserPage(SkyvernPage):
    """A browser page wrapper that combines Playwright's page API with Skyvern's AI capabilities.

    This class provides a unified interface for both traditional browser automation (via Playwright)
@@ -230,365 +229,10 @@ class SkyvernBrowserPage:
    """

    def __init__(self, browser: "SkyvernBrowser", page: Page):
+        super().__init__(page, SdkSkyvernPageAi(browser, page))
        self._browser = browser
-        self._page = page
-        self._ai = SdkSkyvernPageAi(browser, page)
        self.run = SkyvernPageRun(browser, page)

-    @overload
-    async def click(
-        self,
-        selector: str,
-        *,
-        prompt: str | None = None,
-        ai: str | None = "fallback",
-        data: str | dict[str, Any] | None = None,
-        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
-        **kwargs: Any,
-    ) -> str | None: ...
-
-    @overload
-    async def click(
-        self,
-        *,
-        prompt: str,
-        ai: str | None = "fallback",
-        data: str | dict[str, Any] | None = None,
-        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
-        **kwargs: Any,
-    ) -> str | None: ...
-
-    async def click(
-        self,
-        selector: str | None = None,
-        *,
-        prompt: str | None = None,
-        ai: str | None = "fallback",
-        data: str | dict[str, Any] | None = None,
-        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
-        **kwargs: Any,
-    ) -> str | None:
-        """Click an element using a CSS selector, AI-powered prompt matching, or both.
-
-        This method supports three modes:
-        - **Selector-based**: Click the element matching the CSS selector
-        - **AI-powered**: Use natural language to describe which element to click
-        - **Fallback mode** (default): Try the selector first, fall back to AI if it fails
-
-        Args:
-            selector: CSS selector for the target element.
-            prompt: Natural language description of which element to click.
-            ai: AI behavior mode. Defaults to "fallback" which tries selector first, then AI.
-            data: Additional context data for AI processing.
-            timeout: Maximum time to wait for the click action in milliseconds.
-
-        Returns:
-            The selector string that was successfully used to click the element, or None.
-
-        Examples:
-            ```python
-            # Click using a CSS selector
-            await page.click("#open-invoice-button")
-
-            # Click using AI with natural language
-            await page.click(prompt="Click on the 'Open Invoice' button")
-
-            # Try selector first, fall back to AI if selector fails
-            await page.click("#open-invoice-button", prompt="Click on the 'Open Invoice' button")
-            ```
-        """
-
-        if ai == "fallback":
-            # try to click the element with the original selector first
-            error_to_raise = None
-            if selector:
-                try:
-                    locator = self._page.locator(selector)
-                    await locator.click(timeout=timeout, **kwargs)
-                    return selector
-                except Exception as e:
-                    error_to_raise = e
-                    selector = None
-
-            # if the original selector doesn't work, try to click the element with the ai generated selector
-            if prompt:
-                return await self._ai.ai_click(
-                    selector=selector or "",
-                    intention=prompt,
-                    data=data,
-                    timeout=timeout,
-                )
-            if error_to_raise:
-                raise error_to_raise
-            else:
-                return selector
-        elif ai == "proactive":
-            if prompt:
-                return await self._ai.ai_click(
-                    selector=selector or "",
-                    intention=prompt,
-                    data=data,
-                    timeout=timeout,
-                )
-
-        if selector:
-            locator = self._page.locator(selector, **kwargs)
-            await locator.click(timeout=timeout)
-        return selector
-
-    @overload
-    async def fill(
-        self,
-        selector: str,
-        value: str,
-        *,
-        prompt: str | None = None,
-        ai: str | None = "fallback",
-        data: str | dict[str, Any] | None = None,
-        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
-        totp_identifier: str | None = None,
-        totp_url: str | None = None,
-    ) -> str: ...
-
-    @overload
-    async def fill(
-        self,
-        *,
-        prompt: str,
-        value: str | None = None,
-        selector: str | None = None,
-        ai: str | None = "fallback",
-        data: str | dict[str, Any] | None = None,
-        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
-        totp_identifier: str | None = None,
-        totp_url: str | None = None,
-    ) -> str: ...
-
-    async def fill(
-        self,
-        selector: str | None = None,
-        value: str | None = None,
-        *,
-        prompt: str | None = None,
-        ai: str | None = "fallback",
-        data: str | dict[str, Any] | None = None,
-        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
-        totp_identifier: str | None = None,
-        totp_url: str | None = None,
-    ) -> str:
-        """Fill an input field using a CSS selector, AI-powered prompt matching, or both.
-
-        This method supports three modes:
-        - **Selector-based**: Fill the input field with a value using CSS selector
-        - **AI-powered**: Use natural language prompt (AI extracts value from prompt)
-        - **Fallback mode** (default): Try the selector first, fall back to AI if it fails
-
-        Args:
-            selector: CSS selector for the target input element.
-            value: The text value to input into the field.
-            prompt: Natural language description of which field to fill and what value.
-            ai: AI behavior mode. Defaults to "fallback" which tries selector first, then AI.
-            data: Additional context data for AI processing.
-            timeout: Maximum time to wait for the fill action in milliseconds.
-            totp_identifier: TOTP identifier for time-based one-time password fields.
-            totp_url: URL to fetch TOTP codes from for authentication.
-
-        Returns:
-            The value that was successfully filled into the field.
-
-        Examples:
-            ```python
-            # Fill using selector and value (both positional)
-            await page.fill("#email-input", "user@example.com")
-
-            # Fill using AI with natural language (prompt only)
-            await page.fill(prompt="Fill 'user@example.com' in the email address field")
-
-            # Try selector first, fall back to AI if selector fails
-            await page.fill(
-                "#email-input",
-                "user@example.com",
-                prompt="Fill the email address with user@example.com"
-            )
-            ```
-        """
-        return await self._input_text(
-            selector=selector,
-            value=value or "",
-            ai=ai,
-            intention=prompt,
-            data=data,
-            timeout=timeout,
-            totp_identifier=totp_identifier,
-            totp_url=totp_url,
-        )
-
-    async def goto(self, url: str, **kwargs: Any) -> None:
-        """Navigate to the given URL.
-
-        Args:
-            url: URL to navigate page to.
-            **kwargs: Additional options like timeout, wait_until, referer, etc.
-        """
-        await self._page.goto(url, **kwargs)
-
-    async def type(self, selector: str, text: str, **kwargs: Any) -> None:
-        """Type text into an element character by character.
-
-        Args:
-            selector: A selector to search for an element to type into.
-            text: Text to type into the element.
-            **kwargs: Additional options like delay, timeout, no_wait_after, etc.
-        """
-        await self._page.type(selector, text, **kwargs)
-
-    @overload
-    async def select_option(
-        self,
-        selector: str,
-        value: str | None = None,
-        *,
-        prompt: str | None = None,
-        ai: str | None = "fallback",
-        data: str | dict[str, Any] | None = None,
-        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
-        **kwargs: Any,
-    ) -> str: ...
-
-    @overload
-    async def select_option(
-        self,
-        *,
-        prompt: str,
-        value: str | None = None,
-        selector: str | None = None,
-        ai: str | None = "fallback",
-        data: str | dict[str, Any] | None = None,
-        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
-        **kwargs: Any,
-    ) -> str: ...
-
-    async def select_option(
-        self,
-        selector: str | None = None,
-        value: str | None = None,
-        *,
-        prompt: str | None = None,
-        ai: str | None = "fallback",
-        data: str | dict[str, Any] | None = None,
-        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
-        **kwargs: Any,
-    ) -> str:
-        """Select an option from a dropdown using a CSS selector, AI-powered prompt matching, or both.
-
-        This method supports three modes:
-        - **Selector-based**: Select the option with a value using CSS selector
-        - **AI-powered**: Use natural language prompt (AI extracts value from prompt)
-        - **Fallback mode** (default): Try the selector first, fall back to AI if it fails
-
-        Args:
-            selector: CSS selector for the target select/dropdown element.
-            value: The option value to select.
-            prompt: Natural language description of which option to select.
-            ai: AI behavior mode. Defaults to "fallback" which tries selector first, then AI.
-            data: Additional context data for AI processing.
-            timeout: Maximum time to wait for the select action in milliseconds.
-
-        Returns:
-            The value that was successfully selected.
-
-        Examples:
-            ```python
-            # Select using selector and value (both positional)
-            await page.select_option("#country", "us")
-
-            # Select using AI with natural language (prompt only)
-            await page.select_option(prompt="Select 'United States' from the country dropdown")
-
-            # Try selector first, fall back to AI if selector fails
-            await page.select_option(
-                "#country",
-                "us",
-                prompt="Select United States from country"
-            )
-            ```
-        """
-        value = value or ""
-        if ai == "fallback":
-            error_to_raise = None
-            if selector:
-                try:
-                    locator = self._page.locator(selector)
-                    await locator.select_option(value, timeout=timeout, **kwargs)
-                    return value
-                except Exception as e:
-                    error_to_raise = e
-                    selector = None
-
-            if prompt:
-                return await self._ai.ai_select_option(
-                    selector=selector or "",
-                    value=value,
-                    intention=prompt,
-                    data=data,
-                    timeout=timeout,
-                )
-            if error_to_raise:
-                raise error_to_raise
-            else:
-                return value
-        elif ai == "proactive" and prompt:
-            return await self._ai.ai_select_option(
-                selector=selector or "",
-                value=value,
-                intention=prompt,
-                data=data,
-                timeout=timeout,
-            )
-        if selector:
-            locator = self._page.locator(selector)
-            await locator.select_option(value, timeout=timeout, **kwargs)
-        return value
-
-    async def extract(
-        self,
-        prompt: str,
-        schema: dict[str, Any] | list | str | None = None,
-        error_code_mapping: dict[str, str] | None = None,
-        intention: str | None = None,
-        data: str | dict[str, Any] | None = None,
-    ) -> dict[str, Any] | list | str | None:
-        """Extract structured data from the page using AI.
-
-        Args:
-            prompt: Natural language description of what data to extract.
-            schema: JSON Schema defining the structure of data to extract.
-            error_code_mapping: Mapping of error codes to custom error messages.
-            intention: Additional context about the extraction intent.
-            data: Additional context data for AI processing.
-
-        Returns:
-            Extracted data matching the provided schema, or None if extraction fails.
-
-        Examples:
-            ```python
-            # Extract structured data with JSON Schema
-            result = await page.extract(
-                prompt="Extract product information",
-                schema={
-                    "type": "object",
-                    "properties": {
-                        "name": {"type": "string", "description": "Product name"},
-                        "price": {"type": "number", "description": "Product price"}
-                    },
-                    "required": ["name", "price"]
-                }
-            )
-            # Returns: {"name": "...", "price": 29.99}
-            ```
-        """
-        return await self._ai.ai_extract(prompt, schema, error_code_mapping, intention, data)
-
    async def act(
        self,
        prompt: str,
@@ -612,7 +256,7 @@ class SkyvernBrowserPage:
        Args:
            **kwargs: Additional options like timeout, wait_until, etc.
        """
-        await self._page.reload(**kwargs)
+        await self.page.reload(**kwargs)

    async def screenshot(self, **kwargs: Any) -> bytes:
        """Take a screenshot of the page.
@@ -623,7 +267,7 @@ class SkyvernBrowserPage:
        Returns:
            bytes: The screenshot as bytes (unless path is specified, then saves to file).
        """
-        return await self._page.screenshot(**kwargs)
+        return await self.page.screenshot(**kwargs)

    def locator(self, selector: str, **kwargs: Any) -> SkyvernLocator:
        """Find an element using a CSS selector or other selector syntax.
@@ -635,7 +279,7 @@ class SkyvernBrowserPage:
        Returns:
            SkyvernLocator object that can be used to perform actions or assertions.
        """
-        return SkyvernLocator(self._page.locator(selector, **kwargs))
+        return SkyvernLocator(self.page.locator(selector, **kwargs))

    def get_by_label(self, text: str | Pattern[str], **kwargs: Any) -> SkyvernLocator:
        """Find an input element by its associated label text.
@@ -647,7 +291,7 @@ class SkyvernBrowserPage:
        Returns:
            SkyvernLocator object for the labeled input element.
        """
-        return SkyvernLocator(self._page.get_by_label(text, **kwargs))
+        return SkyvernLocator(self.page.get_by_label(text, **kwargs))

    def get_by_text(self, text: str | Pattern[str], **kwargs: Any) -> SkyvernLocator:
        """Find an element containing the specified text.
@@ -659,7 +303,7 @@ class SkyvernBrowserPage:
        Returns:
            SkyvernLocator object for the element containing the text.
        """
-        return SkyvernLocator(self._page.get_by_text(text, **kwargs))
+        return SkyvernLocator(self.page.get_by_text(text, **kwargs))

    def get_by_title(self, text: str | Pattern[str], **kwargs: Any) -> SkyvernLocator:
        """Find an element by its title attribute.
@@ -671,7 +315,7 @@ class SkyvernBrowserPage:
        Returns:
            SkyvernLocator object for the element with matching title.
        """
-        return SkyvernLocator(self._page.get_by_title(text, **kwargs))
+        return SkyvernLocator(self.page.get_by_title(text, **kwargs))

    def get_by_role(self, role: str, **kwargs: Any) -> SkyvernLocator:
        """Find an element by its ARIA role.
@@ -683,7 +327,7 @@ class SkyvernBrowserPage:
        Returns:
            SkyvernLocator object for the element with matching role.
        """
-        return SkyvernLocator(self._page.get_by_role(role, **kwargs))
+        return SkyvernLocator(self.page.get_by_role(role, **kwargs))

    def get_by_placeholder(self, text: str | Pattern[str], **kwargs: Any) -> SkyvernLocator:
        """Find an input element by its placeholder text.
@@ -695,7 +339,7 @@ class SkyvernBrowserPage:
        Returns:
            SkyvernLocator object for the input element with matching placeholder.
        """
-        return SkyvernLocator(self._page.get_by_placeholder(text, **kwargs))
+        return SkyvernLocator(self.page.get_by_placeholder(text, **kwargs))

    def get_by_alt_text(self, text: str | Pattern[str], **kwargs: Any) -> SkyvernLocator:
        """Find an element by its alt text (typically images).
@@ -707,7 +351,7 @@ class SkyvernBrowserPage:
        Returns:
            SkyvernLocator object for the element with matching alt text.
        """
-        return SkyvernLocator(self._page.get_by_alt_text(text, **kwargs))
+        return SkyvernLocator(self.page.get_by_alt_text(text, **kwargs))

    def get_by_test_id(self, test_id: str) -> SkyvernLocator:
        """Find an element by its test ID attribute.
@@ -718,70 +362,4 @@ class SkyvernBrowserPage:
        Returns:
            SkyvernLocator object for the element with matching test ID.
        """
-        return SkyvernLocator(self._page.get_by_test_id(test_id))
-
-    async def _input_text(
-        self,
-        selector: str | None,
-        value: str,
-        ai: str | None = "fallback",
-        intention: str | None = None,
-        data: str | dict[str, Any] | None = None,
-        totp_identifier: str | None = None,
-        totp_url: str | None = None,
-        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
-    ) -> str:
-        """Input text into an element identified by ``selector``.
-
-        When ``intention`` and ``data`` are provided a new input text action is
-        generated via the `script-generation-input-text-generation` prompt.  The model returns a
-        fresh text based on the current DOM and the updated data for this run.
-        The browser then inputs the text using this newly generated text.
-
-        If the prompt generation or parsing fails for any reason we fall back to
-        inputting the originally supplied ``value``.
-        """
-
-        # format the text with the actual value of the parameter if it's a secret when running a workflow
-        if ai == "fallback":
-            error_to_raise = None
-            if selector:
-                try:
-                    locator = self._page.locator(selector)
-                    await handler_utils.input_sequentially(locator, value, timeout=timeout)
-                    return value
-                except Exception as e:
-                    error_to_raise = e
-                    selector = None
-
-            if intention:
-                return await self._ai.ai_input_text(
-                    selector=selector,
-                    value=value,
-                    intention=intention,
-                    data=data,
-                    totp_identifier=totp_identifier,
-                    totp_url=totp_url,
-                    timeout=timeout,
-                )
-            if error_to_raise:
-                raise error_to_raise
-            else:
-                return value
-        elif ai == "proactive" and intention:
-            return await self._ai.ai_input_text(
-                selector=selector,
-                value=value,
-                intention=intention,
-                data=data,
-                totp_identifier=totp_identifier,
-                totp_url=totp_url,
-                timeout=timeout,
-            )
-
-        if not selector:
-            raise ValueError("Selector is required but was not provided")
-
-        locator = self._page.locator(selector)
-        await handler_utils.input_sequentially(locator, value, timeout=timeout)
-        return value
+        return SkyvernLocator(self.page.get_by_test_id(test_id))
--- a/skyvern/library/skyvern_browser_page_ai.py
+++ b/skyvern/library/skyvern_browser_page_ai.py
@@ -29,11 +29,11 @@ class SdkSkyvernPageAi(SkyvernPageAi):

    async def ai_click(
        self,
-        selector: str,
+        selector: str | None,
        intention: str,
        data: str | dict[str, Any] | None = None,
        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
-    ) -> str:
+    ) -> str | None:
        """Click an element using AI via API call."""

        await self._browser.sdk.ensure_has_server()
@@ -55,7 +55,7 @@ class SdkSkyvernPageAi(SkyvernPageAi):
    async def ai_input_text(
        self,
        selector: str | None,
-        value: str,
+        value: str | None,
        intention: str,
        data: str | dict[str, Any] | None = None,
        totp_identifier: str | None = None,
@@ -81,12 +81,12 @@ class SdkSkyvernPageAi(SkyvernPageAi):
            workflow_run_id=self._browser.workflow_run_id,
        )
        self._browser.workflow_run_id = response.workflow_run_id
-        return response.result if response.result else value
+        return response.result if response.result else value or ""

    async def ai_select_option(
        self,
-        selector: str,
-        value: str,
+        selector: str | None,
+        value: str | None,
        intention: str,
        data: str | dict[str, Any] | None = None,
        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
@@ -108,7 +108,7 @@ class SdkSkyvernPageAi(SkyvernPageAi):
            workflow_run_id=self._browser.workflow_run_id,
        )
        self._browser.workflow_run_id = response.workflow_run_id
-        return response.result if response.result else value
+        return response.result if response.result else value or ""

    async def ai_upload_file(
        self,