SkyvernBrowserPage extends SkyvernPage (#3921)

This commit is contained in:
Stanislav Novosad
2025-11-07 10:58:22 -07:00
committed by GitHub
parent 926a5da13e
commit a9c3d692ff
6 changed files with 304 additions and 492 deletions

View File

@@ -134,11 +134,11 @@ class RealSkyvernPageAi(SkyvernPageAi):
async def ai_click(
self,
selector: str,
selector: str | None,
intention: str,
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
) -> str:
) -> str | None:
"""Click an element using AI to locate it based on intention."""
try:
# Build the element tree of the current page for the prompt
@@ -193,7 +193,7 @@ class RealSkyvernPageAi(SkyvernPageAi):
async def ai_input_text(
self,
selector: str | None,
value: str,
value: str | None,
intention: str,
data: str | dict[str, Any] | None = None,
totp_identifier: str | None = None,
@@ -419,8 +419,8 @@ class RealSkyvernPageAi(SkyvernPageAi):
async def ai_select_option(
self,
selector: str,
value: str,
selector: str | None,
value: str | None,
intention: str,
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,

View File

@@ -4,7 +4,7 @@ import asyncio
import copy
from dataclasses import dataclass
from enum import StrEnum
from typing import Any, Callable, Literal
from typing import Any, Callable, Literal, overload
import structlog
from playwright.async_api import Page
@@ -95,42 +95,94 @@ class SkyvernPage:
await self.page.goto(url, timeout=timeout)
######### Public Interfaces #########
@action_wrap(ActionType.CLICK)
@overload
async def click(
self,
selector: str,
*,
prompt: str | None = None,
ai: str | None = "fallback",
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
intention: str | None = None, # backward compatibility
) -> str:
"""Click an element identified by ``selector``.
**kwargs: Any,
) -> str | None: ...
When ``prompt`` and ``data`` are provided a new click action is
generated via the ``single-click-action`` prompt. The model returns a
fresh "xpath=..." selector based on the current DOM and the updated data for this run.
The browser then clicks the element using this newly generated xpath selector.
@overload
async def click(
self,
*,
prompt: str,
ai: str | None = "fallback",
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
intention: str | None = None, # backward compatibility
**kwargs: Any,
) -> str | None: ...
If the prompt generation or parsing fails for any reason we fall back to
clicking the originally supplied ``selector``.
@action_wrap(ActionType.CLICK)
async def click(
self,
selector: str | None = None,
*,
prompt: str | None = None,
ai: str | None = "fallback",
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
intention: str | None = None, # backward compatibility
**kwargs: Any,
) -> str | None:
"""Click an element using a CSS selector, AI-powered prompt matching, or both.
This method supports three modes:
- **Selector-based**: Click the element matching the CSS selector
- **AI-powered**: Use natural language to describe which element to click
- **Fallback mode** (default): Try the selector first, fall back to AI if it fails
Args:
selector: CSS selector for the target element.
prompt: Natural language description of which element to click.
ai: AI behavior mode. Defaults to "fallback" which tries selector first, then AI.
data: Additional context data for AI processing.
timeout: Maximum time to wait for the click action in milliseconds.
Returns:
The selector string that was successfully used to click the element, or None.
Examples:
```python
# Click using a CSS selector
await page.click("#open-invoice-button")
# Click using AI with natural language
await page.click(prompt="Click on the 'Open Invoice' button")
# Try selector first, fall back to AI if selector fails
await page.click("#open-invoice-button", prompt="Click on the 'Open Invoice' button")
```
"""
# Backward compatibility
if intention is not None and prompt is None:
prompt = intention
if not selector and not prompt:
raise ValueError("Missing input: pass a selector and/or a prompt.")
context = skyvern_context.current()
if context and context.ai_mode_override:
ai = context.ai_mode_override
if ai == "fallback":
# try to click the element with the original selector first
error_to_raise = None
try:
locator = self.page.locator(selector)
await locator.click(timeout=timeout)
return selector
except Exception as e:
error_to_raise = e
if selector:
try:
locator = self.page.locator(selector)
await locator.click(timeout=timeout, **kwargs)
return selector
except Exception as e:
error_to_raise = e
selector = None
# if the original selector doesn't work, try to click the element with the ai generated selector
if prompt:
@@ -152,30 +204,104 @@ class SkyvernPage:
data=data,
timeout=timeout,
)
locator = self.page.locator(selector)
await locator.click(timeout=timeout)
if selector:
locator = self.page.locator(selector, **kwargs)
await locator.click(timeout=timeout)
return selector
@overload
async def fill(
self,
selector: str,
value: str,
*,
prompt: str | None = None,
ai: str | None = "fallback",
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
totp_identifier: str | None = None,
totp_url: str | None = None,
intention: str | None = None, # backward compatibility
) -> str: ...
@overload
async def fill(
self,
*,
prompt: str,
value: str | None = None,
selector: str | None = None,
ai: str | None = "fallback",
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
totp_identifier: str | None = None,
totp_url: str | None = None,
intention: str | None = None, # backward compatibility
) -> str: ...
@action_wrap(ActionType.INPUT_TEXT)
async def fill(
self,
selector: str | None,
value: str,
ai: str | None = "fallback",
selector: str | None = None,
value: str | None = None,
*,
prompt: str | None = None,
ai: str | None = "fallback",
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
totp_identifier: str | None = None,
totp_url: str | None = None,
intention: str | None = None, # backward compatibility
) -> str:
"""Fill an input field using a CSS selector, AI-powered prompt matching, or both.
This method supports three modes:
- **Selector-based**: Fill the input field with a value using CSS selector
- **AI-powered**: Use natural language prompt (AI extracts value from prompt)
- **Fallback mode** (default): Try the selector first, fall back to AI if it fails
Args:
selector: CSS selector for the target input element.
value: The text value to input into the field.
prompt: Natural language description of which field to fill and what value.
ai: AI behavior mode. Defaults to "fallback" which tries selector first, then AI.
data: Additional context data for AI processing.
timeout: Maximum time to wait for the fill action in milliseconds.
totp_identifier: TOTP identifier for time-based one-time password fields.
totp_url: URL to fetch TOTP codes from for authentication.
Returns:
The value that was successfully filled into the field.
Examples:
```python
# Fill using selector and value (both positional)
await page.fill("#email-input", "user@example.com")
# Fill using AI with natural language (prompt only)
await page.fill(prompt="Fill 'user@example.com' in the email address field")
# Try selector first, fall back to AI if selector fails
await page.fill(
"#email-input",
"user@example.com",
prompt="Fill the email address with user@example.com"
)
```
"""
# Backward compatibility
if intention is not None and prompt is None:
prompt = intention
if not selector and not prompt:
raise ValueError("Missing input: pass a selector and/or a prompt.")
return await self._input_text(
selector=selector,
value=value,
value=value or "",
ai=ai,
intention=prompt,
data=data,
@@ -201,6 +327,9 @@ class SkyvernPage:
if intention is not None and prompt is None:
prompt = intention
if not selector and not prompt:
raise ValueError("Missing input: pass a selector and/or a prompt.")
return await self._input_text(
selector=selector,
value=value,
@@ -225,7 +354,7 @@ class SkyvernPage:
) -> str:
"""Input text into an element identified by ``selector``.
When ``prompt`` and ``data`` are provided a new input text action is
When ``intention`` and ``data`` are provided a new input text action is
generated via the `script-generation-input-text-generation` prompt. The model returns a
fresh text based on the current DOM and the updated data for this run.
The browser then inputs the text using this newly generated text.
@@ -248,6 +377,7 @@ class SkyvernPage:
return value
except Exception as e:
error_to_raise = e
selector = None
if intention:
return await self._ai.ai_input_text(
@@ -296,6 +426,9 @@ class SkyvernPage:
if intention is not None and prompt is None:
prompt = intention
if not selector and not prompt:
raise ValueError("Missing input: pass a selector and/or a prompt.")
context = skyvern_context.current()
if context and context.ai_mode_override:
ai = context.ai_mode_override
@@ -308,6 +441,7 @@ class SkyvernPage:
await locator.set_input_files(file_path)
except Exception as e:
error_to_raise = e
selector = None
if prompt:
return await self._ai.ai_upload_file(
@@ -338,34 +472,104 @@ class SkyvernPage:
await locator.set_input_files(file_path, timeout=timeout)
return files
@action_wrap(ActionType.SELECT_OPTION)
@overload
async def select_option(
self,
selector: str,
value: str | None = None,
label: str | None = None,
ai: str | None = "fallback",
*,
prompt: str | None = None,
ai: str | None = "fallback",
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
intention: str | None = None, # backward compatibility
) -> str:
**kwargs: Any,
) -> str | None: ...
@overload
async def select_option(
self,
*,
prompt: str,
value: str | None = None,
selector: str | None = None,
ai: str | None = "fallback",
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
intention: str | None = None, # backward compatibility
**kwargs: Any,
) -> str | None: ...
@action_wrap(ActionType.SELECT_OPTION)
async def select_option(
self,
selector: str | None = None,
value: str | None = None,
*,
prompt: str | None = None,
ai: str | None = "fallback",
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
intention: str | None = None, # backward compatibility
**kwargs: Any,
) -> str | None:
"""Select an option from a dropdown using a CSS selector, AI-powered prompt matching, or both.
This method supports three modes:
- **Selector-based**: Select the option with a value using CSS selector
- **AI-powered**: Use natural language prompt (AI extracts value from prompt)
- **Fallback mode** (default): Try the selector first, fall back to AI if it fails
Args:
selector: CSS selector for the target select/dropdown element.
value: The option value to select.
prompt: Natural language description of which option to select.
ai: AI behavior mode. Defaults to "fallback" which tries selector first, then AI.
data: Additional context data for AI processing.
timeout: Maximum time to wait for the select action in milliseconds.
Returns:
The value that was successfully selected.
Examples:
```python
# Select using selector and value (both positional)
await page.select_option("#country", "us")
# Select using AI with natural language (prompt only)
await page.select_option(prompt="Select 'United States' from the country dropdown")
# Try selector first, fall back to AI if selector fails
await page.select_option(
"#country",
"us",
prompt="Select United States from country"
)
```
"""
# Backward compatibility
if intention is not None and prompt is None:
prompt = intention
if not selector and not prompt:
raise ValueError("Missing input: pass a selector and/or a prompt.")
context = skyvern_context.current()
if context and context.ai_mode_override:
ai = context.ai_mode_override
value = value or ""
if ai == "fallback":
error_to_raise = None
try:
locator = self.page.locator(selector)
await locator.select_option(value, timeout=timeout)
return value
except Exception as e:
error_to_raise = e
if selector:
try:
locator = self.page.locator(selector)
await locator.select_option(value, timeout=timeout, **kwargs)
return value
except Exception as e:
error_to_raise = e
selector = None
if prompt:
return await self._ai.ai_select_option(
selector=selector,
@@ -386,8 +590,9 @@ class SkyvernPage:
data=data,
timeout=timeout,
)
locator = self.page.locator(selector)
await locator.select_option(value, timeout=timeout)
if selector:
locator = self.page.locator(selector)
await locator.select_option(value, timeout=timeout, **kwargs)
return value
@action_wrap(ActionType.WAIT)
@@ -445,6 +650,35 @@ class SkyvernPage:
intention: str | None = None,
data: str | dict[str, Any] | None = None,
) -> dict[str, Any] | list | str | None:
"""Extract structured data from the page using AI.
Args:
prompt: Natural language description of what data to extract.
schema: JSON Schema defining the structure of data to extract.
error_code_mapping: Mapping of error codes to custom error messages.
intention: Additional context about the extraction intent.
data: Additional context data for AI processing.
Returns:
Extracted data matching the provided schema, or None if extraction fails.
Examples:
```python
# Extract structured data with JSON Schema
result = await page.extract(
prompt="Extract product information",
schema={
"type": "object",
"properties": {
"name": {"type": "string", "description": "Product name"},
"price": {"type": "number", "description": "Product price"}
},
"required": ["name", "price"]
}
)
# Returns: {"name": "...", "price": 29.99}
```
"""
return await self._ai.ai_extract(prompt, schema, error_code_mapping, intention, data)
@action_wrap(ActionType.VERIFICATION_CODE)

View File

@@ -10,18 +10,18 @@ class SkyvernPageAi(Protocol):
async def ai_click(
self,
selector: str,
selector: str | None,
intention: str,
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
) -> str:
) -> str | None:
"""Click an element using AI to locate it based on intention."""
...
async def ai_input_text(
self,
selector: str | None,
value: str,
value: str | None,
intention: str,
data: str | dict[str, Any] | None = None,
totp_identifier: str | None = None,
@@ -44,8 +44,8 @@ class SkyvernPageAi(Protocol):
async def ai_select_option(
self,
selector: str,
value: str,
selector: str | None,
value: str | None,
intention: str,
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,

View File

@@ -28,7 +28,7 @@ class ClickAction(SdkActionBase):
"""Click action parameters."""
type: Literal["ai_click"] = "ai_click"
selector: str = Field(default="", description="CSS selector for the element")
selector: str | None = Field(default="", description="CSS selector for the element")
intention: str = Field(default="", description="The intention or goal of the click")
data: str | dict[str, Any] | None = Field(None, description="Additional context data")
timeout: float = Field(default=settings.BROWSER_ACTION_TIMEOUT_MS, description="Timeout in milliseconds")
@@ -39,7 +39,7 @@ class InputTextAction(SdkActionBase):
type: Literal["ai_input_text"] = "ai_input_text"
selector: str | None = Field(default="", description="CSS selector for the element")
value: str = Field(default="", description="Value to input")
value: str | None = Field(default="", description="Value to input")
intention: str = Field(default="", description="The intention or goal of the input")
data: str | dict[str, Any] | None = Field(None, description="Additional context data")
totp_identifier: str | None = Field(None, description="TOTP identifier for input_text actions")
@@ -51,8 +51,8 @@ class SelectOptionAction(SdkActionBase):
"""Select option action parameters."""
type: Literal["ai_select_option"] = "ai_select_option"
selector: str = Field(default="", description="CSS selector for the element")
value: str = Field(default="", description="Value to select")
selector: str | None = Field(default="", description="CSS selector for the element")
value: str | None = Field(default="", description="Value to select")
intention: str = Field(default="", description="The intention or goal of the selection")
data: str | dict[str, Any] | None = Field(None, description="Additional context data")
timeout: float = Field(default=settings.BROWSER_ACTION_TIMEOUT_MS, description="Timeout in milliseconds")

View File

@@ -1,15 +1,14 @@
import asyncio
from typing import TYPE_CHECKING, Any, Pattern, overload
from typing import TYPE_CHECKING, Any, Pattern
from playwright.async_api import Page
from skyvern.client import GetRunResponse
from skyvern.client.types.workflow_run_response import WorkflowRunResponse
from skyvern.config import settings
from skyvern.core.script_generations.skyvern_page import SkyvernPage
from skyvern.library.constants import DEFAULT_AGENT_HEARTBEAT_INTERVAL, DEFAULT_AGENT_TIMEOUT
from skyvern.library.skyvern_browser_page_ai import SdkSkyvernPageAi
from skyvern.library.skyvern_locator import SkyvernLocator
from skyvern.webeye.actions import handler_utils
if TYPE_CHECKING:
from skyvern.library.skyvern_browser import SkyvernBrowser
@@ -206,7 +205,7 @@ class SkyvernPageRun:
return url
class SkyvernBrowserPage:
class SkyvernBrowserPage(SkyvernPage):
"""A browser page wrapper that combines Playwright's page API with Skyvern's AI capabilities.
This class provides a unified interface for both traditional browser automation (via Playwright)
@@ -230,365 +229,10 @@ class SkyvernBrowserPage:
"""
def __init__(self, browser: "SkyvernBrowser", page: Page):
super().__init__(page, SdkSkyvernPageAi(browser, page))
self._browser = browser
self._page = page
self._ai = SdkSkyvernPageAi(browser, page)
self.run = SkyvernPageRun(browser, page)
@overload
async def click(
self,
selector: str,
*,
prompt: str | None = None,
ai: str | None = "fallback",
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
**kwargs: Any,
) -> str | None: ...
@overload
async def click(
self,
*,
prompt: str,
ai: str | None = "fallback",
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
**kwargs: Any,
) -> str | None: ...
async def click(
self,
selector: str | None = None,
*,
prompt: str | None = None,
ai: str | None = "fallback",
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
**kwargs: Any,
) -> str | None:
"""Click an element using a CSS selector, AI-powered prompt matching, or both.
This method supports three modes:
- **Selector-based**: Click the element matching the CSS selector
- **AI-powered**: Use natural language to describe which element to click
- **Fallback mode** (default): Try the selector first, fall back to AI if it fails
Args:
selector: CSS selector for the target element.
prompt: Natural language description of which element to click.
ai: AI behavior mode. Defaults to "fallback" which tries selector first, then AI.
data: Additional context data for AI processing.
timeout: Maximum time to wait for the click action in milliseconds.
Returns:
The selector string that was successfully used to click the element, or None.
Examples:
```python
# Click using a CSS selector
await page.click("#open-invoice-button")
# Click using AI with natural language
await page.click(prompt="Click on the 'Open Invoice' button")
# Try selector first, fall back to AI if selector fails
await page.click("#open-invoice-button", prompt="Click on the 'Open Invoice' button")
```
"""
if ai == "fallback":
# try to click the element with the original selector first
error_to_raise = None
if selector:
try:
locator = self._page.locator(selector)
await locator.click(timeout=timeout, **kwargs)
return selector
except Exception as e:
error_to_raise = e
selector = None
# if the original selector doesn't work, try to click the element with the ai generated selector
if prompt:
return await self._ai.ai_click(
selector=selector or "",
intention=prompt,
data=data,
timeout=timeout,
)
if error_to_raise:
raise error_to_raise
else:
return selector
elif ai == "proactive":
if prompt:
return await self._ai.ai_click(
selector=selector or "",
intention=prompt,
data=data,
timeout=timeout,
)
if selector:
locator = self._page.locator(selector, **kwargs)
await locator.click(timeout=timeout)
return selector
@overload
async def fill(
self,
selector: str,
value: str,
*,
prompt: str | None = None,
ai: str | None = "fallback",
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
totp_identifier: str | None = None,
totp_url: str | None = None,
) -> str: ...
@overload
async def fill(
self,
*,
prompt: str,
value: str | None = None,
selector: str | None = None,
ai: str | None = "fallback",
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
totp_identifier: str | None = None,
totp_url: str | None = None,
) -> str: ...
async def fill(
self,
selector: str | None = None,
value: str | None = None,
*,
prompt: str | None = None,
ai: str | None = "fallback",
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
totp_identifier: str | None = None,
totp_url: str | None = None,
) -> str:
"""Fill an input field using a CSS selector, AI-powered prompt matching, or both.
This method supports three modes:
- **Selector-based**: Fill the input field with a value using CSS selector
- **AI-powered**: Use natural language prompt (AI extracts value from prompt)
- **Fallback mode** (default): Try the selector first, fall back to AI if it fails
Args:
selector: CSS selector for the target input element.
value: The text value to input into the field.
prompt: Natural language description of which field to fill and what value.
ai: AI behavior mode. Defaults to "fallback" which tries selector first, then AI.
data: Additional context data for AI processing.
timeout: Maximum time to wait for the fill action in milliseconds.
totp_identifier: TOTP identifier for time-based one-time password fields.
totp_url: URL to fetch TOTP codes from for authentication.
Returns:
The value that was successfully filled into the field.
Examples:
```python
# Fill using selector and value (both positional)
await page.fill("#email-input", "user@example.com")
# Fill using AI with natural language (prompt only)
await page.fill(prompt="Fill 'user@example.com' in the email address field")
# Try selector first, fall back to AI if selector fails
await page.fill(
"#email-input",
"user@example.com",
prompt="Fill the email address with user@example.com"
)
```
"""
return await self._input_text(
selector=selector,
value=value or "",
ai=ai,
intention=prompt,
data=data,
timeout=timeout,
totp_identifier=totp_identifier,
totp_url=totp_url,
)
async def goto(self, url: str, **kwargs: Any) -> None:
"""Navigate to the given URL.
Args:
url: URL to navigate page to.
**kwargs: Additional options like timeout, wait_until, referer, etc.
"""
await self._page.goto(url, **kwargs)
async def type(self, selector: str, text: str, **kwargs: Any) -> None:
"""Type text into an element character by character.
Args:
selector: A selector to search for an element to type into.
text: Text to type into the element.
**kwargs: Additional options like delay, timeout, no_wait_after, etc.
"""
await self._page.type(selector, text, **kwargs)
@overload
async def select_option(
self,
selector: str,
value: str | None = None,
*,
prompt: str | None = None,
ai: str | None = "fallback",
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
**kwargs: Any,
) -> str: ...
@overload
async def select_option(
self,
*,
prompt: str,
value: str | None = None,
selector: str | None = None,
ai: str | None = "fallback",
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
**kwargs: Any,
) -> str: ...
async def select_option(
self,
selector: str | None = None,
value: str | None = None,
*,
prompt: str | None = None,
ai: str | None = "fallback",
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
**kwargs: Any,
) -> str:
"""Select an option from a dropdown using a CSS selector, AI-powered prompt matching, or both.
This method supports three modes:
- **Selector-based**: Select the option with a value using CSS selector
- **AI-powered**: Use natural language prompt (AI extracts value from prompt)
- **Fallback mode** (default): Try the selector first, fall back to AI if it fails
Args:
selector: CSS selector for the target select/dropdown element.
value: The option value to select.
prompt: Natural language description of which option to select.
ai: AI behavior mode. Defaults to "fallback" which tries selector first, then AI.
data: Additional context data for AI processing.
timeout: Maximum time to wait for the select action in milliseconds.
Returns:
The value that was successfully selected.
Examples:
```python
# Select using selector and value (both positional)
await page.select_option("#country", "us")
# Select using AI with natural language (prompt only)
await page.select_option(prompt="Select 'United States' from the country dropdown")
# Try selector first, fall back to AI if selector fails
await page.select_option(
"#country",
"us",
prompt="Select United States from country"
)
```
"""
value = value or ""
if ai == "fallback":
error_to_raise = None
if selector:
try:
locator = self._page.locator(selector)
await locator.select_option(value, timeout=timeout, **kwargs)
return value
except Exception as e:
error_to_raise = e
selector = None
if prompt:
return await self._ai.ai_select_option(
selector=selector or "",
value=value,
intention=prompt,
data=data,
timeout=timeout,
)
if error_to_raise:
raise error_to_raise
else:
return value
elif ai == "proactive" and prompt:
return await self._ai.ai_select_option(
selector=selector or "",
value=value,
intention=prompt,
data=data,
timeout=timeout,
)
if selector:
locator = self._page.locator(selector)
await locator.select_option(value, timeout=timeout, **kwargs)
return value
async def extract(
self,
prompt: str,
schema: dict[str, Any] | list | str | None = None,
error_code_mapping: dict[str, str] | None = None,
intention: str | None = None,
data: str | dict[str, Any] | None = None,
) -> dict[str, Any] | list | str | None:
"""Extract structured data from the page using AI.
Args:
prompt: Natural language description of what data to extract.
schema: JSON Schema defining the structure of data to extract.
error_code_mapping: Mapping of error codes to custom error messages.
intention: Additional context about the extraction intent.
data: Additional context data for AI processing.
Returns:
Extracted data matching the provided schema, or None if extraction fails.
Examples:
```python
# Extract structured data with JSON Schema
result = await page.extract(
prompt="Extract product information",
schema={
"type": "object",
"properties": {
"name": {"type": "string", "description": "Product name"},
"price": {"type": "number", "description": "Product price"}
},
"required": ["name", "price"]
}
)
# Returns: {"name": "...", "price": 29.99}
```
"""
return await self._ai.ai_extract(prompt, schema, error_code_mapping, intention, data)
async def act(
self,
prompt: str,
@@ -612,7 +256,7 @@ class SkyvernBrowserPage:
Args:
**kwargs: Additional options like timeout, wait_until, etc.
"""
await self._page.reload(**kwargs)
await self.page.reload(**kwargs)
async def screenshot(self, **kwargs: Any) -> bytes:
"""Take a screenshot of the page.
@@ -623,7 +267,7 @@ class SkyvernBrowserPage:
Returns:
bytes: The screenshot as bytes (unless path is specified, then saves to file).
"""
return await self._page.screenshot(**kwargs)
return await self.page.screenshot(**kwargs)
def locator(self, selector: str, **kwargs: Any) -> SkyvernLocator:
"""Find an element using a CSS selector or other selector syntax.
@@ -635,7 +279,7 @@ class SkyvernBrowserPage:
Returns:
SkyvernLocator object that can be used to perform actions or assertions.
"""
return SkyvernLocator(self._page.locator(selector, **kwargs))
return SkyvernLocator(self.page.locator(selector, **kwargs))
def get_by_label(self, text: str | Pattern[str], **kwargs: Any) -> SkyvernLocator:
"""Find an input element by its associated label text.
@@ -647,7 +291,7 @@ class SkyvernBrowserPage:
Returns:
SkyvernLocator object for the labeled input element.
"""
return SkyvernLocator(self._page.get_by_label(text, **kwargs))
return SkyvernLocator(self.page.get_by_label(text, **kwargs))
def get_by_text(self, text: str | Pattern[str], **kwargs: Any) -> SkyvernLocator:
"""Find an element containing the specified text.
@@ -659,7 +303,7 @@ class SkyvernBrowserPage:
Returns:
SkyvernLocator object for the element containing the text.
"""
return SkyvernLocator(self._page.get_by_text(text, **kwargs))
return SkyvernLocator(self.page.get_by_text(text, **kwargs))
def get_by_title(self, text: str | Pattern[str], **kwargs: Any) -> SkyvernLocator:
"""Find an element by its title attribute.
@@ -671,7 +315,7 @@ class SkyvernBrowserPage:
Returns:
SkyvernLocator object for the element with matching title.
"""
return SkyvernLocator(self._page.get_by_title(text, **kwargs))
return SkyvernLocator(self.page.get_by_title(text, **kwargs))
def get_by_role(self, role: str, **kwargs: Any) -> SkyvernLocator:
"""Find an element by its ARIA role.
@@ -683,7 +327,7 @@ class SkyvernBrowserPage:
Returns:
SkyvernLocator object for the element with matching role.
"""
return SkyvernLocator(self._page.get_by_role(role, **kwargs))
return SkyvernLocator(self.page.get_by_role(role, **kwargs))
def get_by_placeholder(self, text: str | Pattern[str], **kwargs: Any) -> SkyvernLocator:
"""Find an input element by its placeholder text.
@@ -695,7 +339,7 @@ class SkyvernBrowserPage:
Returns:
SkyvernLocator object for the input element with matching placeholder.
"""
return SkyvernLocator(self._page.get_by_placeholder(text, **kwargs))
return SkyvernLocator(self.page.get_by_placeholder(text, **kwargs))
def get_by_alt_text(self, text: str | Pattern[str], **kwargs: Any) -> SkyvernLocator:
"""Find an element by its alt text (typically images).
@@ -707,7 +351,7 @@ class SkyvernBrowserPage:
Returns:
SkyvernLocator object for the element with matching alt text.
"""
return SkyvernLocator(self._page.get_by_alt_text(text, **kwargs))
return SkyvernLocator(self.page.get_by_alt_text(text, **kwargs))
def get_by_test_id(self, test_id: str) -> SkyvernLocator:
"""Find an element by its test ID attribute.
@@ -718,70 +362,4 @@ class SkyvernBrowserPage:
Returns:
SkyvernLocator object for the element with matching test ID.
"""
return SkyvernLocator(self._page.get_by_test_id(test_id))
async def _input_text(
self,
selector: str | None,
value: str,
ai: str | None = "fallback",
intention: str | None = None,
data: str | dict[str, Any] | None = None,
totp_identifier: str | None = None,
totp_url: str | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
) -> str:
"""Input text into an element identified by ``selector``.
When ``intention`` and ``data`` are provided a new input text action is
generated via the `script-generation-input-text-generation` prompt. The model returns a
fresh text based on the current DOM and the updated data for this run.
The browser then inputs the text using this newly generated text.
If the prompt generation or parsing fails for any reason we fall back to
inputting the originally supplied ``value``.
"""
# format the text with the actual value of the parameter if it's a secret when running a workflow
if ai == "fallback":
error_to_raise = None
if selector:
try:
locator = self._page.locator(selector)
await handler_utils.input_sequentially(locator, value, timeout=timeout)
return value
except Exception as e:
error_to_raise = e
selector = None
if intention:
return await self._ai.ai_input_text(
selector=selector,
value=value,
intention=intention,
data=data,
totp_identifier=totp_identifier,
totp_url=totp_url,
timeout=timeout,
)
if error_to_raise:
raise error_to_raise
else:
return value
elif ai == "proactive" and intention:
return await self._ai.ai_input_text(
selector=selector,
value=value,
intention=intention,
data=data,
totp_identifier=totp_identifier,
totp_url=totp_url,
timeout=timeout,
)
if not selector:
raise ValueError("Selector is required but was not provided")
locator = self._page.locator(selector)
await handler_utils.input_sequentially(locator, value, timeout=timeout)
return value
return SkyvernLocator(self.page.get_by_test_id(test_id))

View File

@@ -29,11 +29,11 @@ class SdkSkyvernPageAi(SkyvernPageAi):
async def ai_click(
self,
selector: str,
selector: str | None,
intention: str,
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
) -> str:
) -> str | None:
"""Click an element using AI via API call."""
await self._browser.sdk.ensure_has_server()
@@ -55,7 +55,7 @@ class SdkSkyvernPageAi(SkyvernPageAi):
async def ai_input_text(
self,
selector: str | None,
value: str,
value: str | None,
intention: str,
data: str | dict[str, Any] | None = None,
totp_identifier: str | None = None,
@@ -81,12 +81,12 @@ class SdkSkyvernPageAi(SkyvernPageAi):
workflow_run_id=self._browser.workflow_run_id,
)
self._browser.workflow_run_id = response.workflow_run_id
return response.result if response.result else value
return response.result if response.result else value or ""
async def ai_select_option(
self,
selector: str,
value: str,
selector: str | None,
value: str | None,
intention: str,
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
@@ -108,7 +108,7 @@ class SdkSkyvernPageAi(SkyvernPageAi):
workflow_run_id=self._browser.workflow_run_id,
)
self._browser.workflow_run_id = response.workflow_run_id
return response.result if response.result else value
return response.result if response.result else value or ""
async def ai_upload_file(
self,