SDK: support select_option and extract (#3850)
This commit is contained in:
committed by
GitHub
parent
ac069838c7
commit
af9a5f31e4
@@ -2,14 +2,9 @@ from typing import TYPE_CHECKING, Any
|
||||
|
||||
from playwright.async_api import Page
|
||||
|
||||
from skyvern.client import SdkAction_AiClick, SdkAction_AiInputText, SdkAction_AiSelectOption, SdkAction_Extract
|
||||
from skyvern.config import settings
|
||||
from skyvern.core.script_generations.skyvern_page_ai import SkyvernPageAi
|
||||
from skyvern.forge.sdk.schemas.sdk_actions import (
|
||||
ClickAction,
|
||||
ExtractAction,
|
||||
InputTextAction,
|
||||
SelectOptionAction,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from skyvern.library.skyvern_browser import SkyvernBrowser
|
||||
@@ -35,18 +30,17 @@ class SdkSkyvernPageAi(SkyvernPageAi):
|
||||
) -> str:
|
||||
"""Click an element using AI via API call."""
|
||||
|
||||
action = ClickAction(
|
||||
selector=selector,
|
||||
intention=intention,
|
||||
data=data,
|
||||
timeout=timeout,
|
||||
)
|
||||
response = await self._browser.client.run_sdk_action(
|
||||
url=self._page.url,
|
||||
browser_session_id=self._browser.browser_session_id,
|
||||
browser_address=self._browser.browser_address,
|
||||
workflow_run_id=self._browser.workflow_run_id,
|
||||
action=action,
|
||||
action=SdkAction_AiClick(
|
||||
selector=selector,
|
||||
intention=intention,
|
||||
data=data,
|
||||
timeout=timeout,
|
||||
),
|
||||
)
|
||||
self._browser.workflow_run_id = response.workflow_run_id
|
||||
return response.result if response.result else selector
|
||||
@@ -65,7 +59,7 @@ class SdkSkyvernPageAi(SkyvernPageAi):
|
||||
|
||||
response = await self._browser.client.run_sdk_action(
|
||||
url=self._page.url,
|
||||
action=InputTextAction(
|
||||
action=SdkAction_AiInputText(
|
||||
selector=selector,
|
||||
value=value,
|
||||
intention=intention,
|
||||
@@ -93,7 +87,7 @@ class SdkSkyvernPageAi(SkyvernPageAi):
|
||||
|
||||
response = await self._browser.client.run_sdk_action(
|
||||
url=self._page.url,
|
||||
action=SelectOptionAction(
|
||||
action=SdkAction_AiSelectOption(
|
||||
selector=selector,
|
||||
value=value,
|
||||
intention=intention,
|
||||
@@ -129,7 +123,7 @@ class SdkSkyvernPageAi(SkyvernPageAi):
|
||||
|
||||
response = await self._browser.client.run_sdk_action(
|
||||
url=self._page.url,
|
||||
action=ExtractAction(
|
||||
action=SdkAction_Extract(
|
||||
prompt=prompt,
|
||||
extract_schema=schema,
|
||||
error_code_mapping=error_code_mapping,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import asyncio
|
||||
from typing import TYPE_CHECKING, Any
|
||||
from typing import TYPE_CHECKING, Any, overload
|
||||
|
||||
from playwright.async_api import Page
|
||||
|
||||
@@ -230,24 +230,64 @@ class SkyvernBrowserPage:
|
||||
self._ai = SdkSkyvernPageAi(browser, page)
|
||||
self.run = SkyvernPageRun(browser, page)
|
||||
|
||||
@overload
|
||||
async def click(
|
||||
self,
|
||||
selector: str,
|
||||
*,
|
||||
prompt: str | None = None,
|
||||
ai: str | None = "fallback",
|
||||
data: str | dict[str, Any] | None = None,
|
||||
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
|
||||
) -> str | None: ...
|
||||
|
||||
@overload
|
||||
async def click(
|
||||
self,
|
||||
*,
|
||||
prompt: str,
|
||||
ai: str | None = "fallback",
|
||||
data: str | dict[str, Any] | None = None,
|
||||
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
|
||||
) -> str | None: ...
|
||||
|
||||
async def click(
|
||||
self,
|
||||
selector: str | None = None,
|
||||
intention: str | None = None,
|
||||
*,
|
||||
prompt: str | None = None,
|
||||
ai: str | None = "fallback",
|
||||
data: str | dict[str, Any] | None = None,
|
||||
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
|
||||
) -> str | None:
|
||||
"""Click an element identified by ``selector``.
|
||||
"""Click an element using a CSS selector, AI-powered prompt matching, or both.
|
||||
|
||||
When ``intention`` and ``data`` are provided a new click action is
|
||||
generated via the ``single-click-action`` prompt. The model returns a
|
||||
fresh "xpath=..." selector based on the current DOM and the updated data for this run.
|
||||
The browser then clicks the element using this newly generated xpath selector.
|
||||
This method supports three modes:
|
||||
- **Selector-based**: Click the element matching the CSS selector
|
||||
- **AI-powered**: Use natural language to describe which element to click
|
||||
- **Fallback mode** (default): Try the selector first, fall back to AI if it fails
|
||||
|
||||
If the prompt generation or parsing fails for any reason we fall back to
|
||||
clicking the originally supplied ``selector``.
|
||||
Args:
|
||||
selector: CSS selector for the target element.
|
||||
prompt: Natural language description of which element to click.
|
||||
ai: AI behavior mode. Defaults to "fallback" which tries selector first, then AI.
|
||||
data: Additional context data for AI processing.
|
||||
timeout: Maximum time to wait for the click action in milliseconds.
|
||||
|
||||
Returns:
|
||||
The selector string that was successfully used to click the element, or None.
|
||||
|
||||
Examples:
|
||||
```python
|
||||
# Click using a CSS selector
|
||||
await page.click("#open-invoice-button")
|
||||
|
||||
# Click using AI with natural language
|
||||
await page.click(prompt="Click on the 'Open Invoice' button")
|
||||
|
||||
# Try selector first, fall back to AI if selector fails
|
||||
await page.click("#open-invoice-button", prompt="Click on the 'Open Invoice' button")
|
||||
```
|
||||
"""
|
||||
|
||||
if ai == "fallback":
|
||||
@@ -262,10 +302,10 @@ class SkyvernBrowserPage:
|
||||
error_to_raise = e
|
||||
|
||||
# if the original selector doesn't work, try to click the element with the ai generated selector
|
||||
if intention:
|
||||
if prompt:
|
||||
return await self._ai.ai_click(
|
||||
selector=selector or "",
|
||||
intention=intention,
|
||||
intention=prompt,
|
||||
data=data,
|
||||
timeout=timeout,
|
||||
)
|
||||
@@ -274,10 +314,10 @@ class SkyvernBrowserPage:
|
||||
else:
|
||||
return selector
|
||||
elif ai == "proactive":
|
||||
if intention:
|
||||
if prompt:
|
||||
return await self._ai.ai_click(
|
||||
selector=selector or "",
|
||||
intention=intention,
|
||||
intention=prompt,
|
||||
data=data,
|
||||
timeout=timeout,
|
||||
)
|
||||
@@ -287,6 +327,244 @@ class SkyvernBrowserPage:
|
||||
await locator.click(timeout=timeout)
|
||||
return selector
|
||||
|
||||
@overload
|
||||
async def fill(
|
||||
self,
|
||||
selector: str,
|
||||
value: str,
|
||||
*,
|
||||
prompt: str | None = None,
|
||||
ai: str | None = "fallback",
|
||||
data: str | dict[str, Any] | None = None,
|
||||
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
|
||||
totp_identifier: str | None = None,
|
||||
totp_url: str | None = None,
|
||||
) -> str: ...
|
||||
|
||||
@overload
|
||||
async def fill(
|
||||
self,
|
||||
*,
|
||||
prompt: str,
|
||||
value: str | None = None,
|
||||
selector: str | None = None,
|
||||
ai: str | None = "fallback",
|
||||
data: str | dict[str, Any] | None = None,
|
||||
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
|
||||
totp_identifier: str | None = None,
|
||||
totp_url: str | None = None,
|
||||
) -> str: ...
|
||||
|
||||
async def fill(
|
||||
self,
|
||||
selector: str | None = None,
|
||||
value: str | None = None,
|
||||
*,
|
||||
prompt: str | None = None,
|
||||
ai: str | None = "fallback",
|
||||
data: str | dict[str, Any] | None = None,
|
||||
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
|
||||
totp_identifier: str | None = None,
|
||||
totp_url: str | None = None,
|
||||
) -> str:
|
||||
"""Fill an input field using a CSS selector, AI-powered prompt matching, or both.
|
||||
|
||||
This method supports three modes:
|
||||
- **Selector-based**: Fill the input field with a value using CSS selector
|
||||
- **AI-powered**: Use natural language prompt (AI extracts value from prompt)
|
||||
- **Fallback mode** (default): Try the selector first, fall back to AI if it fails
|
||||
|
||||
Args:
|
||||
selector: CSS selector for the target input element.
|
||||
value: The text value to input into the field.
|
||||
prompt: Natural language description of which field to fill and what value.
|
||||
ai: AI behavior mode. Defaults to "fallback" which tries selector first, then AI.
|
||||
data: Additional context data for AI processing.
|
||||
timeout: Maximum time to wait for the fill action in milliseconds.
|
||||
totp_identifier: TOTP identifier for time-based one-time password fields.
|
||||
totp_url: URL to fetch TOTP codes from for authentication.
|
||||
|
||||
Returns:
|
||||
The value that was successfully filled into the field.
|
||||
|
||||
Examples:
|
||||
```python
|
||||
# Fill using selector and value (both positional)
|
||||
await page.fill("#email-input", "user@example.com")
|
||||
|
||||
# Fill using AI with natural language (prompt only)
|
||||
await page.fill(prompt="Fill 'user@example.com' in the email address field")
|
||||
|
||||
# Try selector first, fall back to AI if selector fails
|
||||
await page.fill(
|
||||
"#email-input",
|
||||
"user@example.com",
|
||||
prompt="Fill the email address with user@example.com"
|
||||
)
|
||||
```
|
||||
"""
|
||||
return await self._input_text(
|
||||
selector=selector or "",
|
||||
value=value or "",
|
||||
ai=ai,
|
||||
intention=prompt,
|
||||
data=data,
|
||||
timeout=timeout,
|
||||
totp_identifier=totp_identifier,
|
||||
totp_url=totp_url,
|
||||
)
|
||||
|
||||
async def goto(self, url: str, **kwargs: Any) -> None:
|
||||
"""Navigate to the given URL.
|
||||
|
||||
Args:
|
||||
url: URL to navigate page to.
|
||||
**kwargs: Additional options like timeout, wait_until, referer, etc.
|
||||
"""
|
||||
await self._page.goto(url, **kwargs)
|
||||
|
||||
async def type(self, selector: str, text: str, **kwargs: Any) -> None:
|
||||
"""Type text into an element character by character.
|
||||
|
||||
Args:
|
||||
selector: A selector to search for an element to type into.
|
||||
text: Text to type into the element.
|
||||
**kwargs: Additional options like delay, timeout, no_wait_after, etc.
|
||||
"""
|
||||
await self._page.type(selector, text, **kwargs)
|
||||
|
||||
@overload
|
||||
async def select_option(
|
||||
self,
|
||||
selector: str,
|
||||
value: str,
|
||||
*,
|
||||
prompt: str | None = None,
|
||||
ai: str | None = "fallback",
|
||||
data: str | dict[str, Any] | None = None,
|
||||
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
|
||||
) -> str: ...
|
||||
|
||||
@overload
|
||||
async def select_option(
|
||||
self,
|
||||
*,
|
||||
prompt: str,
|
||||
value: str | None = None,
|
||||
selector: str | None = None,
|
||||
ai: str | None = "fallback",
|
||||
data: str | dict[str, Any] | None = None,
|
||||
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
|
||||
) -> str: ...
|
||||
|
||||
async def select_option(
|
||||
self,
|
||||
selector: str | None = None,
|
||||
value: str | None = None,
|
||||
*,
|
||||
prompt: str | None = None,
|
||||
ai: str | None = "fallback",
|
||||
data: str | dict[str, Any] | None = None,
|
||||
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
|
||||
) -> str:
|
||||
"""Select an option from a dropdown using a CSS selector, AI-powered prompt matching, or both.
|
||||
|
||||
This method supports three modes:
|
||||
- **Selector-based**: Select the option with a value using CSS selector
|
||||
- **AI-powered**: Use natural language prompt (AI extracts value from prompt)
|
||||
- **Fallback mode** (default): Try the selector first, fall back to AI if it fails
|
||||
|
||||
Args:
|
||||
selector: CSS selector for the target select/dropdown element.
|
||||
value: The option value to select.
|
||||
prompt: Natural language description of which option to select.
|
||||
ai: AI behavior mode. Defaults to "fallback" which tries selector first, then AI.
|
||||
data: Additional context data for AI processing.
|
||||
timeout: Maximum time to wait for the select action in milliseconds.
|
||||
|
||||
Returns:
|
||||
The value that was successfully selected.
|
||||
|
||||
Examples:
|
||||
```python
|
||||
# Select using selector and value (both positional)
|
||||
await page.select_option("#country", "us")
|
||||
|
||||
# Select using AI with natural language (prompt only)
|
||||
await page.select_option(prompt="Select 'United States' from the country dropdown")
|
||||
|
||||
# Try selector first, fall back to AI if selector fails
|
||||
await page.select_option(
|
||||
"#country",
|
||||
"us",
|
||||
prompt="Select United States from country"
|
||||
)
|
||||
```
|
||||
"""
|
||||
value = value or ""
|
||||
if ai == "fallback":
|
||||
error_to_raise = None
|
||||
if selector:
|
||||
try:
|
||||
locator = self._page.locator(selector)
|
||||
await locator.select_option(value, timeout=timeout)
|
||||
return value
|
||||
except Exception as e:
|
||||
error_to_raise = e
|
||||
if prompt:
|
||||
return await self._ai.ai_select_option(
|
||||
selector=selector or "",
|
||||
value=value,
|
||||
intention=prompt,
|
||||
data=data,
|
||||
timeout=timeout,
|
||||
)
|
||||
if error_to_raise:
|
||||
raise error_to_raise
|
||||
else:
|
||||
return value
|
||||
elif ai == "proactive" and prompt:
|
||||
return await self._ai.ai_select_option(
|
||||
selector=selector or "",
|
||||
value=value,
|
||||
intention=prompt,
|
||||
data=data,
|
||||
timeout=timeout,
|
||||
)
|
||||
if selector:
|
||||
locator = self._page.locator(selector)
|
||||
await locator.select_option(value, timeout=timeout)
|
||||
return value
|
||||
|
||||
async def extract(
|
||||
self,
|
||||
prompt: str,
|
||||
schema: dict[str, Any] | list | str | None = None,
|
||||
error_code_mapping: dict[str, str] | None = None,
|
||||
intention: str | None = None,
|
||||
data: str | dict[str, Any] | None = None,
|
||||
) -> dict[str, Any] | list | str | None:
|
||||
return await self._ai.ai_extract(prompt, schema, error_code_mapping, intention, data)
|
||||
|
||||
async def reload(self, **kwargs: Any) -> None:
|
||||
"""Reload the current page.
|
||||
|
||||
Args:
|
||||
**kwargs: Additional options like timeout, wait_until, etc.
|
||||
"""
|
||||
await self._page.reload(**kwargs)
|
||||
|
||||
async def screenshot(self, **kwargs: Any) -> bytes:
|
||||
"""Take a screenshot of the page.
|
||||
|
||||
Args:
|
||||
**kwargs: Additional options like path, full_page, clip, type, quality, etc.
|
||||
|
||||
Returns:
|
||||
bytes: The screenshot as bytes (unless path is specified, then saves to file).
|
||||
"""
|
||||
return await self._page.screenshot(**kwargs)
|
||||
|
||||
async def _input_text(
|
||||
self,
|
||||
selector: str,
|
||||
@@ -346,76 +624,3 @@ class SkyvernBrowserPage:
|
||||
locator = self._page.locator(selector)
|
||||
await handler_utils.input_sequentially(locator, value, timeout=timeout)
|
||||
return value
|
||||
|
||||
async def fill(
|
||||
self,
|
||||
selector: str,
|
||||
value: str,
|
||||
ai: str | None = "fallback",
|
||||
intention: str | None = None,
|
||||
data: str | dict[str, Any] | None = None,
|
||||
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
|
||||
totp_identifier: str | None = None,
|
||||
totp_url: str | None = None,
|
||||
) -> str:
|
||||
return await self._input_text(
|
||||
selector=selector,
|
||||
value=value,
|
||||
ai=ai,
|
||||
intention=intention,
|
||||
data=data,
|
||||
timeout=timeout,
|
||||
totp_identifier=totp_identifier,
|
||||
totp_url=totp_url,
|
||||
)
|
||||
|
||||
async def goto(self, url: str, **kwargs: Any) -> None:
|
||||
"""Navigate to the given URL.
|
||||
|
||||
Args:
|
||||
url: URL to navigate page to.
|
||||
**kwargs: Additional options like timeout, wait_until, referer, etc.
|
||||
"""
|
||||
await self._page.goto(url, **kwargs)
|
||||
|
||||
async def type(self, selector: str, text: str, **kwargs: Any) -> None:
|
||||
"""Type text into an element character by character.
|
||||
|
||||
Args:
|
||||
selector: A selector to search for an element to type into.
|
||||
text: Text to type into the element.
|
||||
**kwargs: Additional options like delay, timeout, no_wait_after, etc.
|
||||
"""
|
||||
await self._page.type(selector, text, **kwargs)
|
||||
|
||||
async def select_option(self, selector: str, value: Any = None, **kwargs: Any) -> list[str]:
|
||||
"""Select option(s) in a <select> element.
|
||||
|
||||
Args:
|
||||
selector: A selector to search for a select element.
|
||||
value: Option value(s) to select. Can be a string, list of strings, or dict with value/label/index.
|
||||
**kwargs: Additional options like timeout, force, no_wait_after, etc.
|
||||
|
||||
Returns:
|
||||
List of option values that have been successfully selected.
|
||||
"""
|
||||
return await self._page.select_option(selector, value, **kwargs)
|
||||
|
||||
async def reload(self, **kwargs: Any) -> None:
|
||||
"""Reload the current page.
|
||||
|
||||
Args:
|
||||
**kwargs: Additional options like timeout, wait_until, etc.
|
||||
"""
|
||||
await self._page.reload(**kwargs)
|
||||
|
||||
async def screenshot(self, **kwargs: Any) -> bytes:
|
||||
"""Take a screenshot of the page.
|
||||
|
||||
Args:
|
||||
**kwargs: Additional options like path, full_page, clip, type, quality, etc.
|
||||
|
||||
Returns:
|
||||
bytes: The screenshot as bytes (unless path is specified, then saves to file).
|
||||
"""
|
||||
return await self._page.screenshot(**kwargs)
|
||||
|
||||
Reference in New Issue
Block a user