SDK: support select_option and extract (#3850)

This commit is contained in:
Stanislav Novosad
2025-10-30 09:05:20 -06:00
committed by GitHub
parent ac069838c7
commit af9a5f31e4
21 changed files with 774 additions and 124 deletions

View File

@@ -2317,6 +2317,9 @@
"403": {
"description": "Unauthorized - Invalid or missing authentication"
},
"404": {
"description": "Workflow run or workflow not found"
},
"400": {
"description": "Invalid operation"
},
@@ -9733,6 +9736,26 @@
"description": "The workflow run ID used for this action"
},
"result": {
"anyOf": [
{
"type": "string"
},
{
"type": "object"
},
{
"type": "array"
},
{
"type": "number"
},
{
"type": "boolean"
},
{
"type": "null"
}
],
"title": "Result",
"description": "The result from the action (e.g., selector, value, extracted data)"
}

View File

@@ -1,5 +1,5 @@
# Reference
<details><summary><code>client.<a href="/src/Client.ts">deployScript</a>(scriptId, { ...params }) -> Skyvern.CreateScriptResponse</code></summary>
<details><summary><code>client.<a href="/src/Client.ts">runSdkAction</a>({ ...params }) -> Skyvern.RunSdkActionResponse</code></summary>
<dl>
<dd>
@@ -11,7 +11,7 @@
<dl>
<dd>
Deploy a script with updated files, creating a new version
Execute a single SDK action with the specified parameters
</dd>
</dl>
</dd>
@@ -26,11 +26,12 @@ Deploy a script with updated files, creating a new version
<dd>
```typescript
await client.deployScript("s_abc123", {
files: [{
path: "src/main.py",
content: "content"
}]
await client.runSdkAction({
"x-user-agent": "x-user-agent",
url: "url",
action: {
type: "ai_click"
}
});
```
@@ -47,15 +48,7 @@ await client.deployScript("s_abc123", {
<dl>
<dd>
**scriptId:** `string` — The unique identifier of the script
</dd>
</dl>
<dl>
<dd>
**request:** `Skyvern.DeployScriptRequest`
**request:** `Skyvern.RunSdkActionRequest`
</dd>
</dl>

View File

@@ -2202,4 +2202,101 @@ export class SkyvernClient {
});
}
}
/**
 * Execute a single SDK action with the specified parameters.
 *
 * Delegates the HTTP work to `__runSdkAction` and wraps the pending result in a
 * `core.HttpResponsePromise`.
 *
 * @param {Skyvern.RunSdkActionRequest} request - Target URL, optional session/workflow identifiers and the action to run.
 * @param {SkyvernClient.RequestOptions} requestOptions - Request-specific configuration.
 *
 * @throws {@link Skyvern.BadRequestError}
 * @throws {@link Skyvern.ForbiddenError}
 * @throws {@link Skyvern.NotFoundError}
 * @throws {@link Skyvern.UnprocessableEntityError}
 *
 * @example
 *     await client.runSdkAction({
 *         "x-user-agent": "x-user-agent",
 *         url: "url",
 *         action: {
 *             type: "ai_click"
 *         }
 *     })
 */
public runSdkAction(
    request: Skyvern.RunSdkActionRequest,
    requestOptions?: SkyvernClient.RequestOptions,
): core.HttpResponsePromise<Skyvern.RunSdkActionResponse> {
    const pendingResponse = this.__runSdkAction(request, requestOptions);
    return core.HttpResponsePromise.fromPromise(pendingResponse);
}
/**
 * Issues `POST v1/sdk/run_action` and converts the fetcher outcome into either
 * the parsed response (with its raw response) or a thrown SDK error.
 *
 * Status codes 400, 403, 404 and 422 are mapped to their dedicated error
 * classes; any other status code, a non-JSON body or an unknown failure becomes
 * a `SkyvernError`, and a timeout becomes a `SkyvernTimeoutError`.
 */
private async __runSdkAction(
    request: Skyvern.RunSdkActionRequest,
    requestOptions?: SkyvernClient.RequestOptions,
): Promise<core.WithRawResponse<Skyvern.RunSdkActionResponse>> {
    // "x-user-agent" is sent as a header; every remaining field forms the JSON body.
    const { "x-user-agent": userAgent, ..._body } = request;
    const _headers: core.Fetcher.Args["headers"] = mergeHeaders(
        this._options?.headers,
        mergeOnlyDefinedHeaders({
            "x-user-agent": userAgent ?? undefined,
            "x-api-key": requestOptions?.apiKey ?? this._options?.apiKey,
        }),
        requestOptions?.headers,
    );

    // Resolution order: explicit baseUrl, then configured environment, then Production.
    const _baseUrl =
        (await core.Supplier.get(this._options.baseUrl)) ??
        (await core.Supplier.get(this._options.environment)) ??
        environments.SkyvernEnvironment.Production;

    const _response = await core.fetcher({
        url: core.url.join(_baseUrl, "v1/sdk/run_action"),
        method: "POST",
        headers: _headers,
        contentType: "application/json",
        queryParameters: requestOptions?.queryParams,
        requestType: "json",
        body: _body,
        // Per-request timeout wins over the client default; 60 seconds when neither is set.
        timeoutMs: (requestOptions?.timeoutInSeconds ?? this._options?.timeoutInSeconds ?? 60) * 1000,
        maxRetries: requestOptions?.maxRetries ?? this._options?.maxRetries,
        abortSignal: requestOptions?.abortSignal,
    });

    if (_response.ok) {
        return { data: _response.body as Skyvern.RunSdkActionResponse, rawResponse: _response.rawResponse };
    }

    const _failure = _response.error;
    if (_failure.reason === "status-code") {
        switch (_failure.statusCode) {
            case 400:
                throw new Skyvern.BadRequestError(_failure.body as unknown, _response.rawResponse);
            case 403:
                throw new Skyvern.ForbiddenError(_failure.body as unknown, _response.rawResponse);
            case 404:
                throw new Skyvern.NotFoundError(_failure.body as unknown, _response.rawResponse);
            case 422:
                throw new Skyvern.UnprocessableEntityError(_failure.body as unknown, _response.rawResponse);
            default:
                throw new errors.SkyvernError({
                    statusCode: _failure.statusCode,
                    body: _failure.body,
                    rawResponse: _response.rawResponse,
                });
        }
    }

    // Remaining failure kinds carry no usable status-code mapping.
    switch (_failure.reason) {
        case "non-json":
            throw new errors.SkyvernError({
                statusCode: _failure.statusCode,
                body: _failure.rawBody,
                rawResponse: _response.rawResponse,
            });
        case "timeout":
            throw new errors.SkyvernTimeoutError("Timeout exceeded when calling POST /v1/sdk/run_action.");
        case "unknown":
            throw new errors.SkyvernError({
                message: _failure.errorMessage,
                rawResponse: _response.rawResponse,
            });
    }
}
}

View File

@@ -0,0 +1,27 @@
// This file was auto-generated by Fern from our API Definition.
import type * as Skyvern from "../../index.js";
/**
 * Parameters accepted by `runSdkAction`: one header-only field plus the JSON
 * body fields describing where and what to execute.
 *
 * @example
 *     {
 *         "x-user-agent": "x-user-agent",
 *         url: "url",
 *         action: {
 *             type: "ai_click"
 *         }
 *     }
 */
export interface RunSdkActionRequest {
    /** Optional user-agent value; the client sends it as the `x-user-agent` header, not in the body. */
    "x-user-agent"?: string;
    /** The URL where the action should be executed */
    url: string;
    /** The browser session ID */
    browser_session_id?: string;
    /** The browser address */
    browser_address?: string;
    /** Optional workflow run ID to continue an existing workflow run */
    workflow_run_id?: string;
    /** The action to execute with its specific parameters (discriminated by `type`) */
    action: Skyvern.SdkAction;
}

View File

@@ -7,6 +7,7 @@ export type { GetRunArtifactsRequest } from "./GetRunArtifactsRequest.js";
export type { GetScriptsRequest } from "./GetScriptsRequest.js";
export type { GetWorkflowsRequest } from "./GetWorkflowsRequest.js";
export type { LoginRequest } from "./LoginRequest.js";
export type { RunSdkActionRequest } from "./RunSdkActionRequest.js";
export type { RunTaskRequest } from "./RunTaskRequest.js";
export type { RunWorkflowRequest } from "./RunWorkflowRequest.js";
export type { TotpCodeCreate } from "./TotpCodeCreate.js";

View File

@@ -0,0 +1,19 @@
// This file was auto-generated by Fern from our API Definition.
/**
 * Parameters for a click performed through the SDK action endpoint.
 * Every field is optional.
 */
export interface ClickAction {
    /** CSS selector identifying the element to click. */
    selector?: string;
    /** What the click is meant to achieve. */
    intention?: string;
    /** Extra context supplied alongside the action. */
    data?: ClickAction.Data;
    /** Timeout, expressed in milliseconds. */
    timeout?: number;
}

export namespace ClickAction {
    /**
     * Extra context for the action: either a keyed object or free-form text.
     */
    export type Data = Record<string, unknown> | string;
}

View File

@@ -0,0 +1,25 @@
// This file was auto-generated by Fern from our API Definition.
/**
 * Parameters for a data-extraction request made through the SDK action endpoint.
 * Every field is optional.
 */
export interface ExtractAction {
    /** Prompt describing what to extract. */
    prompt?: string;
    /** Schema the extracted data should follow. */
    extract_schema?: ExtractAction.ExtractSchema;
    /** Mapping of error codes used during extraction. */
    error_code_mapping?: Record<string, string | undefined>;
    /** What the extraction is meant to achieve. */
    intention?: string;
    /** Extra context supplied alongside the action. */
    data?: ExtractAction.Data;
}

export namespace ExtractAction {
    /**
     * Schema for extraction: text, a list, or a keyed object.
     */
    export type ExtractSchema = string | unknown[] | Record<string, unknown>;
    /**
     * Extra context for the action: either a keyed object or free-form text.
     */
    export type Data = Record<string, unknown> | string;
}

View File

@@ -0,0 +1,25 @@
// This file was auto-generated by Fern from our API Definition.
/**
 * Parameters for typing text into an element through the SDK action endpoint.
 * Every field is optional.
 */
export interface InputTextAction {
    /** CSS selector identifying the input element. */
    selector?: string;
    /** Text to enter. */
    value?: string;
    /** What the input is meant to achieve. */
    intention?: string;
    /** Extra context supplied alongside the action. */
    data?: InputTextAction.Data;
    /** TOTP identifier used for input_text actions. */
    totp_identifier?: string;
    /** TOTP URL used for input_text actions. */
    totp_url?: string;
    /** Timeout, expressed in milliseconds. */
    timeout?: number;
}

export namespace InputTextAction {
    /**
     * Extra context for the action: either a keyed object or free-form text.
     */
    export type Data = Record<string, unknown> | string;
}

View File

@@ -0,0 +1,15 @@
// This file was auto-generated by Fern from our API Definition.
/**
 * Response returned by the SDK action endpoint.
 */
export interface RunSdkActionResponse {
    /** The workflow run ID used for this action */
    workflow_run_id: string;
    /**
     * The result from the action (e.g., selector, value, extracted data).
     * May be absent or explicitly `null` when the action produced no result.
     */
    result?: RunSdkActionResponse.Result;
}

export namespace RunSdkActionResponse {
    /**
     * The result from the action (e.g., selector, value, extracted data).
     *
     * `null` is part of the union because the API schema for `result` lists a
     * `null` alternative; without it, a `!== undefined` check would wrongly
     * narrow a `null` payload to a usable value.
     */
    export type Result = string | Record<string, unknown> | unknown[] | number | boolean | null;
}

View File

@@ -0,0 +1,27 @@
// This file was auto-generated by Fern from our API Definition.
import type * as Skyvern from "../index.js";
/**
 * An action that can be sent to the SDK action endpoint.
 * The `type` literal discriminates between the variants.
 */
export type SdkAction =
    | Skyvern.SdkAction.AiClick
    | Skyvern.SdkAction.AiInputText
    | Skyvern.SdkAction.AiSelectOption
    | Skyvern.SdkAction.Extract;

export namespace SdkAction {
    /** Click variant: `ClickAction` fields tagged with `type: "ai_click"`. */
    export interface AiClick extends Skyvern.ClickAction {
        type: "ai_click";
    }

    /** Text-input variant: `InputTextAction` fields tagged with `type: "ai_input_text"`. */
    export interface AiInputText extends Skyvern.InputTextAction {
        type: "ai_input_text";
    }

    /** Select-option variant: `SelectOptionAction` fields tagged with `type: "ai_select_option"`. */
    export interface AiSelectOption extends Skyvern.SelectOptionAction {
        type: "ai_select_option";
    }

    /** Extraction variant: `ExtractAction` fields tagged with `type: "extract"`. */
    export interface Extract extends Skyvern.ExtractAction {
        type: "extract";
    }
}

View File

@@ -0,0 +1,21 @@
// This file was auto-generated by Fern from our API Definition.
/**
 * Parameters for choosing an option through the SDK action endpoint.
 * Every field is optional.
 */
export interface SelectOptionAction {
    /** CSS selector identifying the element. */
    selector?: string;
    /** Value to select. */
    value?: string;
    /** What the selection is meant to achieve. */
    intention?: string;
    /** Extra context supplied alongside the action. */
    data?: SelectOptionAction.Data;
    /** Timeout, expressed in milliseconds. */
    timeout?: number;
}

export namespace SelectOptionAction {
    /**
     * Extra context for the action: either a keyed object or free-form text.
     */
    export type Data = Record<string, unknown> | string;
}

View File

@@ -19,6 +19,7 @@ export * from "./BitwardenSensitiveInformationParameter.js";
export * from "./BitwardenSensitiveInformationParameterYaml.js";
export * from "./BlockType.js";
export * from "./BrowserSessionResponse.js";
export * from "./ClickAction.js";
export * from "./CodeBlock.js";
export * from "./CodeBlockParametersItem.js";
export * from "./CodeBlockYaml.js";
@@ -33,6 +34,7 @@ export * from "./CredentialTypeOutput.js";
export * from "./CreditCardCredentialResponse.js";
export * from "./DownloadToS3Block.js";
export * from "./DownloadToS3BlockYaml.js";
export * from "./ExtractAction.js";
export * from "./ExtractionBlock.js";
export * from "./ExtractionBlockParametersItem.js";
export * from "./ExtractionBlockYaml.js";
@@ -62,6 +64,7 @@ export * from "./HumanInteractionBlock.js";
export * from "./HumanInteractionBlockParametersItem.js";
export * from "./HumanInteractionBlockYaml.js";
export * from "./InputOrSelectContext.js";
export * from "./InputTextAction.js";
export * from "./LoginBlock.js";
export * from "./LoginBlockParametersItem.js";
export * from "./LoginBlockYaml.js";
@@ -80,11 +83,14 @@ export * from "./PdfParserBlock.js";
export * from "./PdfParserBlockYaml.js";
export * from "./ProxyLocation.js";
export * from "./RunEngine.js";
export * from "./RunSdkActionResponse.js";
export * from "./RunStatus.js";
export * from "./Script.js";
export * from "./ScriptFileCreate.js";
export * from "./ScriptRunResponse.js";
export * from "./SdkAction.js";
export * from "./SelectOption.js";
export * from "./SelectOptionAction.js";
export * from "./SendEmailBlock.js";
export * from "./SendEmailBlockYaml.js";
export * from "./SkyvernForgeSdkSchemasCredentialsCredentialType.js";

View File

@@ -2429,4 +2429,128 @@ describe("SkyvernClient", () => {
});
}).rejects.toThrow(Skyvern.UnprocessableEntityError);
});
// Success path: the request must carry the "x-user-agent" header and the JSON
// body (without that field); the mocked 200 body is returned unchanged.
test("run_sdk_action (1)", async () => {
    const server = mockServerPool.createServer();
    const client = new SkyvernClient({ apiKey: "test", environment: server.baseUrl });
    const expectedRequestBody = { url: "url", action: { type: "ai_click" } };
    const cannedResponseBody = { workflow_run_id: "workflow_run_id", result: "result" };
    server
        .mockEndpoint()
        .post("/v1/sdk/run_action")
        .header("x-user-agent", "x-user-agent")
        .jsonBody(expectedRequestBody)
        .respondWith()
        .statusCode(200)
        .jsonBody(cannedResponseBody)
        .build();

    const response = await client.runSdkAction({
        "x-user-agent": "x-user-agent",
        url: "url",
        action: {
            type: "ai_click",
        },
    });

    expect(response).toEqual({
        workflow_run_id: "workflow_run_id",
        result: "result",
    });
});

// Error paths: each mapped status code must reject with its dedicated error class.
const runSdkActionErrorCases = [
    { label: "run_sdk_action (2)", status: 400, expectedError: Skyvern.BadRequestError },
    { label: "run_sdk_action (3)", status: 403, expectedError: Skyvern.ForbiddenError },
    { label: "run_sdk_action (4)", status: 404, expectedError: Skyvern.NotFoundError },
    { label: "run_sdk_action (5)", status: 422, expectedError: Skyvern.UnprocessableEntityError },
];

for (const { label, status, expectedError } of runSdkActionErrorCases) {
    test(label, async () => {
        const server = mockServerPool.createServer();
        const client = new SkyvernClient({ apiKey: "test", environment: server.baseUrl });
        const expectedRequestBody = { url: "url", action: { type: "ai_click" } };
        const cannedErrorBody = { key: "value" };
        server
            .mockEndpoint()
            .post("/v1/sdk/run_action")
            .jsonBody(expectedRequestBody)
            .respondWith()
            .statusCode(status)
            .jsonBody(cannedErrorBody)
            .build();

        await expect(async () => {
            return await client.runSdkAction({
                url: "url",
                action: {
                    type: "ai_click",
                },
            });
        }).rejects.toThrow(expectedError);
    });
}
});

View File

@@ -265,6 +265,7 @@ if typing.TYPE_CHECKING:
ProxyLocation,
RunEngine,
RunSdkActionResponse,
RunSdkActionResponseResult,
RunStatus,
Script,
ScriptFileCreate,
@@ -727,6 +728,7 @@ _dynamic_imports: typing.Dict[str, str] = {
"ProxyLocation": ".types",
"RunEngine": ".types",
"RunSdkActionResponse": ".types",
"RunSdkActionResponseResult": ".types",
"RunStatus": ".types",
"Script": ".types",
"ScriptFileCreate": ".types",
@@ -1212,6 +1214,7 @@ __all__ = [
"ProxyLocation",
"RunEngine",
"RunSdkActionResponse",
"RunSdkActionResponseResult",
"RunStatus",
"Script",
"ScriptFileCreate",

View File

@@ -2146,6 +2146,17 @@ class RawSkyvern:
),
),
)
if _response.status_code == 404:
raise NotFoundError(
headers=dict(_response.headers),
body=typing.cast(
typing.Optional[typing.Any],
parse_obj_as(
type_=typing.Optional[typing.Any], # type: ignore
object_=_response.json(),
),
),
)
if _response.status_code == 422:
raise UnprocessableEntityError(
headers=dict(_response.headers),
@@ -4267,6 +4278,17 @@ class AsyncRawSkyvern:
),
),
)
if _response.status_code == 404:
raise NotFoundError(
headers=dict(_response.headers),
body=typing.cast(
typing.Optional[typing.Any],
parse_obj_as(
type_=typing.Optional[typing.Any], # type: ignore
object_=_response.json(),
),
),
)
if _response.status_code == 422:
raise UnprocessableEntityError(
headers=dict(_response.headers),

View File

@@ -290,6 +290,7 @@ if typing.TYPE_CHECKING:
from .proxy_location import ProxyLocation
from .run_engine import RunEngine
from .run_sdk_action_response import RunSdkActionResponse
from .run_sdk_action_response_result import RunSdkActionResponseResult
from .run_status import RunStatus
from .script import Script
from .script_file_create import ScriptFileCreate
@@ -762,6 +763,7 @@ _dynamic_imports: typing.Dict[str, str] = {
"ProxyLocation": ".proxy_location",
"RunEngine": ".run_engine",
"RunSdkActionResponse": ".run_sdk_action_response",
"RunSdkActionResponseResult": ".run_sdk_action_response_result",
"RunStatus": ".run_status",
"Script": ".script",
"ScriptFileCreate": ".script_file_create",
@@ -1238,6 +1240,7 @@ __all__ = [
"ProxyLocation",
"RunEngine",
"RunSdkActionResponse",
"RunSdkActionResponseResult",
"RunStatus",
"Script",
"ScriptFileCreate",

View File

@@ -4,6 +4,7 @@ import typing
import pydantic
from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
from .run_sdk_action_response_result import RunSdkActionResponseResult
class RunSdkActionResponse(UniversalBaseModel):
@@ -12,7 +13,7 @@ class RunSdkActionResponse(UniversalBaseModel):
The workflow run ID used for this action
"""
result: typing.Optional[typing.Optional[typing.Any]] = pydantic.Field(default=None)
result: typing.Optional[RunSdkActionResponseResult] = pydantic.Field(default=None)
"""
The result from the action (e.g., selector, value, extracted data)
"""

View File

@@ -0,0 +1,7 @@
# This file was auto-generated by Fern from our API Definition.
import typing
# Shapes the `result` field of RunSdkActionResponse may take:
# text, a keyed object, a list, a number, or a boolean.
RunSdkActionResponseResult = typing.Union[
    str,
    typing.Dict[str, typing.Optional[typing.Any]],
    typing.List[typing.Optional[typing.Any]],
    float,
    bool,
]

View File

@@ -1,4 +1,4 @@
import json
from typing import Any
import structlog
from fastapi import Depends, HTTPException, status
@@ -95,8 +95,8 @@ async def run_sdk_action(
task = await app.DATABASE.create_task(
organization_id=organization_id,
url=action_request.url,
navigation_goal=None,
navigation_payload=None,
navigation_goal=action.intention,
navigation_payload=action.data,
data_extraction_goal=None,
title=f"SDK Action Task: {action_request.action.type}",
workflow_run_id=workflow_run.workflow_run_id,
@@ -118,6 +118,18 @@ async def run_sdk_action(
task_id=task.task_id,
)
await app.WORKFLOW_CONTEXT_MANAGER.initialize_workflow_run_context(
organization,
workflow_run.workflow_run_id,
workflow.title,
workflow.workflow_id,
workflow.workflow_permanent_id,
[],
[],
[],
[],
)
context = skyvern_context.ensure_context()
skyvern_context.set(
SkyvernContext(
@@ -131,7 +143,7 @@ async def run_sdk_action(
workflow_run_id=workflow_run.workflow_run_id,
)
)
result = None
result: Any | None = None
try:
scraped_page = await SkyvernPage.create_scraped_page(browser_session_id=browser_session_id)
page = await scraped_page._browser_state.must_get_working_page()
@@ -170,7 +182,7 @@ async def run_sdk_action(
intention=action.intention,
data=action.data,
)
result = json.dumps(extract_result)
result = extract_result
finally:
skyvern_context.reset()

View File

@@ -2,14 +2,9 @@ from typing import TYPE_CHECKING, Any
from playwright.async_api import Page
from skyvern.client import SdkAction_AiClick, SdkAction_AiInputText, SdkAction_AiSelectOption, SdkAction_Extract
from skyvern.config import settings
from skyvern.core.script_generations.skyvern_page_ai import SkyvernPageAi
from skyvern.forge.sdk.schemas.sdk_actions import (
ClickAction,
ExtractAction,
InputTextAction,
SelectOptionAction,
)
if TYPE_CHECKING:
from skyvern.library.skyvern_browser import SkyvernBrowser
@@ -35,18 +30,17 @@ class SdkSkyvernPageAi(SkyvernPageAi):
) -> str:
"""Click an element using AI via API call."""
action = ClickAction(
selector=selector,
intention=intention,
data=data,
timeout=timeout,
)
response = await self._browser.client.run_sdk_action(
url=self._page.url,
browser_session_id=self._browser.browser_session_id,
browser_address=self._browser.browser_address,
workflow_run_id=self._browser.workflow_run_id,
action=action,
action=SdkAction_AiClick(
selector=selector,
intention=intention,
data=data,
timeout=timeout,
),
)
self._browser.workflow_run_id = response.workflow_run_id
return response.result if response.result else selector
@@ -65,7 +59,7 @@ class SdkSkyvernPageAi(SkyvernPageAi):
response = await self._browser.client.run_sdk_action(
url=self._page.url,
action=InputTextAction(
action=SdkAction_AiInputText(
selector=selector,
value=value,
intention=intention,
@@ -93,7 +87,7 @@ class SdkSkyvernPageAi(SkyvernPageAi):
response = await self._browser.client.run_sdk_action(
url=self._page.url,
action=SelectOptionAction(
action=SdkAction_AiSelectOption(
selector=selector,
value=value,
intention=intention,
@@ -129,7 +123,7 @@ class SdkSkyvernPageAi(SkyvernPageAi):
response = await self._browser.client.run_sdk_action(
url=self._page.url,
action=ExtractAction(
action=SdkAction_Extract(
prompt=prompt,
extract_schema=schema,
error_code_mapping=error_code_mapping,

View File

@@ -1,5 +1,5 @@
import asyncio
from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING, Any, overload
from playwright.async_api import Page
@@ -230,24 +230,64 @@ class SkyvernBrowserPage:
self._ai = SdkSkyvernPageAi(browser, page)
self.run = SkyvernPageRun(browser, page)
@overload
async def click(
self,
selector: str,
*,
prompt: str | None = None,
ai: str | None = "fallback",
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
) -> str | None: ...
@overload
async def click(
self,
*,
prompt: str,
ai: str | None = "fallback",
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
) -> str | None: ...
async def click(
self,
selector: str | None = None,
intention: str | None = None,
*,
prompt: str | None = None,
ai: str | None = "fallback",
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
) -> str | None:
"""Click an element identified by ``selector``.
"""Click an element using a CSS selector, AI-powered prompt matching, or both.
When ``intention`` and ``data`` are provided a new click action is
generated via the ``single-click-action`` prompt. The model returns a
fresh "xpath=..." selector based on the current DOM and the updated data for this run.
The browser then clicks the element using this newly generated xpath selector.
This method supports three modes:
- **Selector-based**: Click the element matching the CSS selector
- **AI-powered**: Use natural language to describe which element to click
- **Fallback mode** (default): Try the selector first, fall back to AI if it fails
If the prompt generation or parsing fails for any reason we fall back to
clicking the originally supplied ``selector``.
Args:
selector: CSS selector for the target element.
prompt: Natural language description of which element to click.
ai: AI behavior mode. Defaults to "fallback" which tries selector first, then AI.
data: Additional context data for AI processing.
timeout: Maximum time to wait for the click action in milliseconds.
Returns:
The selector string that was successfully used to click the element, or None.
Examples:
```python
# Click using a CSS selector
await page.click("#open-invoice-button")
# Click using AI with natural language
await page.click(prompt="Click on the 'Open Invoice' button")
# Try selector first, fall back to AI if selector fails
await page.click("#open-invoice-button", prompt="Click on the 'Open Invoice' button")
```
"""
if ai == "fallback":
@@ -262,10 +302,10 @@ class SkyvernBrowserPage:
error_to_raise = e
# if the original selector doesn't work, try to click the element with the ai generated selector
if intention:
if prompt:
return await self._ai.ai_click(
selector=selector or "",
intention=intention,
intention=prompt,
data=data,
timeout=timeout,
)
@@ -274,10 +314,10 @@ class SkyvernBrowserPage:
else:
return selector
elif ai == "proactive":
if intention:
if prompt:
return await self._ai.ai_click(
selector=selector or "",
intention=intention,
intention=prompt,
data=data,
timeout=timeout,
)
@@ -287,6 +327,244 @@ class SkyvernBrowserPage:
await locator.click(timeout=timeout)
return selector
@overload
async def fill(
    self,
    selector: str,
    value: str,
    *,
    prompt: str | None = None,
    ai: str | None = "fallback",
    data: str | dict[str, Any] | None = None,
    timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
    totp_identifier: str | None = None,
    totp_url: str | None = None,
) -> str: ...

@overload
async def fill(
    self,
    *,
    prompt: str,
    value: str | None = None,
    selector: str | None = None,
    ai: str | None = "fallback",
    data: str | dict[str, Any] | None = None,
    timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
    totp_identifier: str | None = None,
    totp_url: str | None = None,
) -> str: ...

async def fill(
    self,
    selector: str | None = None,
    value: str | None = None,
    *,
    prompt: str | None = None,
    ai: str | None = "fallback",
    data: str | dict[str, Any] | None = None,
    timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
    totp_identifier: str | None = None,
    totp_url: str | None = None,
) -> str:
    """Fill an input field, addressed by CSS selector, by a natural-language prompt, or both.

    The two overloads describe the supported call shapes: positional
    ``selector`` and ``value`` (optionally with a ``prompt``), or a
    keyword-only ``prompt`` with ``selector``/``value`` optional. All the work
    is delegated to ``_input_text``; ``prompt`` is passed on as its
    ``intention`` argument.

    Args:
        selector: CSS selector for the target input element.
        value: The text value to input into the field.
        prompt: Natural language description of which field to fill and what value.
        ai: AI behavior mode, forwarded unchanged. Defaults to "fallback".
        data: Additional context data for AI processing.
        timeout: Maximum time to wait for the fill action in milliseconds.
        totp_identifier: TOTP identifier for time-based one-time password fields.
        totp_url: URL to fetch TOTP codes from for authentication.

    Returns:
        The string returned by ``_input_text``.

    Examples:
        ```python
        # Fill using selector and value (both positional)
        await page.fill("#email-input", "user@example.com")

        # Fill using AI with natural language (prompt only)
        await page.fill(prompt="Fill 'user@example.com' in the email address field")

        # Selector and value together with a prompt
        await page.fill(
            "#email-input",
            "user@example.com",
            prompt="Fill the email address with user@example.com"
        )
        ```
    """
    # A missing selector or value is forwarded as an empty string.
    target_selector = selector or ""
    text_to_enter = value or ""
    return await self._input_text(
        selector=target_selector,
        value=text_to_enter,
        ai=ai,
        intention=prompt,
        data=data,
        timeout=timeout,
        totp_identifier=totp_identifier,
        totp_url=totp_url,
    )
async def goto(self, url: str, **kwargs: Any) -> None:
    """Navigate the wrapped page to ``url``.

    Args:
        url: URL to navigate page to.
        **kwargs: Passed through unchanged to the page's ``goto``
            (e.g. timeout, wait_until, referer).
    """
    page = self._page
    await page.goto(url, **kwargs)
async def type(self, selector: str, text: str, **kwargs: Any) -> None:
    """Type ``text`` into the element matched by ``selector``.

    Args:
        selector: A selector to search for an element to type into.
        text: Text to type into the element.
        **kwargs: Passed through unchanged to the page's ``type``
            (e.g. delay, timeout, no_wait_after).
    """
    page = self._page
    await page.type(selector, text, **kwargs)
@overload
async def select_option(
    self,
    selector: str,
    value: str,
    *,
    prompt: str | None = None,
    ai: str | None = "fallback",
    data: str | dict[str, Any] | None = None,
    timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
) -> str: ...

@overload
async def select_option(
    self,
    *,
    prompt: str,
    value: str | None = None,
    selector: str | None = None,
    ai: str | None = "fallback",
    data: str | dict[str, Any] | None = None,
    timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
) -> str: ...

async def select_option(
    self,
    selector: str | None = None,
    value: str | None = None,
    *,
    prompt: str | None = None,
    ai: str | None = "fallback",
    data: str | dict[str, Any] | None = None,
    timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
) -> str:
    """Select a dropdown option by CSS selector, by a natural-language prompt, or both.

    Behaviour by ``ai`` mode:

    - ``"fallback"`` (default): if a selector is given, try it first and
      return on success. If that fails, or no selector was given, and a prompt
      is available, hand over to the AI selection. With no prompt, re-raise
      the selector error if there was one; otherwise return ``value`` without
      touching the page.
    - ``"proactive"`` with a prompt: go straight to the AI selection.
    - anything else (including ``"proactive"`` without a prompt): use the
      selector when given; otherwise return ``value`` without touching the page.

    Args:
        selector: CSS selector for the target select/dropdown element.
        value: The option value to select. ``None`` is treated as ``""``.
        prompt: Natural language description of which option to select.
        ai: AI behavior mode, see above.
        data: Additional context data for AI processing.
        timeout: Maximum time to wait for the select action in milliseconds.

    Returns:
        ``value`` on the selector path, or whatever the AI selection returns.

    Examples:
        ```python
        # Select using selector and value (both positional)
        await page.select_option("#country", "us")

        # Select using AI with natural language (prompt only)
        await page.select_option(prompt="Select 'United States' from the country dropdown")

        # Try selector first, fall back to AI if selector fails
        await page.select_option(
            "#country",
            "us",
            prompt="Select United States from country"
        )
        ```
    """
    value = value or ""

    async def _select_via_selector(css: str) -> str:
        # Direct path through the page locator.
        dropdown = self._page.locator(css)
        await dropdown.select_option(value, timeout=timeout)
        return value

    async def _select_via_ai() -> str:
        # AI path: the prompt is passed on as the action's intention.
        return await self._ai.ai_select_option(
            selector=selector or "",
            value=value,
            intention=prompt,
            data=data,
            timeout=timeout,
        )

    if ai == "fallback":
        selector_failure = None
        if selector:
            try:
                return await _select_via_selector(selector)
            except Exception as exc:
                # Remember the failure; it is only raised if AI cannot take over.
                selector_failure = exc
        if prompt:
            return await _select_via_ai()
        if selector_failure:
            raise selector_failure
        return value

    if ai == "proactive" and prompt:
        return await _select_via_ai()

    if selector:
        return await _select_via_selector(selector)
    return value
async def extract(
self,
prompt: str,
schema: dict[str, Any] | list | str | None = None,
error_code_mapping: dict[str, str] | None = None,
intention: str | None = None,
data: str | dict[str, Any] | None = None,
) -> dict[str, Any] | list | str | None:
return await self._ai.ai_extract(prompt, schema, error_code_mapping, intention, data)
async def reload(self, **kwargs: Any) -> None:
    """Reload the wrapped page.

    Args:
        **kwargs: Passed through unchanged to the page's ``reload``
            (e.g. timeout, wait_until).
    """
    page = self._page
    await page.reload(**kwargs)
async def screenshot(self, **kwargs: Any) -> bytes:
    """Capture a screenshot of the wrapped page.

    Args:
        **kwargs: Passed through unchanged to the page's ``screenshot``
            (e.g. path, full_page, clip, type, quality).

    Returns:
        bytes: The value returned by the page's ``screenshot`` call.
    """
    image = await self._page.screenshot(**kwargs)
    return image
async def _input_text(
self,
selector: str,
@@ -346,76 +624,3 @@ class SkyvernBrowserPage:
locator = self._page.locator(selector)
await handler_utils.input_sequentially(locator, value, timeout=timeout)
return value
async def fill(
self,
selector: str,
value: str,
ai: str | None = "fallback",
intention: str | None = None,
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
totp_identifier: str | None = None,
totp_url: str | None = None,
) -> str:
return await self._input_text(
selector=selector,
value=value,
ai=ai,
intention=intention,
data=data,
timeout=timeout,
totp_identifier=totp_identifier,
totp_url=totp_url,
)
async def goto(self, url: str, **kwargs: Any) -> None:
"""Navigate to the given URL.
Args:
url: URL to navigate page to.
**kwargs: Additional options like timeout, wait_until, referer, etc.
"""
await self._page.goto(url, **kwargs)
async def type(self, selector: str, text: str, **kwargs: Any) -> None:
"""Type text into an element character by character.
Args:
selector: A selector to search for an element to type into.
text: Text to type into the element.
**kwargs: Additional options like delay, timeout, no_wait_after, etc.
"""
await self._page.type(selector, text, **kwargs)
async def select_option(self, selector: str, value: Any = None, **kwargs: Any) -> list[str]:
"""Select option(s) in a <select> element.
Args:
selector: A selector to search for a select element.
value: Option value(s) to select. Can be a string, list of strings, or dict with value/label/index.
**kwargs: Additional options like timeout, force, no_wait_after, etc.
Returns:
List of option values that have been successfully selected.
"""
return await self._page.select_option(selector, value, **kwargs)
async def reload(self, **kwargs: Any) -> None:
"""Reload the current page.
Args:
**kwargs: Additional options like timeout, wait_until, etc.
"""
await self._page.reload(**kwargs)
async def screenshot(self, **kwargs: Any) -> bytes:
"""Take a screenshot of the page.
Args:
**kwargs: Additional options like path, full_page, clip, type, quality, etc.
Returns:
bytes: The screenshot as bytes (unless path is specified, then saves to file).
"""
return await self._page.screenshot(**kwargs)