From af9a5f31e4c4fd9f37c990ab49ce03434e83bb25 Mon Sep 17 00:00:00 2001 From: Stanislav Novosad Date: Thu, 30 Oct 2025 09:05:20 -0600 Subject: [PATCH] SDK: support select_option and extract (#3850) --- fern/openapi/skyvern_openapi.json | 23 ++ skyvern-ts/client/reference.md | 25 +- skyvern-ts/client/src/Client.ts | 97 +++++ .../client/requests/RunSdkActionRequest.ts | 27 ++ .../client/src/api/client/requests/index.ts | 1 + .../client/src/api/types/ClickAction.ts | 19 + .../client/src/api/types/ExtractAction.ts | 25 ++ .../client/src/api/types/InputTextAction.ts | 25 ++ .../src/api/types/RunSdkActionResponse.ts | 15 + skyvern-ts/client/src/api/types/SdkAction.ts | 27 ++ .../src/api/types/SelectOptionAction.ts | 21 + skyvern-ts/client/src/api/types/index.ts | 6 + skyvern-ts/client/tests/wire/main.test.ts | 124 ++++++ skyvern/client/__init__.py | 3 + skyvern/client/raw_client.py | 22 + skyvern/client/types/__init__.py | 3 + .../client/types/run_sdk_action_response.py | 3 +- .../types/run_sdk_action_response_result.py | 7 + skyvern/forge/sdk/routes/sdk.py | 22 +- skyvern/library/SdkSkyvernPageAi.py | 26 +- skyvern/library/skyvern_browser_page.py | 377 ++++++++++++++---- 21 files changed, 774 insertions(+), 124 deletions(-) create mode 100644 skyvern-ts/client/src/api/client/requests/RunSdkActionRequest.ts create mode 100644 skyvern-ts/client/src/api/types/ClickAction.ts create mode 100644 skyvern-ts/client/src/api/types/ExtractAction.ts create mode 100644 skyvern-ts/client/src/api/types/InputTextAction.ts create mode 100644 skyvern-ts/client/src/api/types/RunSdkActionResponse.ts create mode 100644 skyvern-ts/client/src/api/types/SdkAction.ts create mode 100644 skyvern-ts/client/src/api/types/SelectOptionAction.ts create mode 100644 skyvern/client/types/run_sdk_action_response_result.py diff --git a/fern/openapi/skyvern_openapi.json b/fern/openapi/skyvern_openapi.json index b02ea907..fb25bde3 100644 --- a/fern/openapi/skyvern_openapi.json +++ b/fern/openapi/skyvern_openapi.json @@ -2317,6 +2317,9 @@ "403": { "description": "Unauthorized - Invalid or missing authentication" }, + "404": { + "description": "Workflow run or workflow not found" + }, "400": { "description": "Invalid operation" }, @@ -9733,6 +9736,26 @@ "description": "The workflow run ID used for this action" }, "result": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "object" + }, + { + "type": "array" + }, + { + "type": "number" + }, + { + "type": "boolean" + }, + { + "type": "null" + } + ], "title": "Result", "description": "The result from the action (e.g., selector, value, extracted data)" } diff --git a/skyvern-ts/client/reference.md b/skyvern-ts/client/reference.md index 99422970..64f98f80 100644 --- a/skyvern-ts/client/reference.md +++ b/skyvern-ts/client/reference.md @@ -1,5 +1,5 @@ # Reference -
client.deployScript(scriptId, { ...params }) -> Skyvern.CreateScriptResponse +
client.runSdkAction({ ...params }) -> Skyvern.RunSdkActionResponse
@@ -11,7 +11,7 @@
-Deploy a script with updated files, creating a new version +Execute a single SDK action with the specified parameters
@@ -26,11 +26,12 @@ Deploy a script with updated files, creating a new version
```typescript -await client.deployScript("s_abc123", { - files: [{ - path: "src/main.py", - content: "content" - }] +await client.runSdkAction({ + "x-user-agent": "x-user-agent", + url: "url", + action: { + type: "ai_click" + } }); ``` @@ -47,15 +48,7 @@ await client.deployScript("s_abc123", {
-**scriptId:** `string` — The unique identifier of the script - -
-
- -
-
- -**request:** `Skyvern.DeployScriptRequest` +**request:** `Skyvern.RunSdkActionRequest`
diff --git a/skyvern-ts/client/src/Client.ts b/skyvern-ts/client/src/Client.ts index 90876356..35735e93 100644 --- a/skyvern-ts/client/src/Client.ts +++ b/skyvern-ts/client/src/Client.ts @@ -2202,4 +2202,101 @@ export class SkyvernClient { }); } } + + /** + * Execute a single SDK action with the specified parameters + * + * @param {Skyvern.RunSdkActionRequest} request + * @param {SkyvernClient.RequestOptions} requestOptions - Request-specific configuration. + * + * @throws {@link Skyvern.BadRequestError} + * @throws {@link Skyvern.ForbiddenError} + * @throws {@link Skyvern.NotFoundError} + * @throws {@link Skyvern.UnprocessableEntityError} + * + * @example + * await client.runSdkAction({ + * "x-user-agent": "x-user-agent", + * url: "url", + * action: { + * type: "ai_click" + * } + * }) + */ + public runSdkAction( + request: Skyvern.RunSdkActionRequest, + requestOptions?: SkyvernClient.RequestOptions, + ): core.HttpResponsePromise { + return core.HttpResponsePromise.fromPromise(this.__runSdkAction(request, requestOptions)); + } + + private async __runSdkAction( + request: Skyvern.RunSdkActionRequest, + requestOptions?: SkyvernClient.RequestOptions, + ): Promise> { + const { "x-user-agent": userAgent, ..._body } = request; + const _headers: core.Fetcher.Args["headers"] = mergeHeaders( + this._options?.headers, + mergeOnlyDefinedHeaders({ + "x-user-agent": userAgent != null ? userAgent : undefined, + "x-api-key": requestOptions?.apiKey ?? this._options?.apiKey, + }), + requestOptions?.headers, + ); + const _response = await core.fetcher({ + url: core.url.join( + (await core.Supplier.get(this._options.baseUrl)) ?? + (await core.Supplier.get(this._options.environment)) ?? + environments.SkyvernEnvironment.Production, + "v1/sdk/run_action", + ), + method: "POST", + headers: _headers, + contentType: "application/json", + queryParameters: requestOptions?.queryParams, + requestType: "json", + body: _body, + timeoutMs: (requestOptions?.timeoutInSeconds ?? this._options?.timeoutInSeconds ?? 60) * 1000, + maxRetries: requestOptions?.maxRetries ?? this._options?.maxRetries, + abortSignal: requestOptions?.abortSignal, + }); + if (_response.ok) { + return { data: _response.body as Skyvern.RunSdkActionResponse, rawResponse: _response.rawResponse }; + } + + if (_response.error.reason === "status-code") { + switch (_response.error.statusCode) { + case 400: + throw new Skyvern.BadRequestError(_response.error.body as unknown, _response.rawResponse); + case 403: + throw new Skyvern.ForbiddenError(_response.error.body as unknown, _response.rawResponse); + case 404: + throw new Skyvern.NotFoundError(_response.error.body as unknown, _response.rawResponse); + case 422: + throw new Skyvern.UnprocessableEntityError(_response.error.body as unknown, _response.rawResponse); + default: + throw new errors.SkyvernError({ + statusCode: _response.error.statusCode, + body: _response.error.body, + rawResponse: _response.rawResponse, + }); + } + } + + switch (_response.error.reason) { + case "non-json": + throw new errors.SkyvernError({ + statusCode: _response.error.statusCode, + body: _response.error.rawBody, + rawResponse: _response.rawResponse, + }); + case "timeout": + throw new errors.SkyvernTimeoutError("Timeout exceeded when calling POST /v1/sdk/run_action."); + case "unknown": + throw new errors.SkyvernError({ + message: _response.error.errorMessage, + rawResponse: _response.rawResponse, + }); + } + } } diff --git a/skyvern-ts/client/src/api/client/requests/RunSdkActionRequest.ts b/skyvern-ts/client/src/api/client/requests/RunSdkActionRequest.ts new file mode 100644 index 00000000..3ffc9f70 --- /dev/null +++ b/skyvern-ts/client/src/api/client/requests/RunSdkActionRequest.ts @@ -0,0 +1,27 @@ +// This file was auto-generated by Fern from our API Definition. + +import type * as Skyvern from "../../index.js"; + +/** + * @example + * { + * "x-user-agent": "x-user-agent", + * url: "url", + * action: { + * type: "ai_click" + * } + * } + */ +export interface RunSdkActionRequest { + "x-user-agent"?: string; + /** The URL where the action should be executed */ + url: string; + /** The browser session ID */ + browser_session_id?: string; + /** The browser address */ + browser_address?: string; + /** Optional workflow run ID to continue an existing workflow run */ + workflow_run_id?: string; + /** The action to execute with its specific parameters */ + action: Skyvern.SdkAction; +} diff --git a/skyvern-ts/client/src/api/client/requests/index.ts b/skyvern-ts/client/src/api/client/requests/index.ts index aca2642c..42868e3e 100644 --- a/skyvern-ts/client/src/api/client/requests/index.ts +++ b/skyvern-ts/client/src/api/client/requests/index.ts @@ -7,6 +7,7 @@ export type { GetRunArtifactsRequest } from "./GetRunArtifactsRequest.js"; export type { GetScriptsRequest } from "./GetScriptsRequest.js"; export type { GetWorkflowsRequest } from "./GetWorkflowsRequest.js"; export type { LoginRequest } from "./LoginRequest.js"; +export type { RunSdkActionRequest } from "./RunSdkActionRequest.js"; export type { RunTaskRequest } from "./RunTaskRequest.js"; export type { RunWorkflowRequest } from "./RunWorkflowRequest.js"; export type { TotpCodeCreate } from "./TotpCodeCreate.js"; diff --git a/skyvern-ts/client/src/api/types/ClickAction.ts b/skyvern-ts/client/src/api/types/ClickAction.ts new file mode 100644 index 00000000..4983ff0f --- /dev/null +++ b/skyvern-ts/client/src/api/types/ClickAction.ts @@ -0,0 +1,19 @@ +// This file was auto-generated by Fern from our API Definition. + +export interface ClickAction { + /** CSS selector for the element */ + selector?: string; + /** The intention or goal of the click */ + intention?: string; + /** Additional context data */ + data?: ClickAction.Data; + /** Timeout in milliseconds */ + timeout?: number; +} + +export namespace ClickAction { + /** + * Additional context data + */ + export type Data = string | Record; +} diff --git a/skyvern-ts/client/src/api/types/ExtractAction.ts b/skyvern-ts/client/src/api/types/ExtractAction.ts new file mode 100644 index 00000000..6382cbbc --- /dev/null +++ b/skyvern-ts/client/src/api/types/ExtractAction.ts @@ -0,0 +1,25 @@ +// This file was auto-generated by Fern from our API Definition. + +export interface ExtractAction { + /** Extraction prompt */ + prompt?: string; + /** Schema for extraction */ + extract_schema?: ExtractAction.ExtractSchema; + /** Error code mapping for extraction */ + error_code_mapping?: Record; + /** The intention or goal of the extraction */ + intention?: string; + /** Additional context data */ + data?: ExtractAction.Data; +} + +export namespace ExtractAction { + /** + * Schema for extraction + */ + export type ExtractSchema = Record | unknown[] | string; + /** + * Additional context data + */ + export type Data = string | Record; +} diff --git a/skyvern-ts/client/src/api/types/InputTextAction.ts b/skyvern-ts/client/src/api/types/InputTextAction.ts new file mode 100644 index 00000000..695615ec --- /dev/null +++ b/skyvern-ts/client/src/api/types/InputTextAction.ts @@ -0,0 +1,25 @@ +// This file was auto-generated by Fern from our API Definition. + +export interface InputTextAction { + /** CSS selector for the element */ + selector?: string; + /** Value to input */ + value?: string; + /** The intention or goal of the input */ + intention?: string; + /** Additional context data */ + data?: InputTextAction.Data; + /** TOTP identifier for input_text actions */ + totp_identifier?: string; + /** TOTP URL for input_text actions */ + totp_url?: string; + /** Timeout in milliseconds */ + timeout?: number; +} + +export namespace InputTextAction { + /** + * Additional context data + */ + export type Data = string | Record; +} diff --git a/skyvern-ts/client/src/api/types/RunSdkActionResponse.ts b/skyvern-ts/client/src/api/types/RunSdkActionResponse.ts new file mode 100644 index 00000000..c3c4352e --- /dev/null +++ b/skyvern-ts/client/src/api/types/RunSdkActionResponse.ts @@ -0,0 +1,15 @@ +// This file was auto-generated by Fern from our API Definition. + +export interface RunSdkActionResponse { + /** The workflow run ID used for this action */ + workflow_run_id: string; + /** The result from the action (e.g., selector, value, extracted data) */ + result?: RunSdkActionResponse.Result; +} + +export namespace RunSdkActionResponse { + /** + * The result from the action (e.g., selector, value, extracted data) + */ + export type Result = string | Record | unknown[] | number | boolean; +} diff --git a/skyvern-ts/client/src/api/types/SdkAction.ts b/skyvern-ts/client/src/api/types/SdkAction.ts new file mode 100644 index 00000000..2bb07cba --- /dev/null +++ b/skyvern-ts/client/src/api/types/SdkAction.ts @@ -0,0 +1,27 @@ +// This file was auto-generated by Fern from our API Definition. + +import type * as Skyvern from "../index.js"; + +export type SdkAction = + | Skyvern.SdkAction.AiClick + | Skyvern.SdkAction.AiInputText + | Skyvern.SdkAction.AiSelectOption + | Skyvern.SdkAction.Extract; + +export namespace SdkAction { + export interface AiClick extends Skyvern.ClickAction { + type: "ai_click"; + } + + export interface AiInputText extends Skyvern.InputTextAction { + type: "ai_input_text"; + } + + export interface AiSelectOption extends Skyvern.SelectOptionAction { + type: "ai_select_option"; + } + + export interface Extract extends Skyvern.ExtractAction { + type: "extract"; + } +} diff --git a/skyvern-ts/client/src/api/types/SelectOptionAction.ts b/skyvern-ts/client/src/api/types/SelectOptionAction.ts new file mode 100644 index 00000000..966ab094 --- /dev/null +++ b/skyvern-ts/client/src/api/types/SelectOptionAction.ts @@ -0,0 +1,21 @@ +// This file was auto-generated by Fern from our API Definition. + +export interface SelectOptionAction { + /** CSS selector for the element */ + selector?: string; + /** Value to select */ + value?: string; + /** The intention or goal of the selection */ + intention?: string; + /** Additional context data */ + data?: SelectOptionAction.Data; + /** Timeout in milliseconds */ + timeout?: number; +} + +export namespace SelectOptionAction { + /** + * Additional context data + */ + export type Data = string | Record; +} diff --git a/skyvern-ts/client/src/api/types/index.ts b/skyvern-ts/client/src/api/types/index.ts index 26eca585..07bbca99 100644 --- a/skyvern-ts/client/src/api/types/index.ts +++ b/skyvern-ts/client/src/api/types/index.ts @@ -19,6 +19,7 @@ export * from "./BitwardenSensitiveInformationParameter.js"; export * from "./BitwardenSensitiveInformationParameterYaml.js"; export * from "./BlockType.js"; export * from "./BrowserSessionResponse.js"; +export * from "./ClickAction.js"; export * from "./CodeBlock.js"; export * from "./CodeBlockParametersItem.js"; export * from "./CodeBlockYaml.js"; @@ -33,6 +34,7 @@ export * from "./CredentialTypeOutput.js"; export * from "./CreditCardCredentialResponse.js"; export * from "./DownloadToS3Block.js"; export * from "./DownloadToS3BlockYaml.js"; +export * from "./ExtractAction.js"; export * from "./ExtractionBlock.js"; export * from "./ExtractionBlockParametersItem.js"; export * from "./ExtractionBlockYaml.js"; @@ -62,6 +64,7 @@ export * from "./HumanInteractionBlock.js"; export * from "./HumanInteractionBlockParametersItem.js"; export * from "./HumanInteractionBlockYaml.js"; export * from "./InputOrSelectContext.js"; +export * from "./InputTextAction.js"; export * from "./LoginBlock.js"; export * from "./LoginBlockParametersItem.js"; export * from "./LoginBlockYaml.js"; @@ -80,11 +83,14 @@ export * from "./PdfParserBlock.js"; export * from "./PdfParserBlockYaml.js"; export * from "./ProxyLocation.js"; export * from "./RunEngine.js"; +export * from "./RunSdkActionResponse.js"; export * from "./RunStatus.js"; export * from "./Script.js"; export * from "./ScriptFileCreate.js"; export * from "./ScriptRunResponse.js"; +export * from "./SdkAction.js"; export * from "./SelectOption.js"; +export * from "./SelectOptionAction.js"; export * from "./SendEmailBlock.js"; export * from "./SendEmailBlockYaml.js"; export * from "./SkyvernForgeSdkSchemasCredentialsCredentialType.js"; diff --git a/skyvern-ts/client/tests/wire/main.test.ts b/skyvern-ts/client/tests/wire/main.test.ts index 6a5e178b..7fc0a08a 100644 --- a/skyvern-ts/client/tests/wire/main.test.ts +++ b/skyvern-ts/client/tests/wire/main.test.ts @@ -2429,4 +2429,128 @@ describe("SkyvernClient", () => { }); }).rejects.toThrow(Skyvern.UnprocessableEntityError); }); + + test("run_sdk_action (1)", async () => { + const server = mockServerPool.createServer(); + const client = new SkyvernClient({ apiKey: "test", environment: server.baseUrl }); + const rawRequestBody = { url: "url", action: { type: "ai_click" } }; + const rawResponseBody = { workflow_run_id: "workflow_run_id", result: "result" }; + server + .mockEndpoint() + .post("/v1/sdk/run_action") + .header("x-user-agent", "x-user-agent") + .jsonBody(rawRequestBody) + .respondWith() + .statusCode(200) + .jsonBody(rawResponseBody) + .build(); + + const response = await client.runSdkAction({ + "x-user-agent": "x-user-agent", + url: "url", + action: { + type: "ai_click", + }, + }); + expect(response).toEqual({ + workflow_run_id: "workflow_run_id", + result: "result", + }); + }); + + test("run_sdk_action (2)", async () => { + const server = mockServerPool.createServer(); + const client = new SkyvernClient({ apiKey: "test", environment: server.baseUrl }); + const rawRequestBody = { url: "url", action: { type: "ai_click" } }; + const rawResponseBody = { key: "value" }; + server + .mockEndpoint() + .post("/v1/sdk/run_action") + .jsonBody(rawRequestBody) + .respondWith() + .statusCode(400) + .jsonBody(rawResponseBody) + .build(); + + await expect(async () => { + return await client.runSdkAction({ + url: "url", + action: { + type: "ai_click", + }, + }); + }).rejects.toThrow(Skyvern.BadRequestError); + }); + + test("run_sdk_action (3)", async () => { + const server = mockServerPool.createServer(); + const client = new SkyvernClient({ apiKey: "test", environment: server.baseUrl }); + const rawRequestBody = { url: "url", action: { type: "ai_click" } }; + const rawResponseBody = { key: "value" }; + server + .mockEndpoint() + .post("/v1/sdk/run_action") + .jsonBody(rawRequestBody) + .respondWith() + .statusCode(403) + .jsonBody(rawResponseBody) + .build(); + + await expect(async () => { + return await client.runSdkAction({ + url: "url", + action: { + type: "ai_click", + }, + }); + }).rejects.toThrow(Skyvern.ForbiddenError); + }); + + test("run_sdk_action (4)", async () => { + const server = mockServerPool.createServer(); + const client = new SkyvernClient({ apiKey: "test", environment: server.baseUrl }); + const rawRequestBody = { url: "url", action: { type: "ai_click" } }; + const rawResponseBody = { key: "value" }; + server + .mockEndpoint() + .post("/v1/sdk/run_action") + .jsonBody(rawRequestBody) + .respondWith() + .statusCode(404) + .jsonBody(rawResponseBody) + .build(); + + await expect(async () => { + return await client.runSdkAction({ + url: "url", + action: { + type: "ai_click", + }, + }); + }).rejects.toThrow(Skyvern.NotFoundError); + }); + + test("run_sdk_action (5)", async () => { + const server = mockServerPool.createServer(); + const client = new SkyvernClient({ apiKey: "test", environment: server.baseUrl }); + const rawRequestBody = { url: "url", action: { type: "ai_click" } }; + const rawResponseBody = { key: "value" }; + server + .mockEndpoint() + .post("/v1/sdk/run_action") + .jsonBody(rawRequestBody) + .respondWith() + .statusCode(422) + .jsonBody(rawResponseBody) + .build(); + + await expect(async () => { + return await client.runSdkAction({ + url: "url", + action: { + type: "ai_click", + }, + }); + }).rejects.toThrow(Skyvern.UnprocessableEntityError); + }); }); diff --git a/skyvern/client/__init__.py b/skyvern/client/__init__.py index 8048b095..b2b87372 100644 --- a/skyvern/client/__init__.py +++ b/skyvern/client/__init__.py @@ -265,6 +265,7 @@ if typing.TYPE_CHECKING: ProxyLocation, RunEngine, RunSdkActionResponse, + RunSdkActionResponseResult, RunStatus, Script, ScriptFileCreate, @@ -727,6 +728,7 @@ _dynamic_imports: typing.Dict[str, str] = { "ProxyLocation": ".types", "RunEngine": ".types", "RunSdkActionResponse": ".types", + "RunSdkActionResponseResult": ".types", "RunStatus": ".types", "Script": ".types", "ScriptFileCreate": ".types", @@ -1212,6 +1214,7 @@ __all__ = [ "ProxyLocation", "RunEngine", "RunSdkActionResponse", + "RunSdkActionResponseResult", "RunStatus", "Script", "ScriptFileCreate", diff --git a/skyvern/client/raw_client.py b/skyvern/client/raw_client.py index feb8b1ea..87193ec5 100644 --- a/skyvern/client/raw_client.py +++ b/skyvern/client/raw_client.py @@ -2146,6 +2146,17 @@ class RawSkyvern: ), ), ) + if _response.status_code == 404: + raise NotFoundError( + headers=dict(_response.headers), + body=typing.cast( + typing.Optional[typing.Any], + parse_obj_as( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ), + ) if _response.status_code == 422: raise UnprocessableEntityError( headers=dict(_response.headers), @@ -4267,6 +4278,17 @@ class AsyncRawSkyvern: ), ), ) + if _response.status_code == 404: + raise NotFoundError( + headers=dict(_response.headers), + body=typing.cast( + typing.Optional[typing.Any], + parse_obj_as( + type_=typing.Optional[typing.Any], # type: ignore + object_=_response.json(), + ), + ), + ) if _response.status_code == 422: raise UnprocessableEntityError( headers=dict(_response.headers), diff --git a/skyvern/client/types/__init__.py b/skyvern/client/types/__init__.py index 44001b53..0310da7b 100644 --- a/skyvern/client/types/__init__.py +++ b/skyvern/client/types/__init__.py @@ -290,6 +290,7 @@ if typing.TYPE_CHECKING: from .proxy_location import ProxyLocation from .run_engine import RunEngine from .run_sdk_action_response import RunSdkActionResponse + from .run_sdk_action_response_result import RunSdkActionResponseResult from .run_status import RunStatus from .script import Script from .script_file_create import ScriptFileCreate @@ -762,6 +763,7 @@ _dynamic_imports: typing.Dict[str, str] = { "ProxyLocation": ".proxy_location", "RunEngine": ".run_engine", "RunSdkActionResponse": ".run_sdk_action_response", + "RunSdkActionResponseResult": ".run_sdk_action_response_result", "RunStatus": ".run_status", "Script": ".script", "ScriptFileCreate": ".script_file_create", @@ -1238,6 +1240,7 @@ __all__ = [ "ProxyLocation", "RunEngine", "RunSdkActionResponse", + "RunSdkActionResponseResult", "RunStatus", "Script", "ScriptFileCreate", diff --git a/skyvern/client/types/run_sdk_action_response.py b/skyvern/client/types/run_sdk_action_response.py index 065f19a8..845d393f 100644 --- a/skyvern/client/types/run_sdk_action_response.py +++ b/skyvern/client/types/run_sdk_action_response.py @@ -4,6 +4,7 @@ import typing import pydantic from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel +from .run_sdk_action_response_result import RunSdkActionResponseResult class RunSdkActionResponse(UniversalBaseModel): @@ -12,7 +13,7 @@ class RunSdkActionResponse(UniversalBaseModel): The workflow run ID used for this action """ - result: typing.Optional[typing.Optional[typing.Any]] = pydantic.Field(default=None) + result: typing.Optional[RunSdkActionResponseResult] = pydantic.Field(default=None) """ The result from the action (e.g., selector, value, extracted data) """ diff --git a/skyvern/client/types/run_sdk_action_response_result.py b/skyvern/client/types/run_sdk_action_response_result.py new file mode 100644 index 00000000..b771d251 --- /dev/null +++ b/skyvern/client/types/run_sdk_action_response_result.py @@ -0,0 +1,7 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +RunSdkActionResponseResult = typing.Union[ + str, typing.Dict[str, typing.Optional[typing.Any]], typing.List[typing.Optional[typing.Any]], float, bool +] diff --git a/skyvern/forge/sdk/routes/sdk.py b/skyvern/forge/sdk/routes/sdk.py index bfb9ecbb..c6a2afbe 100644 --- a/skyvern/forge/sdk/routes/sdk.py +++ b/skyvern/forge/sdk/routes/sdk.py @@ -1,4 +1,4 @@ -import json +from typing import Any import structlog from fastapi import Depends, HTTPException, status @@ -95,8 +95,8 @@ async def run_sdk_action( task = await app.DATABASE.create_task( organization_id=organization_id, url=action_request.url, - navigation_goal=None, - navigation_payload=None, + navigation_goal=action.intention, + navigation_payload=action.data, data_extraction_goal=None, title=f"SDK Action Task: {action_request.action.type}", workflow_run_id=workflow_run.workflow_run_id, @@ -118,6 +118,18 @@ async def run_sdk_action( task_id=task.task_id, ) + await app.WORKFLOW_CONTEXT_MANAGER.initialize_workflow_run_context( + organization, + workflow_run.workflow_run_id, + workflow.title, + workflow.workflow_id, + workflow.workflow_permanent_id, + [], + [], + [], + [], + ) + context = skyvern_context.ensure_context() skyvern_context.set( SkyvernContext( @@ -131,7 +143,7 @@ async def run_sdk_action( workflow_run_id=workflow_run.workflow_run_id, ) ) - result = None + result: Any | None = None try: scraped_page = await SkyvernPage.create_scraped_page(browser_session_id=browser_session_id) page = await scraped_page._browser_state.must_get_working_page() @@ -170,7 +182,7 @@ async def run_sdk_action( intention=action.intention, data=action.data, ) - result = json.dumps(extract_result) + result = extract_result finally: skyvern_context.reset() diff --git a/skyvern/library/SdkSkyvernPageAi.py b/skyvern/library/SdkSkyvernPageAi.py index c3a41298..8c766b7c 100644 --- a/skyvern/library/SdkSkyvernPageAi.py +++ b/skyvern/library/SdkSkyvernPageAi.py @@ -2,14 +2,9 @@ from typing import TYPE_CHECKING, Any from playwright.async_api import Page +from skyvern.client import SdkAction_AiClick, SdkAction_AiInputText, SdkAction_AiSelectOption, SdkAction_Extract from skyvern.config import settings from skyvern.core.script_generations.skyvern_page_ai import SkyvernPageAi -from skyvern.forge.sdk.schemas.sdk_actions import ( - ClickAction, - ExtractAction, - InputTextAction, - SelectOptionAction, -) if TYPE_CHECKING: from skyvern.library.skyvern_browser import SkyvernBrowser @@ -35,18 +30,17 @@ class SdkSkyvernPageAi(SkyvernPageAi): ) -> str: """Click an element using AI via API call.""" - action = ClickAction( - selector=selector, - intention=intention, - data=data, - timeout=timeout, - ) response = await self._browser.client.run_sdk_action( url=self._page.url, browser_session_id=self._browser.browser_session_id, browser_address=self._browser.browser_address, workflow_run_id=self._browser.workflow_run_id, - action=action, + action=SdkAction_AiClick( + selector=selector, + intention=intention, + data=data, + timeout=timeout, + ), ) self._browser.workflow_run_id = response.workflow_run_id return response.result if response.result else selector @@ -65,7 +59,7 @@ class SdkSkyvernPageAi(SkyvernPageAi): response = await self._browser.client.run_sdk_action( url=self._page.url, - action=InputTextAction( + action=SdkAction_AiInputText( selector=selector, value=value, intention=intention, @@ -93,7 +87,7 @@ class SdkSkyvernPageAi(SkyvernPageAi): response = await self._browser.client.run_sdk_action( url=self._page.url, - action=SelectOptionAction( + action=SdkAction_AiSelectOption( selector=selector, value=value, intention=intention, @@ -129,7 +123,7 @@ class SdkSkyvernPageAi(SkyvernPageAi): response = await self._browser.client.run_sdk_action( url=self._page.url, - action=ExtractAction( + action=SdkAction_Extract( prompt=prompt, extract_schema=schema, error_code_mapping=error_code_mapping, diff --git a/skyvern/library/skyvern_browser_page.py b/skyvern/library/skyvern_browser_page.py index 44785742..de559398 100644 --- a/skyvern/library/skyvern_browser_page.py +++ b/skyvern/library/skyvern_browser_page.py @@ -1,5 +1,5 @@ import asyncio -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, overload from playwright.async_api import Page @@ -230,24 +230,64 @@ class SkyvernBrowserPage: self._ai = SdkSkyvernPageAi(browser, page) self.run = SkyvernPageRun(browser, page) + @overload + async def click( + self, + selector: str, + *, + prompt: str | None = None, + ai: str | None = "fallback", + data: str | dict[str, Any] | None = None, + timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS, + ) -> str | None: ... + + @overload async def click( self, *, + prompt: str, + ai: str | None = "fallback", + data: str | dict[str, Any] | None = None, + timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS, + ) -> str | None: ... + + async def click( + self, selector: str | None = None, - intention: str | None = None, + *, + prompt: str | None = None, ai: str | None = "fallback", data: str | dict[str, Any] | None = None, timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS, ) -> str | None: - """Click an element identified by ``selector``. + """Click an element using a CSS selector, AI-powered prompt matching, or both. - When ``intention`` and ``data`` are provided a new click action is - generated via the ``single-click-action`` prompt. The model returns a - fresh "xpath=..." selector based on the current DOM and the updated data for this run. - The browser then clicks the element using this newly generated xpath selector. + This method supports three modes: + - **Selector-based**: Click the element matching the CSS selector + - **AI-powered**: Use natural language to describe which element to click + - **Fallback mode** (default): Try the selector first, fall back to AI if it fails - If the prompt generation or parsing fails for any reason we fall back to - clicking the originally supplied ``selector``. + Args: + selector: CSS selector for the target element. + prompt: Natural language description of which element to click. + ai: AI behavior mode. Defaults to "fallback" which tries selector first, then AI. + data: Additional context data for AI processing. + timeout: Maximum time to wait for the click action in milliseconds. + + Returns: + The selector string that was successfully used to click the element, or None. + + Examples: + ```python + # Click using a CSS selector + await page.click("#open-invoice-button") + + # Click using AI with natural language + await page.click(prompt="Click on the 'Open Invoice' button") + + # Try selector first, fall back to AI if selector fails + await page.click("#open-invoice-button", prompt="Click on the 'Open Invoice' button") + ``` """ if ai == "fallback": @@ -262,10 +302,10 @@ class SkyvernBrowserPage: error_to_raise = e # if the original selector doesn't work, try to click the element with the ai generated selector - if intention: + if prompt: return await self._ai.ai_click( selector=selector or "", - intention=intention, + intention=prompt, data=data, timeout=timeout, ) @@ -274,10 +314,10 @@ class SkyvernBrowserPage: else: return selector elif ai == "proactive": - if intention: + if prompt: return await self._ai.ai_click( selector=selector or "", - intention=intention, + intention=prompt, data=data, timeout=timeout, ) @@ -287,6 +327,244 @@ class SkyvernBrowserPage: await locator.click(timeout=timeout) return selector + @overload + async def fill( + self, + selector: str, + value: str, + *, + prompt: str | None = None, + ai: str | None = "fallback", + data: str | dict[str, Any] | None = None, + timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS, + totp_identifier: str | None = None, + totp_url: str | None = None, + ) -> str: ... + + @overload + async def fill( + self, + *, + prompt: str, + value: str | None = None, + selector: str | None = None, + ai: str | None = "fallback", + data: str | dict[str, Any] | None = None, + timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS, + totp_identifier: str | None = None, + totp_url: str | None = None, + ) -> str: ... + + async def fill( + self, + selector: str | None = None, + value: str | None = None, + *, + prompt: str | None = None, + ai: str | None = "fallback", + data: str | dict[str, Any] | None = None, + timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS, + totp_identifier: str | None = None, + totp_url: str | None = None, + ) -> str: + """Fill an input field using a CSS selector, AI-powered prompt matching, or both. + + This method supports three modes: + - **Selector-based**: Fill the input field with a value using CSS selector + - **AI-powered**: Use natural language prompt (AI extracts value from prompt) + - **Fallback mode** (default): Try the selector first, fall back to AI if it fails + + Args: + selector: CSS selector for the target input element. + value: The text value to input into the field. + prompt: Natural language description of which field to fill and what value. + ai: AI behavior mode. Defaults to "fallback" which tries selector first, then AI. + data: Additional context data for AI processing. + timeout: Maximum time to wait for the fill action in milliseconds. + totp_identifier: TOTP identifier for time-based one-time password fields. + totp_url: URL to fetch TOTP codes from for authentication. + + Returns: + The value that was successfully filled into the field. + + Examples: + ```python + # Fill using selector and value (both positional) + await page.fill("#email-input", "user@example.com") + + # Fill using AI with natural language (prompt only) + await page.fill(prompt="Fill 'user@example.com' in the email address field") + + # Try selector first, fall back to AI if selector fails + await page.fill( + "#email-input", + "user@example.com", + prompt="Fill the email address with user@example.com" + ) + ``` + """ + return await self._input_text( + selector=selector or "", + value=value or "", + ai=ai, + intention=prompt, + data=data, + timeout=timeout, + totp_identifier=totp_identifier, + totp_url=totp_url, + ) + + async def goto(self, url: str, **kwargs: Any) -> None: + """Navigate to the given URL. + + Args: + url: URL to navigate page to. + **kwargs: Additional options like timeout, wait_until, referer, etc. + """ + await self._page.goto(url, **kwargs) + + async def type(self, selector: str, text: str, **kwargs: Any) -> None: + """Type text into an element character by character. + + Args: + selector: A selector to search for an element to type into. + text: Text to type into the element. + **kwargs: Additional options like delay, timeout, no_wait_after, etc. + """ + await self._page.type(selector, text, **kwargs) + + @overload + async def select_option( + self, + selector: str, + value: str, + *, + prompt: str | None = None, + ai: str | None = "fallback", + data: str | dict[str, Any] | None = None, + timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS, + ) -> str: ... + + @overload + async def select_option( + self, + *, + prompt: str, + value: str | None = None, + selector: str | None = None, + ai: str | None = "fallback", + data: str | dict[str, Any] | None = None, + timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS, + ) -> str: ... + + async def select_option( + self, + selector: str | None = None, + value: str | None = None, + *, + prompt: str | None = None, + ai: str | None = "fallback", + data: str | dict[str, Any] | None = None, + timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS, + ) -> str: + """Select an option from a dropdown using a CSS selector, AI-powered prompt matching, or both. + + This method supports three modes: + - **Selector-based**: Select the option with a value using CSS selector + - **AI-powered**: Use natural language prompt (AI extracts value from prompt) + - **Fallback mode** (default): Try the selector first, fall back to AI if it fails + + Args: + selector: CSS selector for the target select/dropdown element. + value: The option value to select. + prompt: Natural language description of which option to select. + ai: AI behavior mode. Defaults to "fallback" which tries selector first, then AI. + data: Additional context data for AI processing. + timeout: Maximum time to wait for the select action in milliseconds. + + Returns: + The value that was successfully selected. + + Examples: + ```python + # Select using selector and value (both positional) + await page.select_option("#country", "us") + + # Select using AI with natural language (prompt only) + await page.select_option(prompt="Select 'United States' from the country dropdown") + + # Try selector first, fall back to AI if selector fails + await page.select_option( + "#country", + "us", + prompt="Select United States from country" + ) + ``` + """ + value = value or "" + if ai == "fallback": + error_to_raise = None + if selector: + try: + locator = self._page.locator(selector) + await locator.select_option(value, timeout=timeout) + return value + except Exception as e: + error_to_raise = e + if prompt: + return await self._ai.ai_select_option( + selector=selector or "", + value=value, + intention=prompt, + data=data, + timeout=timeout, + ) + if error_to_raise: + raise error_to_raise + else: + return value + elif ai == "proactive" and prompt: + return await self._ai.ai_select_option( + selector=selector or "", + value=value, + intention=prompt, + data=data, + timeout=timeout, + ) + if selector: + locator = self._page.locator(selector) + await locator.select_option(value, timeout=timeout) + return value + + async def extract( + self, + prompt: str, + schema: dict[str, Any] | list | str | None = None, + error_code_mapping: dict[str, str] | None = None, + intention: str | None = None, + data: str | dict[str, Any] | None = None, + ) -> dict[str, Any] | list | str | None: + return await self._ai.ai_extract(prompt, schema, error_code_mapping, intention, data) + + async def reload(self, **kwargs: Any) -> None: + """Reload the current page. + + Args: + **kwargs: Additional options like timeout, wait_until, etc. + """ + await self._page.reload(**kwargs) + + async def screenshot(self, **kwargs: Any) -> bytes: + """Take a screenshot of the page. + + Args: + **kwargs: Additional options like path, full_page, clip, type, quality, etc. + + Returns: + bytes: The screenshot as bytes (unless path is specified, then saves to file). + """ + return await self._page.screenshot(**kwargs) + async def _input_text( self, selector: str, @@ -346,76 +624,3 @@ class SkyvernBrowserPage: locator = self._page.locator(selector) await handler_utils.input_sequentially(locator, value, timeout=timeout) return value - - async def fill( - self, - selector: str, - value: str, - ai: str | None = "fallback", - intention: str | None = None, - data: str | dict[str, Any] | None = None, - timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS, - totp_identifier: str | None = None, - totp_url: str | None = None, - ) -> str: - return await self._input_text( - selector=selector, - value=value, - ai=ai, - intention=intention, - data=data, - timeout=timeout, - totp_identifier=totp_identifier, - totp_url=totp_url, - ) - - async def goto(self, url: str, **kwargs: Any) -> None: - """Navigate to the given URL. - - Args: - url: URL to navigate page to. - **kwargs: Additional options like timeout, wait_until, referer, etc. - """ - await self._page.goto(url, **kwargs) - - async def type(self, selector: str, text: str, **kwargs: Any) -> None: - """Type text into an element character by character. - - Args: - selector: A selector to search for an element to type into. - text: Text to type into the element. - **kwargs: Additional options like delay, timeout, no_wait_after, etc. - """ - await self._page.type(selector, text, **kwargs) - - async def select_option(self, selector: str, value: Any = None, **kwargs: Any) -> list[str]: - """Select option(s) in a