SDK: support select_option and extract (#3850)

2025-10-30 09:05:20 -06:00
parent ac069838c7
commit af9a5f31e4
21 changed files with 774 additions and 124 deletions
--- a/fern/openapi/skyvern_openapi.json
+++ b/fern/openapi/skyvern_openapi.json
@@ -2317,6 +2317,9 @@
          "403": {
            "description": "Unauthorized - Invalid or missing authentication"
          },
+          "404": {
+            "description": "Workflow run or workflow not found"
+          },
          "400": {
            "description": "Invalid operation"
          },
@@ -9733,6 +9736,26 @@
            "description": "The workflow run ID used for this action"
          },
          "result": {
+            "anyOf": [
+              {
+                "type": "string"
+              },
+              {
+                "type": "object"
+              },
+              {
+                "type": "array"
+              },
+              {
+                "type": "number"
+              },
+              {
+                "type": "boolean"
+              },
+              {
+                "type": "null"
+              }
+            ],
            "title": "Result",
            "description": "The result from the action (e.g., selector, value, extracted data)"
          }
--- a/skyvern-ts/client/reference.md
+++ b/skyvern-ts/client/reference.md
@@ -1,5 +1,5 @@
 # Reference
-<details><summary><code>client.<a href="/src/Client.ts">deployScript</a>(scriptId, { ...params }) -> Skyvern.CreateScriptResponse</code></summary>
+<details><summary><code>client.<a href="/src/Client.ts">runSdkAction</a>({ ...params }) -> Skyvern.RunSdkActionResponse</code></summary>
 <dl>
 <dd>

@@ -11,7 +11,7 @@
 <dl>
 <dd>

-Deploy a script with updated files, creating a new version
+Execute a single SDK action with the specified parameters
 </dd>
 </dl>
 </dd>
@@ -26,11 +26,12 @@ Deploy a script with updated files, creating a new version
 <dd>

 ```typescript
-await client.deployScript("s_abc123", {
-    files: [{
-            path: "src/main.py",
-            content: "content"
-        }]
+await client.runSdkAction({
+    "x-user-agent": "x-user-agent",
+    url: "url",
+    action: {
+        type: "ai_click"
+    }
 });

 ```
@@ -47,15 +48,7 @@ await client.deployScript("s_abc123", {
 <dl>
 <dd>

-**scriptId:** `string` — The unique identifier of the script
-    
-</dd>
-</dl>
-
-<dl>
-<dd>
-
-**request:** `Skyvern.DeployScriptRequest` 
+**request:** `Skyvern.RunSdkActionRequest` 
    
 </dd>
 </dl>
--- a/skyvern-ts/client/src/Client.ts
+++ b/skyvern-ts/client/src/Client.ts
@@ -2202,4 +2202,101 @@ export class SkyvernClient {
                });
        }
    }
+
+    /**
+     * Execute a single SDK action (`ai_click`, `ai_input_text`, `ai_select_option` or `extract`) via POST `v1/sdk/run_action`.
+     * Thin public wrapper: delegates to `__runSdkAction` and wraps its promise with `core.HttpResponsePromise.fromPromise`.
+     * @param {Skyvern.RunSdkActionRequest} request - Target `url` and `action`, plus optional browser / workflow-run identifiers; `x-user-agent` is sent as a header, all other fields as the JSON body.
+     * @param {SkyvernClient.RequestOptions} requestOptions - Request-specific configuration.
+     * @returns A `core.HttpResponsePromise` for the `Skyvern.RunSdkActionResponse` body.
+     * @throws {@link Skyvern.BadRequestError} when the server responds with HTTP 400
+     * @throws {@link Skyvern.ForbiddenError} when the server responds with HTTP 403
+     * @throws {@link Skyvern.NotFoundError} when the server responds with HTTP 404
+     * @throws {@link Skyvern.UnprocessableEntityError} when the server responds with HTTP 422
+     *
+     * @example
+     *     await client.runSdkAction({
+     *         "x-user-agent": "x-user-agent",
+     *         url: "url",
+     *         action: {
+     *             type: "ai_click"
+     *         }
+     *     })
+     */
+    public runSdkAction(
+        request: Skyvern.RunSdkActionRequest,
+        requestOptions?: SkyvernClient.RequestOptions,
+    ): core.HttpResponsePromise<Skyvern.RunSdkActionResponse> {
+        return core.HttpResponsePromise.fromPromise(this.__runSdkAction(request, requestOptions));
+    }
+
+    private async __runSdkAction( // sends POST v1/sdk/run_action; resolves with { data, rawResponse } or throws on any failure
+        request: Skyvern.RunSdkActionRequest,
+        requestOptions?: SkyvernClient.RequestOptions,
+    ): Promise<core.WithRawResponse<Skyvern.RunSdkActionResponse>> {
+        const { "x-user-agent": userAgent, ..._body } = request; // header value is split off; the remaining fields form the JSON body
+        const _headers: core.Fetcher.Args["headers"] = mergeHeaders( // argument order: client-level headers, per-call x-user-agent / x-api-key, then requestOptions.headers
+            this._options?.headers,
+            mergeOnlyDefinedHeaders({
+                "x-user-agent": userAgent != null ? userAgent : undefined,
+                "x-api-key": requestOptions?.apiKey ?? this._options?.apiKey, // per-request API key takes precedence over the client-level one
+            }),
+            requestOptions?.headers,
+        );
+        const _response = await core.fetcher({
+            url: core.url.join(
+                (await core.Supplier.get(this._options.baseUrl)) ?? // base URL: explicit baseUrl, else configured environment, else Production
+                    (await core.Supplier.get(this._options.environment)) ??
+                    environments.SkyvernEnvironment.Production,
+                "v1/sdk/run_action",
+            ),
+            method: "POST",
+            headers: _headers,
+            contentType: "application/json",
+            queryParameters: requestOptions?.queryParams,
+            requestType: "json",
+            body: _body,
+            timeoutMs: (requestOptions?.timeoutInSeconds ?? this._options?.timeoutInSeconds ?? 60) * 1000, // seconds -> ms; defaults to 60 seconds when neither option is set
+            maxRetries: requestOptions?.maxRetries ?? this._options?.maxRetries,
+            abortSignal: requestOptions?.abortSignal,
+        });
+        if (_response.ok) {
+            return { data: _response.body as Skyvern.RunSdkActionResponse, rawResponse: _response.rawResponse }; // body is cast, not validated at runtime
+        }
+
+        if (_response.error.reason === "status-code") { // HTTP status failures: 400/403/404/422 map to typed errors, any other status to SkyvernError
+            switch (_response.error.statusCode) {
+                case 400:
+                    throw new Skyvern.BadRequestError(_response.error.body as unknown, _response.rawResponse);
+                case 403:
+                    throw new Skyvern.ForbiddenError(_response.error.body as unknown, _response.rawResponse);
+                case 404:
+                    throw new Skyvern.NotFoundError(_response.error.body as unknown, _response.rawResponse);
+                case 422:
+                    throw new Skyvern.UnprocessableEntityError(_response.error.body as unknown, _response.rawResponse);
+                default:
+                    throw new errors.SkyvernError({
+                        statusCode: _response.error.statusCode,
+                        body: _response.error.body,
+                        rawResponse: _response.rawResponse,
+                    });
+            }
+        }
+
+        switch (_response.error.reason) { // remaining (non-status) failure reasons reported by the fetcher
+            case "non-json":
+                throw new errors.SkyvernError({
+                    statusCode: _response.error.statusCode,
+                    body: _response.error.rawBody,
+                    rawResponse: _response.rawResponse,
+                });
+            case "timeout":
+                throw new errors.SkyvernTimeoutError("Timeout exceeded when calling POST /v1/sdk/run_action.");
+            case "unknown":
+                throw new errors.SkyvernError({
+                    message: _response.error.errorMessage,
+                    rawResponse: _response.rawResponse,
+                });
+        }
+    }
 }
--- a/skyvern-ts/client/src/api/client/requests/RunSdkActionRequest.ts
+++ b/skyvern-ts/client/src/api/client/requests/RunSdkActionRequest.ts
@@ -0,0 +1,27 @@
+// This file was auto-generated by Fern from our API Definition.
+
+import type * as Skyvern from "../../index.js";
+
+/** Parameters accepted by `SkyvernClient.runSdkAction`.
+ * @example
+ *     {
+ *         "x-user-agent": "x-user-agent",
+ *         url: "url",
+ *         action: {
+ *             type: "ai_click"
+ *         }
+ *     }
+ */
+export interface RunSdkActionRequest {
+    "x-user-agent"?: string; // sent as the `x-user-agent` HTTP header rather than in the JSON body
+    /** The URL where the action should be executed */
+    url: string;
+    /** The browser session ID */
+    browser_session_id?: string;
+    /** The browser address */
+    browser_address?: string;
+    /** Optional workflow run ID to continue an existing workflow run */
+    workflow_run_id?: string;
+    /** The action to execute with its specific parameters (a `Skyvern.SdkAction` variant selected by its `type` field) */
+    action: Skyvern.SdkAction;
+}
--- a/skyvern-ts/client/src/api/client/requests/index.ts
+++ b/skyvern-ts/client/src/api/client/requests/index.ts
@@ -7,6 +7,7 @@ export type { GetRunArtifactsRequest } from "./GetRunArtifactsRequest.js";
 export type { GetScriptsRequest } from "./GetScriptsRequest.js";
 export type { GetWorkflowsRequest } from "./GetWorkflowsRequest.js";
 export type { LoginRequest } from "./LoginRequest.js";
+export type { RunSdkActionRequest } from "./RunSdkActionRequest.js";
 export type { RunTaskRequest } from "./RunTaskRequest.js";
 export type { RunWorkflowRequest } from "./RunWorkflowRequest.js";
 export type { TotpCodeCreate } from "./TotpCodeCreate.js";
--- a/skyvern-ts/client/src/api/types/ClickAction.ts
+++ b/skyvern-ts/client/src/api/types/ClickAction.ts
@@ -0,0 +1,19 @@
+// This file was auto-generated by Fern from our API Definition.
+
+export interface ClickAction { // click parameters; `SdkAction.AiClick` extends this with `type: "ai_click"`
+    /** CSS selector for the element */
+    selector?: string;
+    /** The intention or goal of the click */
+    intention?: string;
+    /** Additional context data */
+    data?: ClickAction.Data;
+    /** Timeout in milliseconds */
+    timeout?: number;
+}
+
+export namespace ClickAction {
+    /**
+     * Additional context data: either a plain string or a string-keyed object
+     */
+    export type Data = string | Record<string, unknown>;
+}
--- a/skyvern-ts/client/src/api/types/ExtractAction.ts
+++ b/skyvern-ts/client/src/api/types/ExtractAction.ts
@@ -0,0 +1,25 @@
+// This file was auto-generated by Fern from our API Definition.
+
+export interface ExtractAction { // extraction parameters; `SdkAction.Extract` extends this with `type: "extract"`
+    /** Extraction prompt */
+    prompt?: string;
+    /** Schema for extraction */
+    extract_schema?: ExtractAction.ExtractSchema;
+    /** Error code mapping for extraction (string keys to string values; individual entries may be undefined) */
+    error_code_mapping?: Record<string, string | undefined>;
+    /** The intention or goal of the extraction */
+    intention?: string;
+    /** Additional context data */
+    data?: ExtractAction.Data;
+}
+
+export namespace ExtractAction {
+    /**
+     * Schema for extraction: a string-keyed object, an array, or a string
+     */
+    export type ExtractSchema = Record<string, unknown> | unknown[] | string;
+    /**
+     * Additional context data: either a plain string or a string-keyed object
+     */
+    export type Data = string | Record<string, unknown>;
+}
--- a/skyvern-ts/client/src/api/types/InputTextAction.ts
+++ b/skyvern-ts/client/src/api/types/InputTextAction.ts
@@ -0,0 +1,25 @@
+// This file was auto-generated by Fern from our API Definition.
+
+export interface InputTextAction { // text-input parameters; `SdkAction.AiInputText` extends this with `type: "ai_input_text"`
+    /** CSS selector for the element */
+    selector?: string;
+    /** Value to input */
+    value?: string;
+    /** The intention or goal of the input */
+    intention?: string;
+    /** Additional context data */
+    data?: InputTextAction.Data;
+    /** TOTP identifier for input_text actions */
+    totp_identifier?: string;
+    /** TOTP URL for input_text actions */
+    totp_url?: string;
+    /** Timeout in milliseconds */
+    timeout?: number;
+}
+
+export namespace InputTextAction {
+    /**
+     * Additional context data: either a plain string or a string-keyed object
+     */
+    export type Data = string | Record<string, unknown>;
+}
--- a/skyvern-ts/client/src/api/types/RunSdkActionResponse.ts
+++ b/skyvern-ts/client/src/api/types/RunSdkActionResponse.ts
@@ -0,0 +1,15 @@
+// This file was auto-generated by Fern from our API Definition.
+
+export interface RunSdkActionResponse { // response body returned by `SkyvernClient.runSdkAction`
+    /** The workflow run ID used for this action */
+    workflow_run_id: string;
+    /** The result from the action (e.g., selector, value, extracted data); may be absent */
+    result?: RunSdkActionResponse.Result;
+}
+
+export namespace RunSdkActionResponse {
+    /**
+     * The result from the action (e.g., selector, value, extracted data): a string, string-keyed object, array, number or boolean.
+     * NOTE(review): the OpenAPI schema for `result` also lists `null` in its `anyOf`, which this union omits — confirm whether a JSON `null` can reach callers.
+     */
+    export type Result = string | Record<string, unknown> | unknown[] | number | boolean;
+}
--- a/skyvern-ts/client/src/api/types/SdkAction.ts
+++ b/skyvern-ts/client/src/api/types/SdkAction.ts
@@ -0,0 +1,27 @@
+// This file was auto-generated by Fern from our API Definition.
+
+import type * as Skyvern from "../index.js";
+
+export type SdkAction = // union of the supported SDK actions, distinguished by the literal `type` field
+    | Skyvern.SdkAction.AiClick
+    | Skyvern.SdkAction.AiInputText
+    | Skyvern.SdkAction.AiSelectOption
+    | Skyvern.SdkAction.Extract;
+
+export namespace SdkAction { // one interface per variant: the matching *Action parameters plus its `type` tag
+    export interface AiClick extends Skyvern.ClickAction {
+        type: "ai_click";
+    }
+
+    export interface AiInputText extends Skyvern.InputTextAction {
+        type: "ai_input_text";
+    }
+
+    export interface AiSelectOption extends Skyvern.SelectOptionAction {
+        type: "ai_select_option";
+    }
+
+    export interface Extract extends Skyvern.ExtractAction {
+        type: "extract";
+    }
+}
--- a/skyvern-ts/client/src/api/types/SelectOptionAction.ts
+++ b/skyvern-ts/client/src/api/types/SelectOptionAction.ts
@@ -0,0 +1,21 @@
+// This file was auto-generated by Fern from our API Definition.
+
+export interface SelectOptionAction { // select-option parameters; `SdkAction.AiSelectOption` extends this with `type: "ai_select_option"`
+    /** CSS selector for the element */
+    selector?: string;
+    /** Value to select */
+    value?: string;
+    /** The intention or goal of the selection */
+    intention?: string;
+    /** Additional context data */
+    data?: SelectOptionAction.Data;
+    /** Timeout in milliseconds */
+    timeout?: number;
+}
+
+export namespace SelectOptionAction {
+    /**
+     * Additional context data: either a plain string or a string-keyed object
+     */
+    export type Data = string | Record<string, unknown>;
+}
--- a/skyvern-ts/client/src/api/types/index.ts
+++ b/skyvern-ts/client/src/api/types/index.ts
@@ -19,6 +19,7 @@ export * from "./BitwardenSensitiveInformationParameter.js";
 export * from "./BitwardenSensitiveInformationParameterYaml.js";
 export * from "./BlockType.js";
 export * from "./BrowserSessionResponse.js";
+export * from "./ClickAction.js";
 export * from "./CodeBlock.js";
 export * from "./CodeBlockParametersItem.js";
 export * from "./CodeBlockYaml.js";
@@ -33,6 +34,7 @@ export * from "./CredentialTypeOutput.js";
 export * from "./CreditCardCredentialResponse.js";
 export * from "./DownloadToS3Block.js";
 export * from "./DownloadToS3BlockYaml.js";
+export * from "./ExtractAction.js";
 export * from "./ExtractionBlock.js";
 export * from "./ExtractionBlockParametersItem.js";
 export * from "./ExtractionBlockYaml.js";
@@ -62,6 +64,7 @@ export * from "./HumanInteractionBlock.js";
 export * from "./HumanInteractionBlockParametersItem.js";
 export * from "./HumanInteractionBlockYaml.js";
 export * from "./InputOrSelectContext.js";
+export * from "./InputTextAction.js";
 export * from "./LoginBlock.js";
 export * from "./LoginBlockParametersItem.js";
 export * from "./LoginBlockYaml.js";
@@ -80,11 +83,14 @@ export * from "./PdfParserBlock.js";
 export * from "./PdfParserBlockYaml.js";
 export * from "./ProxyLocation.js";
 export * from "./RunEngine.js";
+export * from "./RunSdkActionResponse.js";
 export * from "./RunStatus.js";
 export * from "./Script.js";
 export * from "./ScriptFileCreate.js";
 export * from "./ScriptRunResponse.js";
+export * from "./SdkAction.js";
 export * from "./SelectOption.js";
+export * from "./SelectOptionAction.js";
 export * from "./SendEmailBlock.js";
 export * from "./SendEmailBlockYaml.js";
 export * from "./SkyvernForgeSdkSchemasCredentialsCredentialType.js";
--- a/skyvern-ts/client/tests/wire/main.test.ts
+++ b/skyvern-ts/client/tests/wire/main.test.ts
@@ -2429,4 +2429,128 @@ describe("SkyvernClient", () => {
            });
        }).rejects.toThrow(Skyvern.UnprocessableEntityError);
    });
+
+    test("run_sdk_action (1)", async () => { // success path: x-user-agent must arrive as a header, and the 200 body is returned unchanged
+        const server = mockServerPool.createServer();
+        const client = new SkyvernClient({ apiKey: "test", environment: server.baseUrl });
+        const rawRequestBody = { url: "url", action: { type: "ai_click" } }; // note: no "x-user-agent" key in the expected JSON body
+        const rawResponseBody = { workflow_run_id: "workflow_run_id", result: "result" };
+        server
+            .mockEndpoint()
+            .post("/v1/sdk/run_action")
+            .header("x-user-agent", "x-user-agent")
+            .jsonBody(rawRequestBody)
+            .respondWith()
+            .statusCode(200)
+            .jsonBody(rawResponseBody)
+            .build();
+
+        const response = await client.runSdkAction({
+            "x-user-agent": "x-user-agent",
+            url: "url",
+            action: {
+                type: "ai_click",
+            },
+        });
+        expect(response).toEqual({
+            workflow_run_id: "workflow_run_id",
+            result: "result",
+        });
+    });
+
+    test("run_sdk_action (2)", async () => { // HTTP 400 must surface as Skyvern.BadRequestError
+        const server = mockServerPool.createServer();
+        const client = new SkyvernClient({ apiKey: "test", environment: server.baseUrl });
+        const rawRequestBody = { url: "url", action: { type: "ai_click" } };
+        const rawResponseBody = { key: "value" };
+        server
+            .mockEndpoint()
+            .post("/v1/sdk/run_action")
+            .jsonBody(rawRequestBody)
+            .respondWith()
+            .statusCode(400)
+            .jsonBody(rawResponseBody)
+            .build();
+
+        await expect(async () => {
+            return await client.runSdkAction({
+                url: "url",
+                action: {
+                    type: "ai_click",
+                },
+            });
+        }).rejects.toThrow(Skyvern.BadRequestError);
+    });
+
+    test("run_sdk_action (3)", async () => { // HTTP 403 must surface as Skyvern.ForbiddenError
+        const server = mockServerPool.createServer();
+        const client = new SkyvernClient({ apiKey: "test", environment: server.baseUrl });
+        const rawRequestBody = { url: "url", action: { type: "ai_click" } };
+        const rawResponseBody = { key: "value" };
+        server
+            .mockEndpoint()
+            .post("/v1/sdk/run_action")
+            .jsonBody(rawRequestBody)
+            .respondWith()
+            .statusCode(403)
+            .jsonBody(rawResponseBody)
+            .build();
+
+        await expect(async () => {
+            return await client.runSdkAction({
+                url: "url",
+                action: {
+                    type: "ai_click",
+                },
+            });
+        }).rejects.toThrow(Skyvern.ForbiddenError);
+    });
+
+    test("run_sdk_action (4)", async () => { // HTTP 404 must surface as Skyvern.NotFoundError
+        const server = mockServerPool.createServer();
+        const client = new SkyvernClient({ apiKey: "test", environment: server.baseUrl });
+        const rawRequestBody = { url: "url", action: { type: "ai_click" } };
+        const rawResponseBody = { key: "value" };
+        server
+            .mockEndpoint()
+            .post("/v1/sdk/run_action")
+            .jsonBody(rawRequestBody)
+            .respondWith()
+            .statusCode(404)
+            .jsonBody(rawResponseBody)
+            .build();
+
+        await expect(async () => {
+            return await client.runSdkAction({
+                url: "url",
+                action: {
+                    type: "ai_click",
+                },
+            });
+        }).rejects.toThrow(Skyvern.NotFoundError);
+    });
+
+    test("run_sdk_action (5)", async () => { // HTTP 422 must surface as Skyvern.UnprocessableEntityError
+        const server = mockServerPool.createServer();
+        const client = new SkyvernClient({ apiKey: "test", environment: server.baseUrl });
+        const rawRequestBody = { url: "url", action: { type: "ai_click" } };
+        const rawResponseBody = { key: "value" };
+        server
+            .mockEndpoint()
+            .post("/v1/sdk/run_action")
+            .jsonBody(rawRequestBody)
+            .respondWith()
+            .statusCode(422)
+            .jsonBody(rawResponseBody)
+            .build();
+
+        await expect(async () => {
+            return await client.runSdkAction({
+                url: "url",
+                action: {
+                    type: "ai_click",
+                },
+            });
+        }).rejects.toThrow(Skyvern.UnprocessableEntityError);
+    });
 });
--- a/skyvern/client/init.py
+++ b/skyvern/client/init.py
@@ -265,6 +265,7 @@ if typing.TYPE_CHECKING:
        ProxyLocation,
        RunEngine,
        RunSdkActionResponse,
+        RunSdkActionResponseResult,
        RunStatus,
        Script,
        ScriptFileCreate,
@@ -727,6 +728,7 @@ _dynamic_imports: typing.Dict[str, str] = {
    "ProxyLocation": ".types",
    "RunEngine": ".types",
    "RunSdkActionResponse": ".types",
+    "RunSdkActionResponseResult": ".types",
    "RunStatus": ".types",
    "Script": ".types",
    "ScriptFileCreate": ".types",
@@ -1212,6 +1214,7 @@ __all__ = [
    "ProxyLocation",
    "RunEngine",
    "RunSdkActionResponse",
+    "RunSdkActionResponseResult",
    "RunStatus",
    "Script",
    "ScriptFileCreate",
--- a/skyvern/client/raw_client.py
+++ b/skyvern/client/raw_client.py
@@ -2146,6 +2146,17 @@ class RawSkyvern:
                        ),
                    ),
                )
+            if _response.status_code == 404:
+                raise NotFoundError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Optional[typing.Any],
+                        parse_obj_as(
+                            type_=typing.Optional[typing.Any],  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
            if _response.status_code == 422:
                raise UnprocessableEntityError(
                    headers=dict(_response.headers),
@@ -4267,6 +4278,17 @@ class AsyncRawSkyvern:
                        ),
                    ),
                )
+            if _response.status_code == 404:
+                raise NotFoundError(
+                    headers=dict(_response.headers),
+                    body=typing.cast(
+                        typing.Optional[typing.Any],
+                        parse_obj_as(
+                            type_=typing.Optional[typing.Any],  # type: ignore
+                            object_=_response.json(),
+                        ),
+                    ),
+                )
            if _response.status_code == 422:
                raise UnprocessableEntityError(
                    headers=dict(_response.headers),
--- a/skyvern/client/types/init.py
+++ b/skyvern/client/types/init.py
@@ -290,6 +290,7 @@ if typing.TYPE_CHECKING:
    from .proxy_location import ProxyLocation
    from .run_engine import RunEngine
    from .run_sdk_action_response import RunSdkActionResponse
+    from .run_sdk_action_response_result import RunSdkActionResponseResult
    from .run_status import RunStatus
    from .script import Script
    from .script_file_create import ScriptFileCreate
@@ -762,6 +763,7 @@ _dynamic_imports: typing.Dict[str, str] = {
    "ProxyLocation": ".proxy_location",
    "RunEngine": ".run_engine",
    "RunSdkActionResponse": ".run_sdk_action_response",
+    "RunSdkActionResponseResult": ".run_sdk_action_response_result",
    "RunStatus": ".run_status",
    "Script": ".script",
    "ScriptFileCreate": ".script_file_create",
@@ -1238,6 +1240,7 @@ __all__ = [
    "ProxyLocation",
    "RunEngine",
    "RunSdkActionResponse",
+    "RunSdkActionResponseResult",
    "RunStatus",
    "Script",
    "ScriptFileCreate",
--- a/skyvern/client/types/run_sdk_action_response.py
+++ b/skyvern/client/types/run_sdk_action_response.py
@@ -4,6 +4,7 @@ import typing

 import pydantic
 from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
+from .run_sdk_action_response_result import RunSdkActionResponseResult


 class RunSdkActionResponse(UniversalBaseModel):
@@ -12,7 +13,7 @@ class RunSdkActionResponse(UniversalBaseModel):
    The workflow run ID used for this action
    """

-    result: typing.Optional[typing.Optional[typing.Any]] = pydantic.Field(default=None)
+    result: typing.Optional[RunSdkActionResponseResult] = pydantic.Field(default=None)
    """
    The result from the action (e.g., selector, value, extracted data)
    """
--- a/skyvern/client/types/run_sdk_action_response_result.py
+++ b/skyvern/client/types/run_sdk_action_response_result.py
@@ -0,0 +1,7 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+RunSdkActionResponseResult = typing.Union[  # value type of RunSdkActionResponse.result: str, dict, list, float or bool
+    str, typing.Dict[str, typing.Optional[typing.Any]], typing.List[typing.Optional[typing.Any]], float, bool
+]
--- a/skyvern/forge/sdk/routes/sdk.py
+++ b/skyvern/forge/sdk/routes/sdk.py
@@ -1,4 +1,4 @@
-import json
+from typing import Any

 import structlog
 from fastapi import Depends, HTTPException, status
@@ -95,8 +95,8 @@ async def run_sdk_action(
    task = await app.DATABASE.create_task(
        organization_id=organization_id,
        url=action_request.url,
-        navigation_goal=None,
-        navigation_payload=None,
+        navigation_goal=action.intention,
+        navigation_payload=action.data,
        data_extraction_goal=None,
        title=f"SDK Action Task: {action_request.action.type}",
        workflow_run_id=workflow_run.workflow_run_id,
@@ -118,6 +118,18 @@ async def run_sdk_action(
        task_id=task.task_id,
    )

+    await app.WORKFLOW_CONTEXT_MANAGER.initialize_workflow_run_context(
+        organization,
+        workflow_run.workflow_run_id,
+        workflow.title,
+        workflow.workflow_id,
+        workflow.workflow_permanent_id,
+        [],
+        [],
+        [],
+        [],
+    )
+
    context = skyvern_context.ensure_context()
    skyvern_context.set(
        SkyvernContext(
@@ -131,7 +143,7 @@ async def run_sdk_action(
            workflow_run_id=workflow_run.workflow_run_id,
        )
    )
-    result = None
+    result: Any | None = None
    try:
        scraped_page = await SkyvernPage.create_scraped_page(browser_session_id=browser_session_id)
        page = await scraped_page._browser_state.must_get_working_page()
@@ -170,7 +182,7 @@ async def run_sdk_action(
                intention=action.intention,
                data=action.data,
            )
-            result = json.dumps(extract_result)
+            result = extract_result
    finally:
        skyvern_context.reset()

--- a/skyvern/library/SdkSkyvernPageAi.py
+++ b/skyvern/library/SdkSkyvernPageAi.py
@@ -2,14 +2,9 @@ from typing import TYPE_CHECKING, Any

 from playwright.async_api import Page

+from skyvern.client import SdkAction_AiClick, SdkAction_AiInputText, SdkAction_AiSelectOption, SdkAction_Extract
 from skyvern.config import settings
 from skyvern.core.script_generations.skyvern_page_ai import SkyvernPageAi
-from skyvern.forge.sdk.schemas.sdk_actions import (
-    ClickAction,
-    ExtractAction,
-    InputTextAction,
-    SelectOptionAction,
-)

 if TYPE_CHECKING:
    from skyvern.library.skyvern_browser import SkyvernBrowser
@@ -35,18 +30,17 @@ class SdkSkyvernPageAi(SkyvernPageAi):
    ) -> str:
        """Click an element using AI via API call."""

-        action = ClickAction(
-            selector=selector,
-            intention=intention,
-            data=data,
-            timeout=timeout,
-        )
        response = await self._browser.client.run_sdk_action(
            url=self._page.url,
            browser_session_id=self._browser.browser_session_id,
            browser_address=self._browser.browser_address,
            workflow_run_id=self._browser.workflow_run_id,
-            action=action,
+            action=SdkAction_AiClick(
+                selector=selector,
+                intention=intention,
+                data=data,
+                timeout=timeout,
+            ),
        )
        self._browser.workflow_run_id = response.workflow_run_id
        return response.result if response.result else selector
@@ -65,7 +59,7 @@ class SdkSkyvernPageAi(SkyvernPageAi):

        response = await self._browser.client.run_sdk_action(
            url=self._page.url,
-            action=InputTextAction(
+            action=SdkAction_AiInputText(
                selector=selector,
                value=value,
                intention=intention,
@@ -93,7 +87,7 @@ class SdkSkyvernPageAi(SkyvernPageAi):

        response = await self._browser.client.run_sdk_action(
            url=self._page.url,
-            action=SelectOptionAction(
+            action=SdkAction_AiSelectOption(
                selector=selector,
                value=value,
                intention=intention,
@@ -129,7 +123,7 @@ class SdkSkyvernPageAi(SkyvernPageAi):

        response = await self._browser.client.run_sdk_action(
            url=self._page.url,
-            action=ExtractAction(
+            action=SdkAction_Extract(
                prompt=prompt,
                extract_schema=schema,
                error_code_mapping=error_code_mapping,
--- a/skyvern/library/skyvern_browser_page.py
+++ b/skyvern/library/skyvern_browser_page.py
@@ -1,5 +1,5 @@
 import asyncio
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, overload

 from playwright.async_api import Page

@@ -230,24 +230,64 @@ class SkyvernBrowserPage:
        self._ai = SdkSkyvernPageAi(browser, page)
        self.run = SkyvernPageRun(browser, page)

+    @overload
+    async def click(
+        self,
+        selector: str,
+        *,
+        prompt: str | None = None,
+        ai: str | None = "fallback",
+        data: str | dict[str, Any] | None = None,
+        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
+    ) -> str | None: ...
+
+    @overload
    async def click(
        self,
        *,
+        prompt: str,
+        ai: str | None = "fallback",
+        data: str | dict[str, Any] | None = None,
+        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
+    ) -> str | None: ...
+
+    async def click(
+        self,
        selector: str | None = None,
-        intention: str | None = None,
+        *,
+        prompt: str | None = None,
        ai: str | None = "fallback",
        data: str | dict[str, Any] | None = None,
        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
    ) -> str | None:
-        """Click an element identified by ``selector``.
+        """Click an element using a CSS selector, AI-powered prompt matching, or both.

-        When ``intention`` and ``data`` are provided a new click action is
-        generated via the ``single-click-action`` prompt.  The model returns a
-        fresh "xpath=..." selector based on the current DOM and the updated data for this run.
-        The browser then clicks the element using this newly generated xpath selector.
+        This method supports three modes:
+        - **Selector-based**: Click the element matching the CSS selector
+        - **AI-powered**: Use natural language to describe which element to click
+        - **Fallback mode** (default): Try the selector first, fall back to AI if it fails

-        If the prompt generation or parsing fails for any reason we fall back to
-        clicking the originally supplied ``selector``.
+        Args:
+            selector: CSS selector for the target element.
+            prompt: Natural language description of which element to click.
+            ai: AI behavior mode. Defaults to "fallback" which tries selector first, then AI.
+            data: Additional context data for AI processing.
+            timeout: Maximum time to wait for the click action in milliseconds.
+
+        Returns:
+            The selector string that was successfully used to click the element, or None.
+
+        Examples:
+            ```python
+            # Click using a CSS selector
+            await page.click("#open-invoice-button")
+
+            # Click using AI with natural language
+            await page.click(prompt="Click on the 'Open Invoice' button")
+
+            # Try selector first, fall back to AI if selector fails
+            await page.click("#open-invoice-button", prompt="Click on the 'Open Invoice' button")
+            ```
        """

        if ai == "fallback":
@@ -262,10 +302,10 @@ class SkyvernBrowserPage:
                    error_to_raise = e

            # if the original selector doesn't work, try to click the element with the ai generated selector
-            if intention:
+            if prompt:
                return await self._ai.ai_click(
                    selector=selector or "",
-                    intention=intention,
+                    intention=prompt,
                    data=data,
                    timeout=timeout,
                )
@@ -274,10 +314,10 @@ class SkyvernBrowserPage:
            else:
                return selector
        elif ai == "proactive":
-            if intention:
+            if prompt:
                return await self._ai.ai_click(
                    selector=selector or "",
-                    intention=intention,
+                    intention=prompt,
                    data=data,
                    timeout=timeout,
                )
@@ -287,6 +327,244 @@ class SkyvernBrowserPage:
            await locator.click(timeout=timeout)
        return selector

+    @overload
+    async def fill(
+        self,
+        selector: str,
+        value: str,
+        *,
+        prompt: str | None = None,
+        ai: str | None = "fallback",
+        data: str | dict[str, Any] | None = None,
+        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
+        totp_identifier: str | None = None,
+        totp_url: str | None = None,
+    ) -> str: ...
+
+    @overload
+    async def fill(
+        self,
+        *,
+        prompt: str,
+        value: str | None = None,
+        selector: str | None = None,
+        ai: str | None = "fallback",
+        data: str | dict[str, Any] | None = None,
+        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
+        totp_identifier: str | None = None,
+        totp_url: str | None = None,
+    ) -> str: ...
+
+    async def fill(
+        self,
+        selector: str | None = None,
+        value: str | None = None,
+        *,
+        prompt: str | None = None,
+        ai: str | None = "fallback",
+        data: str | dict[str, Any] | None = None,
+        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
+        totp_identifier: str | None = None,
+        totp_url: str | None = None,
+    ) -> str:
+        """Fill an input field using a CSS selector, AI-powered prompt matching, or both.
+
+        This method supports three modes:
+        - **Selector-based**: Fill the input field with a value using a CSS selector
+        - **AI-powered**: Use a natural language prompt (AI extracts the value from the prompt)
+        - **Fallback mode** (default): Try the selector first, fall back to AI if it fails
+
+        Args:
+            selector: CSS selector for the target input element.
+            value: The text value to input into the field.
+            prompt: Natural language description of which field to fill and what value.
+            ai: AI behavior mode. Defaults to "fallback" which tries selector first, then AI.
+            data: Additional context data for AI processing.
+            timeout: Maximum time to wait for the fill action in milliseconds.
+            totp_identifier: TOTP identifier for time-based one-time password fields.
+            totp_url: URL to fetch TOTP codes from for authentication.
+
+        Returns:
+            The value that was successfully filled into the field.
+
+        Examples:
+            ```python
+            # Fill using selector and value (both positional)
+            await page.fill("#email-input", "user@example.com")
+
+            # Fill using AI with natural language (prompt only)
+            await page.fill(prompt="Fill 'user@example.com' in the email address field")
+
+            # Try selector first, fall back to AI if selector fails
+            await page.fill(
+                "#email-input",
+                "user@example.com",
+                prompt="Fill the email address with user@example.com"
+            )
+            ```
+        """
+        return await self._input_text(
+            selector=selector or "",
+            value=value or "",
+            ai=ai,
+            intention=prompt,
+            data=data,
+            timeout=timeout,
+            totp_identifier=totp_identifier,
+            totp_url=totp_url,
+        )
+
+    async def goto(self, url: str, **kwargs: Any) -> None:
+        """Navigate to the given URL.
+
+        Args:
+            url: URL to navigate page to.
+            **kwargs: Additional options like timeout, wait_until, referer, etc.
+        """
+        await self._page.goto(url, **kwargs)
+
+    async def type(self, selector: str, text: str, **kwargs: Any) -> None:
+        """Type text into an element character by character.
+
+        Args:
+            selector: A selector to search for an element to type into.
+            text: Text to type into the element.
+            **kwargs: Additional options like delay, timeout, no_wait_after, etc.
+        """
+        await self._page.type(selector, text, **kwargs)
+
+    @overload
+    async def select_option(
+        self,
+        selector: str,
+        value: str,
+        *,
+        prompt: str | None = None,
+        ai: str | None = "fallback",
+        data: str | dict[str, Any] | None = None,
+        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
+    ) -> str: ...
+
+    @overload
+    async def select_option(
+        self,
+        *,
+        prompt: str,
+        value: str | None = None,
+        selector: str | None = None,
+        ai: str | None = "fallback",
+        data: str | dict[str, Any] | None = None,
+        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
+    ) -> str: ...
+
+    async def select_option(
+        self,
+        selector: str | None = None,
+        value: str | None = None,
+        *,
+        prompt: str | None = None,
+        ai: str | None = "fallback",
+        data: str | dict[str, Any] | None = None,
+        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
+    ) -> str:
+        """Select an option from a dropdown using a CSS selector, AI-powered prompt matching, or both.
+
+        This method supports three modes:
+        - **Selector-based**: Select the option with a value using a CSS selector
+        - **AI-powered**: Use a natural language prompt (AI extracts the value from the prompt)
+        - **Fallback mode** (default): Try the selector first, fall back to AI if it fails
+
+        Args:
+            selector: CSS selector for the target select/dropdown element.
+            value: The option value to select.
+            prompt: Natural language description of which option to select.
+            ai: AI mode: "fallback" (default) tries selector, then AI; "proactive" uses AI directly if prompt is set.
+            data: Additional context data for AI processing.
+            timeout: Maximum time to wait for the select action in milliseconds.
+
+        Returns:
+            The value that was successfully selected.
+
+        Examples:
+            ```python
+            # Select using selector and value (both positional)
+            await page.select_option("#country", "us")
+
+            # Select using AI with natural language (prompt only)
+            await page.select_option(prompt="Select 'United States' from the country dropdown")
+
+            # Try selector first, fall back to AI if selector fails
+            await page.select_option(
+                "#country",
+                "us",
+                prompt="Select United States from country"
+            )
+            ```
+        """
+        value = value or ""
+        if ai == "fallback":
+            error_to_raise = None
+            if selector:
+                try:
+                    locator = self._page.locator(selector)
+                    await locator.select_option(value, timeout=timeout)
+                    return value
+                except Exception as e:
+                    error_to_raise = e
+            if prompt:
+                return await self._ai.ai_select_option(
+                    selector=selector or "",
+                    value=value,
+                    intention=prompt,
+                    data=data,
+                    timeout=timeout,
+                )
+            if error_to_raise:
+                raise error_to_raise
+            else:
+                return value
+        elif ai == "proactive" and prompt:
+            return await self._ai.ai_select_option(
+                selector=selector or "",
+                value=value,
+                intention=prompt,
+                data=data,
+                timeout=timeout,
+            )
+        if selector:
+            locator = self._page.locator(selector)
+            await locator.select_option(value, timeout=timeout)
+        return value
+
+    async def extract(
+        self,
+        prompt: str,
+        schema: dict[str, Any] | list | str | None = None,
+        error_code_mapping: dict[str, str] | None = None,
+        intention: str | None = None,
+        data: str | dict[str, Any] | None = None,
+    ) -> dict[str, Any] | list | str | None:
+        return await self._ai.ai_extract(prompt, schema, error_code_mapping, intention, data)
+
+    async def reload(self, **kwargs: Any) -> None:
+        """Reload the current page.
+
+        Args:
+            **kwargs: Additional options like timeout, wait_until, etc.
+        """
+        await self._page.reload(**kwargs)
+
+    async def screenshot(self, **kwargs: Any) -> bytes:
+        """Take a screenshot of the page.
+
+        Args:
+            **kwargs: Additional options like path, full_page, clip, type, quality, etc.
+
+        Returns:
+            bytes: The screenshot as bytes (also saved to a file when path is specified).
+        """
+        return await self._page.screenshot(**kwargs)
+
    async def _input_text(
        self,
        selector: str,
@@ -346,76 +624,3 @@ class SkyvernBrowserPage:
        locator = self._page.locator(selector)
        await handler_utils.input_sequentially(locator, value, timeout=timeout)
        return value
-
-    async def fill(
-        self,
-        selector: str,
-        value: str,
-        ai: str | None = "fallback",
-        intention: str | None = None,
-        data: str | dict[str, Any] | None = None,
-        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
-        totp_identifier: str | None = None,
-        totp_url: str | None = None,
-    ) -> str:
-        return await self._input_text(
-            selector=selector,
-            value=value,
-            ai=ai,
-            intention=intention,
-            data=data,
-            timeout=timeout,
-            totp_identifier=totp_identifier,
-            totp_url=totp_url,
-        )
-
-    async def goto(self, url: str, **kwargs: Any) -> None:
-        """Navigate to the given URL.
-
-        Args:
-            url: URL to navigate page to.
-            **kwargs: Additional options like timeout, wait_until, referer, etc.
-        """
-        await self._page.goto(url, **kwargs)
-
-    async def type(self, selector: str, text: str, **kwargs: Any) -> None:
-        """Type text into an element character by character.
-
-        Args:
-            selector: A selector to search for an element to type into.
-            text: Text to type into the element.
-            **kwargs: Additional options like delay, timeout, no_wait_after, etc.
-        """
-        await self._page.type(selector, text, **kwargs)
-
-    async def select_option(self, selector: str, value: Any = None, **kwargs: Any) -> list[str]:
-        """Select option(s) in a <select> element.
-
-        Args:
-            selector: A selector to search for a select element.
-            value: Option value(s) to select. Can be a string, list of strings, or dict with value/label/index.
-            **kwargs: Additional options like timeout, force, no_wait_after, etc.
-
-        Returns:
-            List of option values that have been successfully selected.
-        """
-        return await self._page.select_option(selector, value, **kwargs)
-
-    async def reload(self, **kwargs: Any) -> None:
-        """Reload the current page.
-
-        Args:
-            **kwargs: Additional options like timeout, wait_until, etc.
-        """
-        await self._page.reload(**kwargs)
-
-    async def screenshot(self, **kwargs: Any) -> bytes:
-        """Take a screenshot of the page.
-
-        Args:
-            **kwargs: Additional options like path, full_page, clip, type, quality, etc.
-
-        Returns:
-            bytes: The screenshot as bytes (unless path is specified, then saves to file).
-        """
-        return await self._page.screenshot(**kwargs)