SDK: support actions skeleton (#3817)

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
This commit is contained in:
Stanislav Novosad
2025-10-29 11:54:57 -06:00
committed by GitHub
parent d2d7b8e4b0
commit 33ad4cfcd1
26 changed files with 2274 additions and 426 deletions

View File

@@ -40,6 +40,8 @@ if typing.TYPE_CHECKING:
BitwardenSensitiveInformationParameterYaml,
BlockType,
BrowserSessionResponse,
ClickAction,
ClickActionData,
CodeBlock,
CodeBlockParametersItem,
CodeBlockParametersItem_AwsSecret,
@@ -79,6 +81,9 @@ if typing.TYPE_CHECKING:
CreditCardCredentialResponse,
DownloadToS3Block,
DownloadToS3BlockYaml,
ExtractAction,
ExtractActionData,
ExtractActionExtractSchema,
ExtractionBlock,
ExtractionBlockDataSchema,
ExtractionBlockParametersItem,
@@ -215,6 +220,8 @@ if typing.TYPE_CHECKING:
HumanInteractionBlockParametersItem_Workflow,
HumanInteractionBlockYaml,
InputOrSelectContext,
InputTextAction,
InputTextActionData,
LoginBlock,
LoginBlockDataSchema,
LoginBlockParametersItem,
@@ -257,11 +264,19 @@ if typing.TYPE_CHECKING:
PdfParserBlockYaml,
ProxyLocation,
RunEngine,
RunSdkActionResponse,
RunStatus,
Script,
ScriptFileCreate,
ScriptRunResponse,
SdkAction,
SdkAction_AiClick,
SdkAction_AiInputText,
SdkAction_AiSelectOption,
SdkAction_Extract,
SelectOption,
SelectOptionAction,
SelectOptionActionData,
SendEmailBlock,
SendEmailBlockYaml,
SkyvernForgeSdkSchemasCredentialsCredentialType,
@@ -485,6 +500,8 @@ _dynamic_imports: typing.Dict[str, str] = {
"BitwardenSensitiveInformationParameterYaml": ".types",
"BlockType": ".types",
"BrowserSessionResponse": ".types",
"ClickAction": ".types",
"ClickActionData": ".types",
"CodeBlock": ".types",
"CodeBlockParametersItem": ".types",
"CodeBlockParametersItem_AwsSecret": ".types",
@@ -524,6 +541,9 @@ _dynamic_imports: typing.Dict[str, str] = {
"CreditCardCredentialResponse": ".types",
"DownloadToS3Block": ".types",
"DownloadToS3BlockYaml": ".types",
"ExtractAction": ".types",
"ExtractActionData": ".types",
"ExtractActionExtractSchema": ".types",
"ExtractionBlock": ".types",
"ExtractionBlockDataSchema": ".types",
"ExtractionBlockParametersItem": ".types",
@@ -661,6 +681,8 @@ _dynamic_imports: typing.Dict[str, str] = {
"HumanInteractionBlockParametersItem_Workflow": ".types",
"HumanInteractionBlockYaml": ".types",
"InputOrSelectContext": ".types",
"InputTextAction": ".types",
"InputTextActionData": ".types",
"LoginBlock": ".types",
"LoginBlockDataSchema": ".types",
"LoginBlockParametersItem": ".types",
@@ -704,11 +726,19 @@ _dynamic_imports: typing.Dict[str, str] = {
"PdfParserBlockYaml": ".types",
"ProxyLocation": ".types",
"RunEngine": ".types",
"RunSdkActionResponse": ".types",
"RunStatus": ".types",
"Script": ".types",
"ScriptFileCreate": ".types",
"ScriptRunResponse": ".types",
"SdkAction": ".types",
"SdkAction_AiClick": ".types",
"SdkAction_AiInputText": ".types",
"SdkAction_AiSelectOption": ".types",
"SdkAction_Extract": ".types",
"SelectOption": ".types",
"SelectOptionAction": ".types",
"SelectOptionActionData": ".types",
"SendEmailBlock": ".types",
"SendEmailBlockYaml": ".types",
"Skyvern": ".client",
@@ -955,6 +985,8 @@ __all__ = [
"BitwardenSensitiveInformationParameterYaml",
"BlockType",
"BrowserSessionResponse",
"ClickAction",
"ClickActionData",
"CodeBlock",
"CodeBlockParametersItem",
"CodeBlockParametersItem_AwsSecret",
@@ -994,6 +1026,9 @@ __all__ = [
"CreditCardCredentialResponse",
"DownloadToS3Block",
"DownloadToS3BlockYaml",
"ExtractAction",
"ExtractActionData",
"ExtractActionExtractSchema",
"ExtractionBlock",
"ExtractionBlockDataSchema",
"ExtractionBlockParametersItem",
@@ -1131,6 +1166,8 @@ __all__ = [
"HumanInteractionBlockParametersItem_Workflow",
"HumanInteractionBlockYaml",
"InputOrSelectContext",
"InputTextAction",
"InputTextActionData",
"LoginBlock",
"LoginBlockDataSchema",
"LoginBlockParametersItem",
@@ -1174,11 +1211,19 @@ __all__ = [
"PdfParserBlockYaml",
"ProxyLocation",
"RunEngine",
"RunSdkActionResponse",
"RunStatus",
"Script",
"ScriptFileCreate",
"ScriptRunResponse",
"SdkAction",
"SdkAction_AiClick",
"SdkAction_AiInputText",
"SdkAction_AiSelectOption",
"SdkAction_Extract",
"SelectOption",
"SelectOptionAction",
"SelectOptionActionData",
"SendEmailBlock",
"SendEmailBlockYaml",
"Skyvern",

View File

@@ -19,8 +19,10 @@ from .types.credential_response import CredentialResponse
from .types.get_run_response import GetRunResponse
from .types.proxy_location import ProxyLocation
from .types.run_engine import RunEngine
from .types.run_sdk_action_response import RunSdkActionResponse
from .types.script import Script
from .types.script_file_create import ScriptFileCreate
from .types.sdk_action import SdkAction
from .types.skyvern_forge_sdk_schemas_credentials_credential_type import SkyvernForgeSdkSchemasCredentialsCredentialType
from .types.skyvern_schemas_run_blocks_credential_type import SkyvernSchemasRunBlocksCredentialType
from .types.task_run_request_data_extraction_schema import TaskRunRequestDataExtractionSchema
@@ -1494,6 +1496,71 @@ class Skyvern:
_response = self._raw_client.deploy_script(script_id, files=files, request_options=request_options)
return _response.data
def run_sdk_action(
    self,
    *,
    url: str,
    action: SdkAction,
    user_agent: typing.Optional[str] = None,
    browser_session_id: typing.Optional[str] = OMIT,
    browser_address: typing.Optional[str] = OMIT,
    workflow_run_id: typing.Optional[str] = OMIT,
    request_options: typing.Optional[RequestOptions] = None,
) -> RunSdkActionResponse:
    """
    Execute a single SDK action with the specified parameters.

    Every argument is forwarded unchanged to the raw client's
    ``run_sdk_action``; only the ``data`` attribute of its result is returned.

    Parameters
    ----------
    url : str
        The URL where the action should be executed

    action : SdkAction
        The action to execute with its specific parameters

    user_agent : typing.Optional[str]

    browser_session_id : typing.Optional[str]
        The browser session ID

    browser_address : typing.Optional[str]
        The browser address

    workflow_run_id : typing.Optional[str]
        Optional workflow run ID to continue an existing workflow run

    request_options : typing.Optional[RequestOptions]
        Request-specific configuration.

    Returns
    -------
    RunSdkActionResponse
        Successfully executed SDK action

    Examples
    --------
    from skyvern import SdkAction_AiClick, Skyvern

    client = Skyvern(
        api_key="YOUR_API_KEY",
    )
    client.run_sdk_action(
        user_agent="x-user-agent",
        url="url",
        action=SdkAction_AiClick(),
    )
    """
    raw_result = self._raw_client.run_sdk_action(
        url=url,
        action=action,
        user_agent=user_agent,
        browser_session_id=browser_session_id,
        browser_address=browser_address,
        workflow_run_id=workflow_run_id,
        request_options=request_options,
    )
    # Callers of the high-level client only get the parsed payload.
    return raw_result.data
@property
def scripts(self):
if self._scripts is None:
@@ -3174,6 +3241,79 @@ class AsyncSkyvern:
_response = await self._raw_client.deploy_script(script_id, files=files, request_options=request_options)
return _response.data
async def run_sdk_action(
    self,
    *,
    url: str,
    action: SdkAction,
    user_agent: typing.Optional[str] = None,
    browser_session_id: typing.Optional[str] = OMIT,
    browser_address: typing.Optional[str] = OMIT,
    workflow_run_id: typing.Optional[str] = OMIT,
    request_options: typing.Optional[RequestOptions] = None,
) -> RunSdkActionResponse:
    """
    Execute a single SDK action with the specified parameters.

    Every argument is forwarded unchanged to the raw async client's
    ``run_sdk_action``; only the ``data`` attribute of the awaited result is
    returned.

    Parameters
    ----------
    url : str
        The URL where the action should be executed

    action : SdkAction
        The action to execute with its specific parameters

    user_agent : typing.Optional[str]

    browser_session_id : typing.Optional[str]
        The browser session ID

    browser_address : typing.Optional[str]
        The browser address

    workflow_run_id : typing.Optional[str]
        Optional workflow run ID to continue an existing workflow run

    request_options : typing.Optional[RequestOptions]
        Request-specific configuration.

    Returns
    -------
    RunSdkActionResponse
        Successfully executed SDK action

    Examples
    --------
    import asyncio

    from skyvern import AsyncSkyvern, SdkAction_AiClick

    client = AsyncSkyvern(
        api_key="YOUR_API_KEY",
    )


    async def main() -> None:
        await client.run_sdk_action(
            user_agent="x-user-agent",
            url="url",
            action=SdkAction_AiClick(),
        )


    asyncio.run(main())
    """
    raw_result = await self._raw_client.run_sdk_action(
        url=url,
        action=action,
        user_agent=user_agent,
        browser_session_id=browser_session_id,
        browser_address=browser_address,
        workflow_run_id=workflow_run_id,
        request_options=request_options,
    )
    # Callers of the high-level client only get the parsed payload.
    return raw_result.data
@property
def scripts(self):
if self._scripts is None:

View File

@@ -24,8 +24,10 @@ from .types.credential_response import CredentialResponse
from .types.get_run_response import GetRunResponse
from .types.proxy_location import ProxyLocation
from .types.run_engine import RunEngine
from .types.run_sdk_action_response import RunSdkActionResponse
from .types.script import Script
from .types.script_file_create import ScriptFileCreate
from .types.sdk_action import SdkAction
from .types.skyvern_forge_sdk_schemas_credentials_credential_type import SkyvernForgeSdkSchemasCredentialsCredentialType
from .types.skyvern_schemas_run_blocks_credential_type import SkyvernSchemasRunBlocksCredentialType
from .types.task_run_request_data_extraction_schema import TaskRunRequestDataExtractionSchema
@@ -2052,6 +2054,114 @@ class RawSkyvern:
raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)
def run_sdk_action(
    self,
    *,
    url: str,
    action: SdkAction,
    user_agent: typing.Optional[str] = None,
    browser_session_id: typing.Optional[str] = OMIT,
    browser_address: typing.Optional[str] = OMIT,
    workflow_run_id: typing.Optional[str] = OMIT,
    request_options: typing.Optional[RequestOptions] = None,
) -> HttpResponse[RunSdkActionResponse]:
    """
    Execute a single SDK action with the specified parameters.

    Sends a POST to ``v1/sdk/run_action`` with a JSON body built from the
    arguments; ``user_agent`` is sent as the ``x-user-agent`` header when it
    is not ``None``.

    Parameters
    ----------
    url : str
        The URL where the action should be executed

    action : SdkAction
        The action to execute with its specific parameters

    user_agent : typing.Optional[str]

    browser_session_id : typing.Optional[str]
        The browser session ID

    browser_address : typing.Optional[str]
        The browser address

    workflow_run_id : typing.Optional[str]
        Optional workflow run ID to continue an existing workflow run

    request_options : typing.Optional[RequestOptions]
        Request-specific configuration.

    Returns
    -------
    HttpResponse[RunSdkActionResponse]
        Successfully executed SDK action

    Raises
    ------
    BadRequestError
        On HTTP 400.
    ForbiddenError
        On HTTP 403.
    UnprocessableEntityError
        On HTTP 422.
    ApiError
        On any other non-2xx status, or when the response body is not valid JSON.
    """
    payload = {
        "url": url,
        "browser_session_id": browser_session_id,
        "browser_address": browser_address,
        "workflow_run_id": workflow_run_id,
        "action": convert_and_respect_annotation_metadata(object_=action, annotation=SdkAction, direction="write"),
    }
    request_headers = {
        "content-type": "application/json",
        "x-user-agent": str(user_agent) if user_agent is not None else None,
    }
    # NOTE(review): omit=OMIT presumably makes the HTTP client drop fields left at OMIT - confirm.
    _response = self._client_wrapper.httpx_client.request(
        "v1/sdk/run_action",
        method="POST",
        json=payload,
        headers=request_headers,
        request_options=request_options,
        omit=OMIT,
    )
    status = _response.status_code
    try:
        if 200 <= status < 300:
            parsed = typing.cast(
                RunSdkActionResponse,
                parse_obj_as(
                    type_=RunSdkActionResponse,  # type: ignore
                    object_=_response.json(),
                ),
            )
            return HttpResponse(response=_response, data=parsed)

        # Status codes that map to a dedicated exception type carrying the decoded body.
        typed_errors = {
            400: BadRequestError,
            403: ForbiddenError,
            422: UnprocessableEntityError,
        }
        if status in typed_errors:
            raise typed_errors[status](
                headers=dict(_response.headers),
                body=typing.cast(
                    typing.Optional[typing.Any],
                    parse_obj_as(
                        type_=typing.Optional[typing.Any],  # type: ignore
                        object_=_response.json(),
                    ),
                ),
            )
        _response_json = _response.json()
    except JSONDecodeError:
        # Body was not JSON: surface the raw text instead.
        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
    raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)
class AsyncRawSkyvern:
def __init__(self, *, client_wrapper: AsyncClientWrapper):
@@ -4064,3 +4174,111 @@ class AsyncRawSkyvern:
except JSONDecodeError:
raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)
async def run_sdk_action(
    self,
    *,
    url: str,
    action: SdkAction,
    user_agent: typing.Optional[str] = None,
    browser_session_id: typing.Optional[str] = OMIT,
    browser_address: typing.Optional[str] = OMIT,
    workflow_run_id: typing.Optional[str] = OMIT,
    request_options: typing.Optional[RequestOptions] = None,
) -> AsyncHttpResponse[RunSdkActionResponse]:
    """
    Execute a single SDK action with the specified parameters.

    Awaits a POST to ``v1/sdk/run_action`` with a JSON body built from the
    arguments; ``user_agent`` is sent as the ``x-user-agent`` header when it
    is not ``None``.

    Parameters
    ----------
    url : str
        The URL where the action should be executed

    action : SdkAction
        The action to execute with its specific parameters

    user_agent : typing.Optional[str]

    browser_session_id : typing.Optional[str]
        The browser session ID

    browser_address : typing.Optional[str]
        The browser address

    workflow_run_id : typing.Optional[str]
        Optional workflow run ID to continue an existing workflow run

    request_options : typing.Optional[RequestOptions]
        Request-specific configuration.

    Returns
    -------
    AsyncHttpResponse[RunSdkActionResponse]
        Successfully executed SDK action

    Raises
    ------
    BadRequestError
        On HTTP 400.
    ForbiddenError
        On HTTP 403.
    UnprocessableEntityError
        On HTTP 422.
    ApiError
        On any other non-2xx status, or when the response body is not valid JSON.
    """
    payload = {
        "url": url,
        "browser_session_id": browser_session_id,
        "browser_address": browser_address,
        "workflow_run_id": workflow_run_id,
        "action": convert_and_respect_annotation_metadata(object_=action, annotation=SdkAction, direction="write"),
    }
    request_headers = {
        "content-type": "application/json",
        "x-user-agent": str(user_agent) if user_agent is not None else None,
    }
    # NOTE(review): omit=OMIT presumably makes the HTTP client drop fields left at OMIT - confirm.
    _response = await self._client_wrapper.httpx_client.request(
        "v1/sdk/run_action",
        method="POST",
        json=payload,
        headers=request_headers,
        request_options=request_options,
        omit=OMIT,
    )
    status = _response.status_code
    try:
        if 200 <= status < 300:
            parsed = typing.cast(
                RunSdkActionResponse,
                parse_obj_as(
                    type_=RunSdkActionResponse,  # type: ignore
                    object_=_response.json(),
                ),
            )
            return AsyncHttpResponse(response=_response, data=parsed)

        # Status codes that map to a dedicated exception type carrying the decoded body.
        typed_errors = {
            400: BadRequestError,
            403: ForbiddenError,
            422: UnprocessableEntityError,
        }
        if status in typed_errors:
            raise typed_errors[status](
                headers=dict(_response.headers),
                body=typing.cast(
                    typing.Optional[typing.Any],
                    parse_obj_as(
                        type_=typing.Optional[typing.Any],  # type: ignore
                        object_=_response.json(),
                    ),
                ),
            )
        _response_json = _response.json()
    except JSONDecodeError:
        # Body was not JSON: surface the raw text instead.
        raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response.text)
    raise ApiError(status_code=_response.status_code, headers=dict(_response.headers), body=_response_json)

View File

@@ -41,6 +41,8 @@ if typing.TYPE_CHECKING:
from .bitwarden_sensitive_information_parameter_yaml import BitwardenSensitiveInformationParameterYaml
from .block_type import BlockType
from .browser_session_response import BrowserSessionResponse
from .click_action import ClickAction
from .click_action_data import ClickActionData
from .code_block import CodeBlock
from .code_block_parameters_item import (
CodeBlockParametersItem,
@@ -84,6 +86,9 @@ if typing.TYPE_CHECKING:
from .credit_card_credential_response import CreditCardCredentialResponse
from .download_to_s3block import DownloadToS3Block
from .download_to_s3block_yaml import DownloadToS3BlockYaml
from .extract_action import ExtractAction
from .extract_action_data import ExtractActionData
from .extract_action_extract_schema import ExtractActionExtractSchema
from .extraction_block import ExtractionBlock
from .extraction_block_data_schema import ExtractionBlockDataSchema
from .extraction_block_parameters_item import (
@@ -236,6 +241,8 @@ if typing.TYPE_CHECKING:
)
from .human_interaction_block_yaml import HumanInteractionBlockYaml
from .input_or_select_context import InputOrSelectContext
from .input_text_action import InputTextAction
from .input_text_action_data import InputTextActionData
from .login_block import LoginBlock
from .login_block_data_schema import LoginBlockDataSchema
from .login_block_parameters_item import (
@@ -282,11 +289,21 @@ if typing.TYPE_CHECKING:
from .pdf_parser_block_yaml import PdfParserBlockYaml
from .proxy_location import ProxyLocation
from .run_engine import RunEngine
from .run_sdk_action_response import RunSdkActionResponse
from .run_status import RunStatus
from .script import Script
from .script_file_create import ScriptFileCreate
from .script_run_response import ScriptRunResponse
from .sdk_action import (
SdkAction,
SdkAction_AiClick,
SdkAction_AiInputText,
SdkAction_AiSelectOption,
SdkAction_Extract,
)
from .select_option import SelectOption
from .select_option_action import SelectOptionAction
from .select_option_action_data import SelectOptionActionData
from .send_email_block import SendEmailBlock
from .send_email_block_yaml import SendEmailBlockYaml
from .skyvern_forge_sdk_schemas_credentials_credential_type import SkyvernForgeSdkSchemasCredentialsCredentialType
@@ -520,6 +537,8 @@ _dynamic_imports: typing.Dict[str, str] = {
"BitwardenSensitiveInformationParameterYaml": ".bitwarden_sensitive_information_parameter_yaml",
"BlockType": ".block_type",
"BrowserSessionResponse": ".browser_session_response",
"ClickAction": ".click_action",
"ClickActionData": ".click_action_data",
"CodeBlock": ".code_block",
"CodeBlockParametersItem": ".code_block_parameters_item",
"CodeBlockParametersItem_AwsSecret": ".code_block_parameters_item",
@@ -559,6 +578,9 @@ _dynamic_imports: typing.Dict[str, str] = {
"CreditCardCredentialResponse": ".credit_card_credential_response",
"DownloadToS3Block": ".download_to_s3block",
"DownloadToS3BlockYaml": ".download_to_s3block_yaml",
"ExtractAction": ".extract_action",
"ExtractActionData": ".extract_action_data",
"ExtractActionExtractSchema": ".extract_action_extract_schema",
"ExtractionBlock": ".extraction_block",
"ExtractionBlockDataSchema": ".extraction_block_data_schema",
"ExtractionBlockParametersItem": ".extraction_block_parameters_item",
@@ -695,6 +717,8 @@ _dynamic_imports: typing.Dict[str, str] = {
"HumanInteractionBlockParametersItem_Workflow": ".human_interaction_block_parameters_item",
"HumanInteractionBlockYaml": ".human_interaction_block_yaml",
"InputOrSelectContext": ".input_or_select_context",
"InputTextAction": ".input_text_action",
"InputTextActionData": ".input_text_action_data",
"LoginBlock": ".login_block",
"LoginBlockDataSchema": ".login_block_data_schema",
"LoginBlockParametersItem": ".login_block_parameters_item",
@@ -737,11 +761,19 @@ _dynamic_imports: typing.Dict[str, str] = {
"PdfParserBlockYaml": ".pdf_parser_block_yaml",
"ProxyLocation": ".proxy_location",
"RunEngine": ".run_engine",
"RunSdkActionResponse": ".run_sdk_action_response",
"RunStatus": ".run_status",
"Script": ".script",
"ScriptFileCreate": ".script_file_create",
"ScriptRunResponse": ".script_run_response",
"SdkAction": ".sdk_action",
"SdkAction_AiClick": ".sdk_action",
"SdkAction_AiInputText": ".sdk_action",
"SdkAction_AiSelectOption": ".sdk_action",
"SdkAction_Extract": ".sdk_action",
"SelectOption": ".select_option",
"SelectOptionAction": ".select_option_action",
"SelectOptionActionData": ".select_option_action_data",
"SendEmailBlock": ".send_email_block",
"SendEmailBlockYaml": ".send_email_block_yaml",
"SkyvernForgeSdkSchemasCredentialsCredentialType": ".skyvern_forge_sdk_schemas_credentials_credential_type",
@@ -981,6 +1013,8 @@ __all__ = [
"BitwardenSensitiveInformationParameterYaml",
"BlockType",
"BrowserSessionResponse",
"ClickAction",
"ClickActionData",
"CodeBlock",
"CodeBlockParametersItem",
"CodeBlockParametersItem_AwsSecret",
@@ -1020,6 +1054,9 @@ __all__ = [
"CreditCardCredentialResponse",
"DownloadToS3Block",
"DownloadToS3BlockYaml",
"ExtractAction",
"ExtractActionData",
"ExtractActionExtractSchema",
"ExtractionBlock",
"ExtractionBlockDataSchema",
"ExtractionBlockParametersItem",
@@ -1156,6 +1193,8 @@ __all__ = [
"HumanInteractionBlockParametersItem_Workflow",
"HumanInteractionBlockYaml",
"InputOrSelectContext",
"InputTextAction",
"InputTextActionData",
"LoginBlock",
"LoginBlockDataSchema",
"LoginBlockParametersItem",
@@ -1198,11 +1237,19 @@ __all__ = [
"PdfParserBlockYaml",
"ProxyLocation",
"RunEngine",
"RunSdkActionResponse",
"RunStatus",
"Script",
"ScriptFileCreate",
"ScriptRunResponse",
"SdkAction",
"SdkAction_AiClick",
"SdkAction_AiInputText",
"SdkAction_AiSelectOption",
"SdkAction_Extract",
"SelectOption",
"SelectOptionAction",
"SelectOptionActionData",
"SendEmailBlock",
"SendEmailBlockYaml",
"SkyvernForgeSdkSchemasCredentialsCredentialType",

View File

@@ -0,0 +1,38 @@
# This file was auto-generated by Fern from our API Definition.
import typing
import pydantic
from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
from .click_action_data import ClickActionData
class ClickAction(UniversalBaseModel):
    # Parameters of a click action. Every field is optional and defaults to None.
    # The model is frozen and accepts extra fields under both pydantic versions.

    # CSS selector for the element
    selector: typing.Optional[str] = pydantic.Field(default=None)

    # The intention or goal of the click
    intention: typing.Optional[str] = pydantic.Field(default=None)

    # Additional context data
    data: typing.Optional[ClickActionData] = pydantic.Field(default=None)

    # Timeout in milliseconds
    timeout: typing.Optional[float] = pydantic.Field(default=None)

    if not IS_PYDANTIC_V2:

        class Config:
            frozen = True
            smart_union = True
            extra = pydantic.Extra.allow

    else:
        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2

View File

@@ -0,0 +1,5 @@
# This file was auto-generated by Fern from our API Definition.
import typing
# Accepted shapes for a click action's `data` value: a plain string or a dict with string keys.
ClickActionData = typing.Union[str, typing.Dict[str, typing.Optional[typing.Any]]]

View File

@@ -0,0 +1,44 @@
# This file was auto-generated by Fern from our API Definition.
import typing
import pydantic
from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
from .extract_action_data import ExtractActionData
from .extract_action_extract_schema import ExtractActionExtractSchema
class ExtractAction(UniversalBaseModel):
    # Parameters of an extract action. Every field is optional and defaults to None.
    # The model is frozen and accepts extra fields under both pydantic versions.

    # Extraction prompt
    prompt: typing.Optional[str] = pydantic.Field(default=None)

    # Schema for extraction
    extract_schema: typing.Optional[ExtractActionExtractSchema] = pydantic.Field(default=None)

    # Error code mapping for extraction
    error_code_mapping: typing.Optional[typing.Dict[str, typing.Optional[str]]] = pydantic.Field(default=None)

    # The intention or goal of the extraction
    intention: typing.Optional[str] = pydantic.Field(default=None)

    # Additional context data
    data: typing.Optional[ExtractActionData] = pydantic.Field(default=None)

    if not IS_PYDANTIC_V2:

        class Config:
            frozen = True
            smart_union = True
            extra = pydantic.Extra.allow

    else:
        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2

View File

@@ -0,0 +1,5 @@
# This file was auto-generated by Fern from our API Definition.
import typing
# Accepted shapes for an extract action's `data` value: a plain string or a dict with string keys.
ExtractActionData = typing.Union[str, typing.Dict[str, typing.Optional[typing.Any]]]

View File

@@ -0,0 +1,7 @@
# This file was auto-generated by Fern from our API Definition.
import typing
# Accepted shapes for an extract action's `extract_schema` value:
# a dict with string keys, a list, or a plain string.
ExtractActionExtractSchema = typing.Union[
typing.Dict[str, typing.Optional[typing.Any]], typing.List[typing.Optional[typing.Any]], str
]

View File

@@ -0,0 +1,53 @@
# This file was auto-generated by Fern from our API Definition.
import typing
import pydantic
from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
from .input_text_action_data import InputTextActionData
class InputTextAction(UniversalBaseModel):
    # Parameters of an input-text action. Every field is optional and defaults to None.
    # The model is frozen and accepts extra fields under both pydantic versions.

    # CSS selector for the element
    selector: typing.Optional[str] = pydantic.Field(default=None)

    # Value to input
    value: typing.Optional[str] = pydantic.Field(default=None)

    # The intention or goal of the input
    intention: typing.Optional[str] = pydantic.Field(default=None)

    # Additional context data
    data: typing.Optional[InputTextActionData] = pydantic.Field(default=None)

    # TOTP identifier for input_text actions
    totp_identifier: typing.Optional[str] = pydantic.Field(default=None)

    # TOTP URL for input_text actions
    totp_url: typing.Optional[str] = pydantic.Field(default=None)

    # Timeout in milliseconds
    timeout: typing.Optional[float] = pydantic.Field(default=None)

    if not IS_PYDANTIC_V2:

        class Config:
            frozen = True
            smart_union = True
            extra = pydantic.Extra.allow

    else:
        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2

View File

@@ -0,0 +1,5 @@
# This file was auto-generated by Fern from our API Definition.
import typing
# Accepted shapes for an input-text action's `data` value: a plain string or a dict with string keys.
InputTextActionData = typing.Union[str, typing.Dict[str, typing.Optional[typing.Any]]]

View File

@@ -0,0 +1,27 @@
# This file was auto-generated by Fern from our API Definition.
import typing
import pydantic
from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
class RunSdkActionResponse(UniversalBaseModel):
    # Response model for a single SDK action run.
    # The model is frozen and accepts extra fields under both pydantic versions.

    # The workflow run ID used for this action (required)
    workflow_run_id: str = pydantic.Field()

    # The result from the action (e.g., selector, value, extracted data)
    result: typing.Optional[typing.Any] = pydantic.Field(default=None)

    if not IS_PYDANTIC_V2:

        class Config:
            frozen = True
            smart_union = True
            extra = pydantic.Extra.allow

    else:
        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2

View File

@@ -0,0 +1,89 @@
# This file was auto-generated by Fern from our API Definition.
from __future__ import annotations
import typing
import pydantic
from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
from .click_action_data import ClickActionData
from .extract_action_data import ExtractActionData
from .extract_action_extract_schema import ExtractActionExtractSchema
from .input_text_action_data import InputTextActionData
from .select_option_action_data import SelectOptionActionData
class SdkAction_AiClick(UniversalBaseModel):
    # SdkAction variant tagged with type == "ai_click".
    # All fields other than the tag are optional and default to None.
    type: typing.Literal["ai_click"] = "ai_click"
    selector: typing.Optional[str] = None
    intention: typing.Optional[str] = None
    data: typing.Optional[ClickActionData] = None
    timeout: typing.Optional[float] = None

    if not IS_PYDANTIC_V2:

        class Config:
            frozen = True
            smart_union = True
            extra = pydantic.Extra.allow

    else:
        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
class SdkAction_AiInputText(UniversalBaseModel):
    # SdkAction variant tagged with type == "ai_input_text".
    # All fields other than the tag are optional and default to None.
    type: typing.Literal["ai_input_text"] = "ai_input_text"
    selector: typing.Optional[str] = None
    value: typing.Optional[str] = None
    intention: typing.Optional[str] = None
    data: typing.Optional[InputTextActionData] = None
    totp_identifier: typing.Optional[str] = None
    totp_url: typing.Optional[str] = None
    timeout: typing.Optional[float] = None

    if not IS_PYDANTIC_V2:

        class Config:
            frozen = True
            smart_union = True
            extra = pydantic.Extra.allow

    else:
        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
class SdkAction_AiSelectOption(UniversalBaseModel):
    # SdkAction variant tagged with type == "ai_select_option".
    # All fields other than the tag are optional and default to None.
    type: typing.Literal["ai_select_option"] = "ai_select_option"
    selector: typing.Optional[str] = None
    value: typing.Optional[str] = None
    intention: typing.Optional[str] = None
    data: typing.Optional[SelectOptionActionData] = None
    timeout: typing.Optional[float] = None

    if not IS_PYDANTIC_V2:

        class Config:
            frozen = True
            smart_union = True
            extra = pydantic.Extra.allow

    else:
        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
class SdkAction_Extract(UniversalBaseModel):
    # SdkAction variant tagged with type == "extract".
    # All fields other than the tag are optional and default to None.
    type: typing.Literal["extract"] = "extract"
    prompt: typing.Optional[str] = None
    extract_schema: typing.Optional[ExtractActionExtractSchema] = None
    error_code_mapping: typing.Optional[typing.Dict[str, typing.Optional[str]]] = None
    intention: typing.Optional[str] = None
    data: typing.Optional[ExtractActionData] = None

    if not IS_PYDANTIC_V2:

        class Config:
            frozen = True
            smart_union = True
            extra = pydantic.Extra.allow

    else:
        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2
# Union of the four SDK action variants; each variant carries its own literal `type` value
# ("ai_click", "ai_input_text", "ai_select_option", "extract").
SdkAction = typing.Union[SdkAction_AiClick, SdkAction_AiInputText, SdkAction_AiSelectOption, SdkAction_Extract]

View File

@@ -0,0 +1,43 @@
# This file was auto-generated by Fern from our API Definition.
import typing
import pydantic
from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
from .select_option_action_data import SelectOptionActionData
class SelectOptionAction(UniversalBaseModel):
    # Parameters of a select-option action. Every field is optional and defaults to None.
    # The model is frozen and accepts extra fields under both pydantic versions.

    # CSS selector for the element
    selector: typing.Optional[str] = pydantic.Field(default=None)

    # Value to select
    value: typing.Optional[str] = pydantic.Field(default=None)

    # The intention or goal of the selection
    intention: typing.Optional[str] = pydantic.Field(default=None)

    # Additional context data
    data: typing.Optional[SelectOptionActionData] = pydantic.Field(default=None)

    # Timeout in milliseconds
    timeout: typing.Optional[float] = pydantic.Field(default=None)

    if not IS_PYDANTIC_V2:

        class Config:
            frozen = True
            smart_union = True
            extra = pydantic.Extra.allow

    else:
        model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True)  # type: ignore # Pydantic v2

View File

@@ -0,0 +1,5 @@
# This file was auto-generated by Fern from our API Definition.
import typing
# Accepted shapes for a select-option action's `data` value: a plain string or a dict with string keys.
SelectOptionActionData = typing.Union[str, typing.Dict[str, typing.Optional[typing.Any]]]

View File

@@ -0,0 +1,441 @@
from __future__ import annotations
import json
from datetime import datetime, timezone
from typing import Any
import structlog
from jinja2.sandbox import SandboxedEnvironment
from playwright.async_api import Page
from skyvern.config import settings
from skyvern.constants import SPECIAL_FIELD_VERIFICATION_CODE
from skyvern.core.script_generations.skyvern_page_ai import SkyvernPageAi
from skyvern.forge import app
from skyvern.forge.prompts import prompt_engine
from skyvern.forge.sdk.api.files import download_file
from skyvern.forge.sdk.core import skyvern_context
from skyvern.forge.sdk.schemas.totp_codes import OTPType
from skyvern.services.otp_service import poll_otp_value
from skyvern.utils.prompt_engine import load_prompt_with_elements
from skyvern.webeye.actions import handler_utils
from skyvern.webeye.actions.actions import (
ActionStatus,
InputTextAction,
)
from skyvern.webeye.actions.handler import (
handle_click_action,
handle_input_text_action,
handle_select_option_action,
)
from skyvern.webeye.actions.parse_actions import parse_actions
from skyvern.webeye.scraper.scraper import ScrapedPage
# Sandboxed Jinja environment used by render_template to compile template strings.
jinja_sandbox_env = SandboxedEnvironment()
# Module-level structlog logger.
LOG = structlog.get_logger()
# Format string with {intention} and {prompt} placeholders.
SELECT_OPTION_GOAL = """- The intention to select an option: {intention}.
- The overall goal that the user wants to achieve: {prompt}."""
async def _get_element_id_by_selector(selector: str, page: Page) -> str | None:
    """Return the ``unique_id`` attribute of the element that ``selector`` locates on ``page``.

    The value is whatever the locator's ``get_attribute("unique_id")`` yields.
    """
    return await page.locator(selector).get_attribute("unique_id")
def _get_context_data(data: str | dict[str, Any] | None = None) -> dict[str, Any] | str | None:
    """Combine per-call context ``data`` with the current context's ``script_run_parameters``.

    - Falsy ``data``: the script run parameters are returned as-is (``None`` when
      there is no current context).
    - Dict ``data``: falsy values are dropped, then the script run parameters (if
      any) are merged on top, so they win on key collisions.
    - Any other ``data``: returns ``"{data}\\n{json}"`` where ``json`` is the
      JSON-encoded script run parameters, or an empty string when there are none.
    """
    context = skyvern_context.current()
    global_context_data = None if not context else context.script_run_parameters
    if not data:
        return global_context_data

    if not isinstance(data, dict):
        serialized_globals = json.dumps(global_context_data) if global_context_data else ""
        return f"{data}\n{serialized_globals}"

    merged: dict[str, Any] = {}
    for key, value in data.items():
        if value:
            merged[key] = value
    if global_context_data:
        merged.update(global_context_data)
    return merged
def _render_template_with_label(template: str, label: str | None = None) -> str:
    """Render ``template`` with workflow values plus the metadata of the block named ``label``.

    When the current context has a workflow run, the template data starts as a
    copy of the workflow run context values. The block metadata for ``label`` is
    merged with any dict already registered under that label and stored back
    under ``label``; the loop keys ``current_index``, ``current_item`` and
    ``current_value`` are additionally exposed at the top level.

    Args:
        template: Template text to render.
        label: Block label whose metadata should be injected, if any.

    Returns:
        The rendered text, or the untouched ``template`` when rendering raises.
    """
    context = skyvern_context.current()
    template_data: dict[str, Any] = {}

    if context and context.workflow_run_id:
        run_context = app.WORKFLOW_CONTEXT_MANAGER.get_workflow_run_context(context.workflow_run_id)
        block_metadata: dict[str, Any] = run_context.get_block_metadata(label)
        template_data = run_context.values.copy()

        if label in template_data:
            registered = template_data[label]
            if not isinstance(registered, dict):
                LOG.warning(
                    f"Script service: Parameter {label} has a registered reference value, going to overwrite it by block metadata"
                )
            else:
                block_metadata.update(registered)

        if label:
            template_data[label] = block_metadata

        # inject the forloop metadata as global variables
        for loop_key in ("current_index", "current_item", "current_value"):
            if loop_key in block_metadata:
                template_data[loop_key] = block_metadata[loop_key]

    try:
        rendered = render_template(template, data=template_data)
    except Exception:
        LOG.exception("Failed to render template", template=template, data=template_data)
        return template
    return rendered
def render_template(template: str, data: dict[str, Any] | None = None) -> str:
    """
    Render ``template`` with the sandboxed Jinja environment.

    Refer to Block.format_block_parameter_template_from_workflow_run_context
    TODO: complete this function so that block code shares the same template rendering logic

    ``data`` is copied, never mutated. When the current context has a workflow
    run, that run's context values are layered on top of ``data``; if the raw
    ``template`` string is itself a key of the combined data, the stored value is
    returned directly instead of a rendered string.
    """
    variables = dict(data) if data else {}
    # Compile first so a malformed template raises before any lookup happens.
    compiled = jinja_sandbox_env.from_string(template)

    context = skyvern_context.current()
    if not (context and context.workflow_run_id):
        return compiled.render(variables)

    run_context = app.WORKFLOW_CONTEXT_MANAGER.get_workflow_run_context(context.workflow_run_id)
    variables.update(run_context.values)
    if template in variables:
        return variables[template]
    return compiled.render(variables)
class RealSkyvernPageAi(SkyvernPageAi):
    """``SkyvernPageAi`` implementation that combines LLM calls with direct page actions.

    Every ``ai_*`` method loads a prompt, sends it to one of the LLM API handlers
    exposed on ``app`` and then acts on ``self.page``. When the current Skyvern
    context carries an organization, task and step, actions go through the
    regular action handlers; otherwise the methods act on the raw selector.
    """

    def __init__(
        self,
        scraped_page: ScrapedPage,
        page: Page,
    ):
        self.scraped_page = scraped_page
        self.page = page
        # Block label passed to _render_template_with_label() for prompts and TOTP fields.
        self.current_label: str | None = None

    async def ai_click(
        self,
        selector: str,
        intention: str,
        data: str | dict[str, Any] | None = None,
        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
    ) -> str:
        """Click an element using AI to locate it based on intention.

        Args:
            selector: Fallback selector, clicked directly when the AI path does
                not complete.
            intention: Navigation goal given to the "single-click-action" prompt.
            data: Extra payload merged with the script run parameters for the prompt.
            timeout: Timeout passed to the fallback ``locator.click``.

        Returns:
            ``xpath=<xpath>`` of the element the AI action targeted when that is
            available, otherwise the original ``selector``.

        Raises:
            Whatever the fallback ``locator.click`` raises. Errors on the AI path
            are logged and trigger the fallback instead of propagating.
        """
        try:
            # Build the element tree of the current page for the prompt
            context = skyvern_context.ensure_context()
            payload_str = _get_context_data(data)
            refreshed_page = await self.scraped_page.generate_scraped_page_without_screenshots()
            # Keep the stored page in sync with the tree shown to the LLM (as
            # ai_input_text / ai_select_option do), so the returned action is
            # parsed and executed against the same scrape.
            self.scraped_page = refreshed_page
            element_tree = refreshed_page.build_element_tree()
            single_click_prompt = prompt_engine.load_prompt(
                template="single-click-action",
                navigation_goal=intention,
                navigation_payload_str=payload_str,
                current_url=self.page.url,
                elements=element_tree,
                local_datetime=datetime.now(context.tz_info or datetime.now().astimezone().tzinfo).isoformat(),
                # user_context=getattr(context, "prompt", None),
            )
            json_response = await app.SINGLE_CLICK_AGENT_LLM_API_HANDLER(
                prompt=single_click_prompt,
                prompt_name="single-click-action",
                organization_id=context.organization_id,
            )
            actions_json = json_response.get("actions", [])
            if actions_json:
                organization_id = context.organization_id
                task_id = context.task_id
                step_id = context.step_id
                task = await app.DATABASE.get_task(task_id, organization_id) if task_id and organization_id else None
                step = await app.DATABASE.get_step(step_id, organization_id) if step_id and organization_id else None
                if organization_id and task and step:
                    actions = parse_actions(task, step.step_id, step.order, self.scraped_page, actions_json)
                    # An empty parse result raises IndexError here, which the
                    # handler below turns into the selector fallback.
                    action = actions[0]
                    result = await handle_click_action(action, self.page, self.scraped_page, task, step)
                    if result and result[-1].success is False:
                        raise Exception(result[-1].exception_message)
                    xpath = action.get_xpath()
                    selector = f"xpath={xpath}" if xpath else selector
                    return selector
        except Exception:
            LOG.exception(
                f"Failed to do ai click. Falling back to original selector={selector}, intention={intention}, data={data}"
            )

        # Fallback: no usable AI action (or it failed) -> click the given selector.
        if selector:
            locator = self.page.locator(selector)
            await locator.click(timeout=timeout)

        return selector

    async def ai_input_text(
        self,
        selector: str,
        value: str,
        intention: str,
        data: str | dict[str, Any] | None = None,
        totp_identifier: str | None = None,
        totp_url: str | None = None,
        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
    ) -> str:
        """Input text into an element using AI to determine the value.

        Args:
            selector: Selector of the input element.
            value: Default text; replaced by the LLM "answer" when ``intention``
                is given and the LLM call succeeds.
            intention: Description of what should be typed; enables the AI path.
            data: Extra payload for the prompt. A polled TOTP code is added to it.
            totp_identifier: Template rendered and passed to ``poll_otp_value``.
            totp_url: Template rendered and passed to ``poll_otp_value``.
            timeout: Timeout for the direct typing fallback.

        Returns:
            The text that was decided on (the LLM answer or the original value).

        Raises:
            Exception: when ``handle_input_text_action`` reports a failed result,
                or whatever the direct typing fallback raises.
        """
        context = skyvern_context.current()
        value = value or ""
        element_id: str | None = None
        organization_id = context.organization_id if context else None
        task_id = context.task_id if context else None
        step_id = context.step_id if context else None
        workflow_run_id = context.workflow_run_id if context else None
        task = await app.DATABASE.get_task(task_id, organization_id) if task_id and organization_id else None
        step = await app.DATABASE.get_step(step_id, organization_id) if step_id and organization_id else None

        if intention:
            try:
                prompt = context.prompt if context else None
                data = data or {}
                if (totp_identifier or totp_url) and context and organization_id and task_id:
                    if totp_identifier:
                        totp_identifier = _render_template_with_label(totp_identifier, label=self.current_label)
                    if totp_url:
                        totp_url = _render_template_with_label(totp_url, label=self.current_label)
                    otp_value = await poll_otp_value(
                        organization_id=organization_id,
                        task_id=task_id,
                        workflow_run_id=workflow_run_id,
                        totp_identifier=totp_identifier,
                        totp_verification_url=totp_url,
                    )
                    if otp_value and otp_value.get_otp_type() == OTPType.TOTP:
                        verification_code = otp_value.value
                        if isinstance(data, dict) and SPECIAL_FIELD_VERIFICATION_CODE not in data:
                            data[SPECIAL_FIELD_VERIFICATION_CODE] = verification_code
                        elif isinstance(data, str) and SPECIAL_FIELD_VERIFICATION_CODE not in data:
                            data = f"{data}\n" + str({SPECIAL_FIELD_VERIFICATION_CODE: verification_code})
                        elif isinstance(data, list):
                            data.append({SPECIAL_FIELD_VERIFICATION_CODE: verification_code})
                        else:
                            # NOTE(review): also reached when ``data`` already holds the
                            # verification-code key/text; the whole payload is then replaced
                            # by the code alone. Confirm this is intended.
                            data = {SPECIAL_FIELD_VERIFICATION_CODE: verification_code}

                refreshed_page = await self.scraped_page.generate_scraped_page_without_screenshots()
                self.scraped_page = refreshed_page
                # get the element_id by the selector
                element_id = await _get_element_id_by_selector(selector, self.page)
                script_generation_input_text_prompt = prompt_engine.load_prompt(
                    template="script-generation-input-text-generatiion",
                    intention=intention,
                    goal=prompt,
                    data=data,
                )
                json_response = await app.SINGLE_INPUT_AGENT_LLM_API_HANDLER(
                    prompt=script_generation_input_text_prompt,
                    prompt_name="script-generation-input-text-generatiion",
                    organization_id=organization_id,
                )
                value = json_response.get("answer", value)
            except Exception:
                LOG.exception(f"Failed to adapt value for input text action on selector={selector}, value={value}")

        # Derive the text to type only AFTER the LLM may have adapted ``value``, so
        # the direct typing fallback never types a stale pre-AI value.
        transformed_value = value
        if context and context.workflow_run_id:
            transformed_value = await _get_actual_value_of_parameter_if_secret(context.workflow_run_id, str(value))

        if element_id and organization_id and task and step:
            action = InputTextAction(
                element_id=element_id,
                text=value,
                status=ActionStatus.pending,
                organization_id=organization_id,
                workflow_run_id=workflow_run_id,
                task_id=task_id,
                step_id=step_id,
                reasoning=intention,
                intention=intention,
                response=value,
            )
            result = await handle_input_text_action(action, self.page, self.scraped_page, task, step)
            if result and result[-1].success is False:
                raise Exception(result[-1].exception_message)
        else:
            # No element id or no task/step bookkeeping: type straight into the selector.
            locator = self.page.locator(selector)
            await handler_utils.input_sequentially(locator, transformed_value, timeout=timeout)

        return value

    async def ai_upload_file(
        self,
        selector: str,
        files: str,
        intention: str,
        data: str | dict[str, Any] | None = None,
        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
    ) -> str:
        """Upload a file using AI to process the file URL.

        Args:
            selector: Selector of the file input element.
            files: File URL; replaced by the LLM "answer" when ``intention`` is
                given and the LLM call succeeds.
            intention: Description used to let the LLM produce the file URL.
            data: Extra payload merged with the script run parameters for the prompt.
            timeout: Timeout passed to ``locator.set_input_files``.

        Returns:
            The file URL that was downloaded and set on the input.

        Raises:
            ValueError: when no file URL is available after the optional AI step.
        """
        if intention:
            try:
                context = skyvern_context.current()
                prompt = context.prompt if context else None
                data = _get_context_data(data)
                script_generation_file_url_prompt = prompt_engine.load_prompt(
                    template="script-generation-file-url-generation",
                    intention=intention,
                    data=data,
                    goal=prompt,
                )
                json_response = await app.SINGLE_INPUT_AGENT_LLM_API_HANDLER(
                    prompt=script_generation_file_url_prompt,
                    prompt_name="script-generation-file-url-generation",
                    organization_id=context.organization_id if context else None,
                )
                files = json_response.get("answer", files)
            except Exception:
                # Best effort: keep the caller-provided URL when the LLM step fails.
                LOG.exception(f"Failed to adapt value for upload file action on selector={selector}, file={files}")

        if not files:
            raise ValueError("file url must be provided")

        file_path = await download_file(files)
        locator = self.page.locator(selector)
        await locator.set_input_files(file_path, timeout=timeout)
        return files

    async def ai_select_option(
        self,
        selector: str,
        value: str,
        intention: str,
        data: str | dict[str, Any] | None = None,
        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
    ) -> str:
        """Select an option from a dropdown using AI.

        Args:
            selector: Selector used for the direct ``select_option`` fallback.
            value: Default option value.
            intention: Description of the option to pick; enables the AI path.
            data: Extra payload passed to the "single-select-action" prompt.
            timeout: Timeout for the direct ``select_option`` fallback.

        Returns:
            The value (or label) of the option chosen by the AI action, or the
            original ``value`` when the AI path was not taken or failed.
        """
        option_value = value or ""
        context = skyvern_context.current()

        # Default to None so a missing context / missing ids falls through to the
        # direct selector path instead of raising UnboundLocalError.
        task = None
        step = None
        if context and context.task_id and context.step_id and context.organization_id:
            task = await app.DATABASE.get_task(context.task_id, organization_id=context.organization_id)
            step = await app.DATABASE.get_step(context.step_id, organization_id=context.organization_id)

        if not (intention and context and task and step):
            locator = self.page.locator(selector)
            await locator.select_option(option_value, timeout=timeout)
            return option_value

        try:
            prompt = context.prompt
            # data = _get_context_data(data)
            data = data or {}
            refreshed_page = await self.scraped_page.generate_scraped_page_without_screenshots()
            self.scraped_page = refreshed_page
            element_tree = refreshed_page.build_element_tree()
            merged_goal = SELECT_OPTION_GOAL.format(intention=intention, prompt=prompt)
            single_select_prompt = prompt_engine.load_prompt(
                template="single-select-action",
                navigation_payload_str=data,
                navigation_goal=merged_goal,
                current_url=self.page.url,
                elements=element_tree,
                local_datetime=datetime.now(context.tz_info or datetime.now().astimezone().tzinfo).isoformat(),
            )
            json_response = await app.SELECT_AGENT_LLM_API_HANDLER(
                prompt=single_select_prompt,
                prompt_name="single-select-action",
                organization_id=context.organization_id,
            )
            actions = parse_actions(
                task, step.step_id, step.order, self.scraped_page, json_response.get("actions", [])
            )
            if actions:
                action = actions[0]
                if not action.option:
                    raise ValueError("SelectOptionAction requires an 'option' field")
                option_value = action.option.value or action.option.label or ""
                await handle_select_option_action(
                    action=action,
                    page=self.page,
                    scraped_page=self.scraped_page,
                    task=task,
                    step=step,
                )
            else:
                # No exception is active here, so log a plain error rather than
                # LOG.exception (which would attach an empty traceback).
                LOG.error(
                    f"Failed to parse actions for select option action on selector={selector}, value={value}"
                )
        except Exception:
            LOG.exception(
                f"Failed to adapt value for select option action on selector={selector}, value={value}"
            )

        return option_value

    async def ai_extract(
        self,
        prompt: str,
        schema: dict[str, Any] | list | str | None = None,
        error_code_mapping: dict[str, str] | None = None,
        intention: str | None = None,
        data: str | dict[str, Any] | None = None,
    ) -> dict[str, Any] | list | str | None:
        """Extract information from the page using AI.

        Args:
            prompt: Extraction goal; rendered as a template with the current label.
            schema: Desired shape of the extracted information, passed to the prompt.
            error_code_mapping: Optional mapping, JSON-encoded into the prompt.
            intention: Accepted for interface compatibility; not used here.
            data: Accepted for interface compatibility; not used here.

        Returns:
            The result returned by ``app.EXTRACTION_LLM_API_HANDLER``.
        """
        scraped_page_refreshed = await self.scraped_page.refresh()
        context = skyvern_context.current()

        # UTC unless the context specifies a timezone.
        tz_info = timezone.utc
        if context and context.tz_info:
            tz_info = context.tz_info

        prompt = _render_template_with_label(prompt, label=self.current_label)
        extract_information_prompt = load_prompt_with_elements(
            element_tree_builder=scraped_page_refreshed,
            prompt_engine=prompt_engine,
            template_name="extract-information",
            html_need_skyvern_attrs=False,
            data_extraction_goal=prompt,
            extracted_information_schema=schema,
            current_url=scraped_page_refreshed.url,
            extracted_text=scraped_page_refreshed.extracted_text,
            error_code_mapping_str=(json.dumps(error_code_mapping) if error_code_mapping else None),
            local_datetime=datetime.now(tz_info).isoformat(),
        )

        step = None
        if context and context.organization_id and context.task_id and context.step_id:
            step = await app.DATABASE.get_step(
                step_id=context.step_id,
                organization_id=context.organization_id,
            )

        result = await app.EXTRACTION_LLM_API_HANDLER(
            prompt=extract_information_prompt,
            step=step,
            screenshots=scraped_page_refreshed.screenshots,
            prompt_name="extract-information",
        )

        if context and context.script_mode:
            print(f"\n✨ 📊 Extracted Information:\n{'-' * 50}")
            try:
                # Pretty print JSON if result is a dict/list
                if isinstance(result, (dict, list)):
                    print(json.dumps(result, indent=2, ensure_ascii=False))
                else:
                    print(result)
            except Exception:
                # e.g. a result that is not JSON-serializable: print it raw.
                print(result)
            print(f"{'-' * 50}\n")

        return result
async def _get_actual_value_of_parameter_if_secret(workflow_run_id: str, parameter: str) -> Any:
    """
    Resolve ``parameter`` to its original secret value when the workflow run knows one.

    The workflow run context for ``workflow_run_id`` is asked for the original
    secret value behind ``parameter``; when it has none (``None``), the parameter
    is handed back unchanged.
    This is only used for InputTextAction, UploadFileAction, and ClickAction (if it has a file_url).
    """
    run_context = app.WORKFLOW_CONTEXT_MANAGER.get_workflow_run_context(workflow_run_id)
    original_secret = run_context.get_original_secret_value_or_none(parameter)
    if original_secret is None:
        return parameter
    return original_secret

View File

@@ -10,7 +10,8 @@ import structlog
from playwright.async_api import Page
from skyvern.config import settings
from skyvern.core.script_generations.skyvern_page_ai import SkyvernPageAi, render_template
from skyvern.core.script_generations.real_skyvern_page_ai import RealSkyvernPageAi, render_template
from skyvern.core.script_generations.skyvern_page_ai import SkyvernPageAi
from skyvern.exceptions import ScriptTerminationException, WorkflowRunNotFound
from skyvern.forge import app
from skyvern.forge.prompts import prompt_engine
@@ -123,7 +124,7 @@ class SkyvernPage:
) -> SkyvernPage:
scraped_page = await cls.create_scraped_page(browser_session_id=browser_session_id)
page = await scraped_page._browser_state.must_get_working_page()
ai = SkyvernPageAi(scraped_page, page)
ai = RealSkyvernPageAi(scraped_page, page)
return cls(scraped_page=scraped_page, page=page, ai=ai)
@classmethod

View File

@@ -1,126 +1,12 @@
from __future__ import annotations
import json
from datetime import datetime, timezone
from typing import Any
import structlog
from jinja2.sandbox import SandboxedEnvironment
from playwright.async_api import Page
from typing import Any, Protocol
from skyvern.config import settings
from skyvern.constants import SPECIAL_FIELD_VERIFICATION_CODE
from skyvern.forge import app
from skyvern.forge.prompts import prompt_engine
from skyvern.forge.sdk.api.files import download_file
from skyvern.forge.sdk.core import skyvern_context
from skyvern.forge.sdk.schemas.totp_codes import OTPType
from skyvern.services.otp_service import poll_otp_value
from skyvern.utils.prompt_engine import load_prompt_with_elements
from skyvern.webeye.actions import handler_utils
from skyvern.webeye.actions.actions import (
ActionStatus,
InputTextAction,
)
from skyvern.webeye.actions.handler import (
handle_click_action,
handle_input_text_action,
handle_select_option_action,
)
from skyvern.webeye.actions.parse_actions import parse_actions
from skyvern.webeye.scraper.scraper import ScrapedPage
jinja_sandbox_env = SandboxedEnvironment()
LOG = structlog.get_logger()
SELECT_OPTION_GOAL = """- The intention to select an option: {intention}.
- The overall goal that the user wants to achieve: {prompt}."""
async def _get_element_id_by_selector(selector: str, page: Page) -> str | None:
locator = page.locator(selector)
element_id = await locator.get_attribute("unique_id")
return element_id
def _get_context_data(data: str | dict[str, Any] | None = None) -> dict[str, Any] | str | None:
context = skyvern_context.current()
global_context_data = context.script_run_parameters if context else None
if not data:
return global_context_data
result: dict[str, Any] | str | None
if isinstance(data, dict):
result = {k: v for k, v in data.items() if v}
if global_context_data:
result.update(global_context_data)
else:
global_context_data_str = json.dumps(global_context_data) if global_context_data else ""
result = f"{data}\n{global_context_data_str}"
return result
def _render_template_with_label(template: str, label: str | None = None) -> str:
template_data = {}
context = skyvern_context.current()
if context and context.workflow_run_id:
workflow_run_context = app.WORKFLOW_CONTEXT_MANAGER.get_workflow_run_context(context.workflow_run_id)
block_reference_data: dict[str, Any] = workflow_run_context.get_block_metadata(label)
template_data = workflow_run_context.values.copy()
if label in template_data:
current_value = template_data[label]
if isinstance(current_value, dict):
block_reference_data.update(current_value)
else:
LOG.warning(
f"Script service: Parameter {label} has a registered reference value, going to overwrite it by block metadata"
)
if label:
template_data[label] = block_reference_data
# inject the forloop metadata as global variables
if "current_index" in block_reference_data:
template_data["current_index"] = block_reference_data["current_index"]
if "current_item" in block_reference_data:
template_data["current_item"] = block_reference_data["current_item"]
if "current_value" in block_reference_data:
template_data["current_value"] = block_reference_data["current_value"]
try:
return render_template(template, data=template_data)
except Exception:
LOG.exception("Failed to render template", template=template, data=template_data)
return template
def render_template(template: str, data: dict[str, Any] | None = None) -> str:
"""
Refer to Block.format_block_parameter_template_from_workflow_run_context
TODO: complete this function so that block code shares the same template rendering logic
"""
template_data = data.copy() if data else {}
jinja_template = jinja_sandbox_env.from_string(template)
context = skyvern_context.current()
if context and context.workflow_run_id:
workflow_run_id = context.workflow_run_id
workflow_run_context = app.WORKFLOW_CONTEXT_MANAGER.get_workflow_run_context(workflow_run_id)
template_data.update(workflow_run_context.values)
if template in template_data:
return template_data[template]
return jinja_template.render(template_data)
class SkyvernPageAi:
def __init__(
self,
scraped_page: ScrapedPage,
page: Page,
):
self.scraped_page = scraped_page
self.page = page
self.current_label: str | None = None
class SkyvernPageAi(Protocol):
"""Protocol defining the interface for AI-powered page interactions."""
async def ai_click(
self,
@@ -130,52 +16,7 @@ class SkyvernPageAi:
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
) -> str:
"""Click an element using AI to locate it based on intention."""
try:
# Build the element tree of the current page for the prompt
context = skyvern_context.ensure_context()
payload_str = _get_context_data(data)
refreshed_page = await self.scraped_page.generate_scraped_page_without_screenshots()
element_tree = refreshed_page.build_element_tree()
single_click_prompt = prompt_engine.load_prompt(
template="single-click-action",
navigation_goal=intention,
navigation_payload_str=payload_str,
current_url=self.page.url,
elements=element_tree,
local_datetime=datetime.now(context.tz_info or datetime.now().astimezone().tzinfo).isoformat(),
# user_context=getattr(context, "prompt", None),
)
json_response = await app.SINGLE_CLICK_AGENT_LLM_API_HANDLER(
prompt=single_click_prompt,
prompt_name="single-click-action",
organization_id=context.organization_id,
)
actions_json = json_response.get("actions", [])
if actions_json:
organization_id = context.organization_id if context else None
task_id = context.task_id if context else None
step_id = context.step_id if context else None
task = await app.DATABASE.get_task(task_id, organization_id) if task_id and organization_id else None
step = await app.DATABASE.get_step(step_id, organization_id) if step_id and organization_id else None
if organization_id and task and step:
actions = parse_actions(
task, step.step_id, step.order, self.scraped_page, json_response.get("actions", [])
)
action = actions[0]
result = await handle_click_action(action, self.page, self.scraped_page, task, step)
if result and result[-1].success is False:
raise Exception(result[-1].exception_message)
xpath = action.get_xpath()
selector = f"xpath={xpath}" if xpath else selector
return selector
except Exception:
LOG.exception(
f"Failed to do ai click. Falling back to original selector={selector}, intention={intention}, data={data}"
)
locator = self.page.locator(selector)
await locator.click(timeout=timeout)
return selector
...
async def ai_input_text(
self,
@@ -188,86 +29,7 @@ class SkyvernPageAi:
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
) -> str:
"""Input text into an element using AI to determine the value."""
context = skyvern_context.current()
value = value or ""
transformed_value = value
element_id: str | None = None
organization_id = context.organization_id if context else None
task_id = context.task_id if context else None
step_id = context.step_id if context else None
workflow_run_id = context.workflow_run_id if context else None
task = await app.DATABASE.get_task(task_id, organization_id) if task_id and organization_id else None
step = await app.DATABASE.get_step(step_id, organization_id) if step_id and organization_id else None
if intention:
try:
prompt = context.prompt if context else None
data = data or {}
if (totp_identifier or totp_url) and context and organization_id and task_id:
if totp_identifier:
totp_identifier = _render_template_with_label(totp_identifier, label=self.current_label)
if totp_url:
totp_url = _render_template_with_label(totp_url, label=self.current_label)
otp_value = await poll_otp_value(
organization_id=organization_id,
task_id=task_id,
workflow_run_id=workflow_run_id,
totp_identifier=totp_identifier,
totp_verification_url=totp_url,
)
if otp_value and otp_value.get_otp_type() == OTPType.TOTP:
verification_code = otp_value.value
if isinstance(data, dict) and SPECIAL_FIELD_VERIFICATION_CODE not in data:
data[SPECIAL_FIELD_VERIFICATION_CODE] = verification_code
elif isinstance(data, str) and SPECIAL_FIELD_VERIFICATION_CODE not in data:
data = f"{data}\n" + str({SPECIAL_FIELD_VERIFICATION_CODE: verification_code})
elif isinstance(data, list):
data.append({SPECIAL_FIELD_VERIFICATION_CODE: verification_code})
else:
data = {SPECIAL_FIELD_VERIFICATION_CODE: verification_code}
refreshed_page = await self.scraped_page.generate_scraped_page_without_screenshots()
self.scraped_page = refreshed_page
# get the element_id by the selector
element_id = await _get_element_id_by_selector(selector, self.page)
script_generation_input_text_prompt = prompt_engine.load_prompt(
template="script-generation-input-text-generatiion",
intention=intention,
goal=prompt,
data=data,
)
json_response = await app.SINGLE_INPUT_AGENT_LLM_API_HANDLER(
prompt=script_generation_input_text_prompt,
prompt_name="script-generation-input-text-generatiion",
organization_id=organization_id,
)
value = json_response.get("answer", value)
except Exception:
LOG.exception(f"Failed to adapt value for input text action on selector={selector}, value={value}")
if context and context.workflow_run_id:
transformed_value = await _get_actual_value_of_parameter_if_secret(context.workflow_run_id, str(value))
if element_id and organization_id and task and step:
action = InputTextAction(
element_id=element_id,
text=value,
status=ActionStatus.pending,
organization_id=organization_id,
workflow_run_id=workflow_run_id,
task_id=task_id,
step_id=context.step_id if context else None,
reasoning=intention,
intention=intention,
response=value,
)
result = await handle_input_text_action(action, self.page, self.scraped_page, task, step)
if result and result[-1].success is False:
raise Exception(result[-1].exception_message)
else:
locator = self.page.locator(selector)
await handler_utils.input_sequentially(locator, transformed_value, timeout=timeout)
return value
...
async def ai_upload_file(
self,
@@ -278,32 +40,7 @@ class SkyvernPageAi:
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
) -> str:
"""Upload a file using AI to process the file URL."""
if intention:
try:
context = skyvern_context.current()
prompt = context.prompt if context else None
data = _get_context_data(data)
script_generation_file_url_prompt = prompt_engine.load_prompt(
template="script-generation-file-url-generation",
intention=intention,
data=data,
goal=prompt,
)
json_response = await app.SINGLE_INPUT_AGENT_LLM_API_HANDLER(
prompt=script_generation_file_url_prompt,
prompt_name="script-generation-file-url-generation",
organization_id=context.organization_id if context else None,
)
files = json_response.get("answer", files)
except Exception:
LOG.exception(f"Failed to adapt value for input text action on selector={selector}, file={files}")
if not files:
raise ValueError("file url must be provided")
file_path = await download_file(files)
locator = self.page.locator(selector)
await locator.set_input_files(file_path, timeout=timeout)
return files
...
async def ai_select_option(
self,
@@ -314,61 +51,7 @@ class SkyvernPageAi:
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
) -> str:
"""Select an option from a dropdown using AI."""
option_value = value or ""
context = skyvern_context.current()
if context and context.task_id and context.step_id and context.organization_id:
task = await app.DATABASE.get_task(context.task_id, organization_id=context.organization_id)
step = await app.DATABASE.get_step(context.step_id, organization_id=context.organization_id)
if intention and task and step:
try:
prompt = context.prompt if context else None
# data = _get_context_data(data)
data = data or {}
refreshed_page = await self.scraped_page.generate_scraped_page_without_screenshots()
self.scraped_page = refreshed_page
element_tree = refreshed_page.build_element_tree()
merged_goal = SELECT_OPTION_GOAL.format(intention=intention, prompt=prompt)
single_select_prompt = prompt_engine.load_prompt(
template="single-select-action",
navigation_payload_str=data,
navigation_goal=merged_goal,
current_url=self.page.url,
elements=element_tree,
local_datetime=datetime.now(context.tz_info or datetime.now().astimezone().tzinfo).isoformat(),
)
json_response = await app.SELECT_AGENT_LLM_API_HANDLER(
prompt=single_select_prompt,
prompt_name="single-select-action",
organization_id=context.organization_id if context else None,
)
actions = parse_actions(
task, step.step_id, step.order, self.scraped_page, json_response.get("actions", [])
)
if actions:
action = actions[0]
if not action.option:
raise ValueError("SelectOptionAction requires an 'option' field")
option_value = action.option.value or action.option.label or ""
await handle_select_option_action(
action=action,
page=self.page,
scraped_page=self.scraped_page,
task=task,
step=step,
)
else:
LOG.exception(
f"Failed to parse actions for select option action on selector={selector}, value={value}"
)
except Exception:
LOG.exception(
f"Failed to adapt value for select option action on selector={selector}, value={value}"
)
else:
locator = self.page.locator(selector)
await locator.select_option(option_value, timeout=timeout)
return option_value
...
async def ai_extract(
self,
@@ -379,61 +62,4 @@ class SkyvernPageAi:
data: str | dict[str, Any] | None = None,
) -> dict[str, Any] | list | str | None:
"""Extract information from the page using AI."""
scraped_page_refreshed = await self.scraped_page.refresh()
context = skyvern_context.current()
tz_info = datetime.now(tz=timezone.utc).tzinfo
if context and context.tz_info:
tz_info = context.tz_info
prompt = _render_template_with_label(prompt, label=self.current_label)
extract_information_prompt = load_prompt_with_elements(
element_tree_builder=scraped_page_refreshed,
prompt_engine=prompt_engine,
template_name="extract-information",
html_need_skyvern_attrs=False,
data_extraction_goal=prompt,
extracted_information_schema=schema,
current_url=scraped_page_refreshed.url,
extracted_text=scraped_page_refreshed.extracted_text,
error_code_mapping_str=(json.dumps(error_code_mapping) if error_code_mapping else None),
local_datetime=datetime.now(tz_info).isoformat(),
)
step = None
if context and context.organization_id and context.task_id and context.step_id:
step = await app.DATABASE.get_step(
step_id=context.step_id,
organization_id=context.organization_id,
)
result = await app.EXTRACTION_LLM_API_HANDLER(
prompt=extract_information_prompt,
step=step,
screenshots=scraped_page_refreshed.screenshots,
prompt_name="extract-information",
)
if context and context.script_mode:
print(f"\n✨ 📊 Extracted Information:\n{'-' * 50}")
try:
# Pretty print JSON if result is a dict/list
if isinstance(result, (dict, list)):
print(json.dumps(result, indent=2, ensure_ascii=False))
else:
print(result)
except Exception:
print(result)
print(f"{'-' * 50}\n")
return result
async def _get_actual_value_of_parameter_if_secret(workflow_run_id: str, parameter: str) -> Any:
"""
Get the actual value of a parameter if it's a secret. If it's not a secret, return the parameter value as is.
Just return the parameter value if the task isn't a workflow's task.
This is only used for InputTextAction, UploadFileAction, and ClickAction (if it has a file_url).
"""
workflow_run_context = app.WORKFLOW_CONTEXT_MANAGER.get_workflow_run_context(workflow_run_id)
secret_value = workflow_run_context.get_original_secret_value_or_none(parameter)
return secret_value if secret_value is not None else parameter
...

View File

@@ -5,6 +5,7 @@ from skyvern.forge.sdk.routes import debug_sessions # noqa: F401
from skyvern.forge.sdk.routes import pylon # noqa: F401
from skyvern.forge.sdk.routes import run_blocks # noqa: F401
from skyvern.forge.sdk.routes import scripts # noqa: F401
from skyvern.forge.sdk.routes import sdk # noqa: F401
from skyvern.forge.sdk.routes import streaming # noqa: F401
from skyvern.forge.sdk.routes import streaming_messages # noqa: F401
from skyvern.forge.sdk.routes import streaming_vnc # noqa: F401

View File

@@ -0,0 +1,180 @@
import json
import structlog
from fastapi import Depends, HTTPException, status
from skyvern import SkyvernPage
from skyvern.core.script_generations.real_skyvern_page_ai import RealSkyvernPageAi
from skyvern.forge import app
from skyvern.forge.sdk.core import skyvern_context
from skyvern.forge.sdk.core.skyvern_context import SkyvernContext
from skyvern.forge.sdk.routes.routers import base_router
from skyvern.forge.sdk.schemas.organizations import Organization
from skyvern.forge.sdk.schemas.sdk_actions import (
RunSdkActionRequest,
RunSdkActionResponse,
)
from skyvern.forge.sdk.services import org_auth_service
from skyvern.forge.sdk.workflow.models.workflow import (
WorkflowRequestBody,
WorkflowRunStatus,
)
from skyvern.schemas.workflows import BlockType, WorkflowStatus
LOG = structlog.get_logger()
@base_router.post(
    "/sdk/run_action",
    response_model=RunSdkActionResponse,
    summary="Run an SDK action",
    description="Execute a single SDK action with the specified parameters",
    tags=["SDK"],
    openapi_extra={
        "x-fern-sdk-method-name": "run_sdk_action",
    },
)
@base_router.post("/sdk/run_action/", include_in_schema=False)
async def run_sdk_action(
    action_request: RunSdkActionRequest,
    organization: Organization = Depends(org_auth_service.get_current_org),
) -> RunSdkActionResponse:
    """Execute a single SDK action with the specified parameters.

    The action is attached to the workflow run named by
    ``action_request.workflow_run_id`` when one is given; otherwise an empty
    "SDK Workflow" and a run for it are created first. A task, a step and an
    ``ACTION`` workflow-run block are then recorded, the Skyvern context is
    pointed at them, and the action is executed through ``RealSkyvernPageAi``.

    Args:
        action_request: The action plus the browser/workflow-run it targets.
        organization: The authenticated organization (injected by FastAPI).

    Returns:
        The workflow run ID that was used and the action's result. Extract
        results are returned JSON-encoded as a string.

    Raises:
        HTTPException: 404 when the requested workflow run, or the workflow it
            belongs to, cannot be found for this organization.
    """
    org_id = organization.organization_id
    LOG.info(
        "Running SDK action",
        organization_id=org_id,
        action_type=action_request.action.type,
    )

    session_id = action_request.browser_session_id
    address = action_request.browser_address
    action = action_request.action

    # --- Resolve (or create) the workflow + workflow run this action belongs to ---
    if action_request.workflow_run_id:
        workflow_run = await app.DATABASE.get_workflow_run(
            workflow_run_id=action_request.workflow_run_id,
            organization_id=org_id,
        )
        if not workflow_run:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"Workflow run {action_request.workflow_run_id} not found",
            )

        workflow = await app.DATABASE.get_workflow(
            workflow_id=workflow_run.workflow_id,
            organization_id=org_id,
        )
        if not workflow:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"Workflow {workflow_run.workflow_id} not found",
            )
    else:
        workflow = await app.WORKFLOW_SERVICE.create_empty_workflow(
            organization,
            title="SDK Workflow",
            status=WorkflowStatus.auto_generated,
        )
        run_request = WorkflowRequestBody(
            browser_session_id=session_id,
            browser_address=address,
        )
        workflow_run = await app.WORKFLOW_SERVICE.setup_workflow_run(
            request_id=None,
            workflow_request=run_request,
            workflow_permanent_id=workflow.workflow_permanent_id,
            organization=organization,
            version=None,
        )
        # The freshly created run is flagged as completed right away, before the action runs.
        workflow_run = await app.DATABASE.update_workflow_run(
            workflow_run_id=workflow_run.workflow_run_id,
            status=WorkflowRunStatus.completed,
        )

    run_id = workflow_run.workflow_run_id

    # --- Bookkeeping rows: task -> step -> workflow-run block ---
    task = await app.DATABASE.create_task(
        organization_id=org_id,
        url=action_request.url,
        navigation_goal=None,
        navigation_payload=None,
        data_extraction_goal=None,
        title=f"SDK Action Task: {action_request.action.type}",
        workflow_run_id=run_id,
        browser_session_id=session_id,
        browser_address=address,
    )
    step = await app.DATABASE.create_step(
        task.task_id,
        order=0,
        retry_index=0,
        organization_id=org_id,
    )
    await app.DATABASE.create_workflow_run_block(
        workflow_run_id=run_id,
        organization_id=org_id,
        block_type=BlockType.ACTION,
        task_id=task.task_id,
    )

    # --- Swap in a context describing this task; only the request id is carried over ---
    previous_context = skyvern_context.ensure_context()
    action_context = SkyvernContext(
        request_id=previous_context.request_id,
        organization_id=task.organization_id,
        task_id=task.task_id,
        step_id=step.step_id,
        browser_session_id=session_id,
        max_screenshot_scrolls=task.max_screenshot_scrolls,
        workflow_id=workflow.workflow_id,
        workflow_run_id=run_id,
    )
    skyvern_context.set(action_context)

    # --- Execute the action against the browser session's working page ---
    result = None
    try:
        scraped_page = await SkyvernPage.create_scraped_page(browser_session_id=session_id)
        working_page = await scraped_page._browser_state.must_get_working_page()
        page_ai = RealSkyvernPageAi(scraped_page, working_page)

        # Plain ``==`` checks on the discriminator, one branch per action variant.
        if action.type == "ai_click":
            result = await page_ai.ai_click(
                selector=action.selector,
                intention=action.intention,
                data=action.data,
                timeout=action.timeout,
            )
        elif action.type == "ai_input_text":
            result = await page_ai.ai_input_text(
                selector=action.selector,
                value=action.value,
                intention=action.intention,
                data=action.data,
                totp_identifier=action.totp_identifier,
                totp_url=action.totp_url,
                timeout=action.timeout,
            )
        elif action.type == "ai_select_option":
            result = await page_ai.ai_select_option(
                selector=action.selector,
                value=action.value,
                intention=action.intention,
                data=action.data,
                timeout=action.timeout,
            )
        elif action.type == "extract":
            extracted = await page_ai.ai_extract(
                prompt=action.prompt,
                schema=action.extract_schema,
                error_code_mapping=action.error_code_mapping,
                intention=action.intention,
                data=action.data,
            )
            # Extract results go back to the caller as a JSON-encoded string.
            result = json.dumps(extracted)
    finally:
        # Always reset the context, whether the action succeeded or raised.
        skyvern_context.reset()

    return RunSdkActionResponse(
        workflow_run_id=run_id,
        result=result,
    )

View File

@@ -0,0 +1,100 @@
from enum import Enum
from typing import Annotated, Any, Literal, Union
from pydantic import BaseModel, Field
from skyvern.config import settings
class SdkActionType(str, Enum):
    """String-valued names of the actions the SDK endpoint can execute."""

    # AI-assisted interactions with a page element.
    AI_CLICK = "ai_click"
    AI_INPUT_TEXT = "ai_input_text"
    AI_SELECT_OPTION = "ai_select_option"

    # Data extraction from the page.
    EXTRACT = "extract"
# Base action class
class SdkActionBase(BaseModel):
    """Base class for SDK actions."""

    # Required tag naming the kind of action; each concrete action below
    # re-declares it as a ``Literal`` with a fixed default.
    type: str = Field(
        ...,
        description="The type of action",
    )
# Specific action types
class ClickAction(SdkActionBase):
    """Click action parameters."""

    # Fixed tag identifying this variant.
    type: Literal["ai_click"] = "ai_click"

    # What to click: a selector and/or a natural-language intention (both default to "").
    selector: str = Field(default="", description="CSS selector for the element")
    intention: str = Field(default="", description="The intention or goal of the click")

    # Optional free-form context, either a string or a dict.
    data: str | dict[str, Any] | None = Field(default=None, description="Additional context data")

    # Defaults to the configured browser action timeout.
    timeout: float = Field(
        default=settings.BROWSER_ACTION_TIMEOUT_MS,
        description="Timeout in milliseconds",
    )
class InputTextAction(SdkActionBase):
    """Input text action parameters."""

    # Fixed tag identifying this variant.
    type: Literal["ai_input_text"] = "ai_input_text"

    # Target element, text to type, and the natural-language goal (all default to "").
    selector: str = Field(default="", description="CSS selector for the element")
    value: str = Field(default="", description="Value to input")
    intention: str = Field(default="", description="The intention or goal of the input")

    # Optional free-form context, either a string or a dict.
    data: str | dict[str, Any] | None = Field(default=None, description="Additional context data")

    # Optional TOTP settings; both default to None.
    totp_identifier: str | None = Field(default=None, description="TOTP identifier for input_text actions")
    totp_url: str | None = Field(default=None, description="TOTP URL for input_text actions")

    # Defaults to the configured browser action timeout.
    timeout: float = Field(
        default=settings.BROWSER_ACTION_TIMEOUT_MS,
        description="Timeout in milliseconds",
    )
class SelectOptionAction(SdkActionBase):
    """Select option action parameters."""

    # Fixed tag identifying this variant.
    type: Literal["ai_select_option"] = "ai_select_option"

    # Target element, option to pick, and the natural-language goal (all default to "").
    selector: str = Field(default="", description="CSS selector for the element")
    value: str = Field(default="", description="Value to select")
    intention: str = Field(default="", description="The intention or goal of the selection")

    # Optional free-form context, either a string or a dict.
    data: str | dict[str, Any] | None = Field(default=None, description="Additional context data")

    # Defaults to the configured browser action timeout.
    timeout: float = Field(
        default=settings.BROWSER_ACTION_TIMEOUT_MS,
        description="Timeout in milliseconds",
    )
class ExtractAction(SdkActionBase):
    """Extract data action parameters."""

    # Fixed tag identifying this variant.
    type: Literal["extract"] = "extract"

    # Natural-language description of what to extract (defaults to "").
    prompt: str = Field(default="", description="Extraction prompt")

    # Optional schema for the extracted data; accepted as a dict, a list or a string.
    extract_schema: dict[str, Any] | list | str | None = Field(
        default=None,
        description="Schema for extraction",
    )

    # Optional string-to-string error code mapping.
    error_code_mapping: dict[str, str] | None = Field(
        default=None,
        description="Error code mapping for extraction",
    )

    # Unlike the other actions, the intention here is optional (None by default).
    intention: str | None = Field(default=None, description="The intention or goal of the extraction")

    # Optional free-form context, either a string or a dict.
    data: str | dict[str, Any] | None = Field(default=None, description="Additional context data")
# Discriminated union of all action types
# Every concrete action variant; the ``type`` field of each is a distinct Literal.
_SdkActionVariants = Union[ClickAction, InputTextAction, SelectOptionAction, ExtractAction]

# The variant is chosen by the value of the ``type`` field.
SdkAction = Annotated[_SdkActionVariants, Field(discriminator="type")]
class RunActionResponse(BaseModel):
    """Response from running an action."""

    # Required: the workflow run the action was recorded under.
    workflow_run_id: str = Field(
        ...,
        description="The workflow run ID used for this action",
    )
class RunSdkActionRequest(BaseModel):
    """Request to run a single SDK action."""

    # Required: the URL where the action should be executed.
    url: str = Field(..., description="The URL where the action should be executed")

    # Optional browser targeting; both default to None.
    browser_session_id: str | None = Field(default=None, description="The browser session ID")
    browser_address: str | None = Field(default=None, description="The browser address")

    # Optional: continue an existing workflow run instead of starting a new one.
    workflow_run_id: str | None = Field(
        default=None,
        description="Optional workflow run ID to continue an existing workflow run",
    )

    # Required: one of the SdkAction variants, selected by its ``type`` field.
    action: SdkAction = Field(..., description="The action to execute with its specific parameters")
class RunSdkActionResponse(BaseModel):
    """Response from running an SDK action."""

    # Required: the workflow run the action was recorded under.
    workflow_run_id: str = Field(
        ...,
        description="The workflow run ID used for this action",
    )

    # Optional, untyped payload produced by the action; None when there is none.
    result: Any | None = Field(
        default=None,
        description="The result from the action (e.g., selector, value, extracted data)",
    )

View File

@@ -0,0 +1,144 @@
from typing import TYPE_CHECKING, Any
from playwright.async_api import Page
from skyvern.config import settings
from skyvern.core.script_generations.skyvern_page_ai import SkyvernPageAi
from skyvern.forge.sdk.schemas.sdk_actions import (
ClickAction,
ExtractAction,
InputTextAction,
SelectOptionAction,
)
if TYPE_CHECKING:
from skyvern.library.skyvern_browser import SkyvernBrowser
class SdkSkyvernPageAi(SkyvernPageAi):
    """Implementation of SkyvernPageAi that makes API calls to the server.

    Each AI action is packaged as an SDK action model and sent through the
    browser's API client (``run_sdk_action``). The workflow run ID returned by
    the server is stored back on the browser so that subsequent actions are
    sent with the same ``workflow_run_id``.
    """

    def __init__(
        self,
        browser: "SkyvernBrowser",
        page: Page,
    ):
        """Remember the owning browser (client + session info) and the Playwright page."""
        self._browser = browser
        self._page = page

    async def _run_sdk_action(
        self,
        action: ClickAction | InputTextAction | SelectOptionAction | ExtractAction,
    ) -> Any:
        """Send ``action`` to the server for the current page and return the raw result.

        Side effect: ``self._browser.workflow_run_id`` is overwritten with the
        workflow run ID from the response.
        """
        response = await self._browser.client.run_sdk_action(
            url=self._page.url,
            browser_session_id=self._browser.browser_session_id,
            browser_address=self._browser.browser_address,
            workflow_run_id=self._browser.workflow_run_id,
            action=action,
        )
        self._browser.workflow_run_id = response.workflow_run_id
        return response.result

    async def ai_click(
        self,
        selector: str,
        intention: str,
        data: str | dict[str, Any] | None = None,
        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
    ) -> str:
        """Click an element using AI via API call.

        Returns:
            The server's result, or ``selector`` when the result is empty.
        """
        result = await self._run_sdk_action(
            ClickAction(
                selector=selector,
                intention=intention,
                data=data,
                timeout=timeout,
            )
        )
        return result if result else selector

    async def ai_input_text(
        self,
        selector: str,
        value: str,
        intention: str,
        data: str | dict[str, Any] | None = None,
        totp_identifier: str | None = None,
        totp_url: str | None = None,
        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
    ) -> str:
        """Input text into an element using AI via API call.

        Returns:
            The server's result, or ``value`` when the result is empty.
        """
        result = await self._run_sdk_action(
            InputTextAction(
                selector=selector,
                value=value,
                intention=intention,
                data=data,
                totp_identifier=totp_identifier,
                totp_url=totp_url,
                timeout=timeout,
            )
        )
        return result if result else value

    async def ai_select_option(
        self,
        selector: str,
        value: str,
        intention: str,
        data: str | dict[str, Any] | None = None,
        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
    ) -> str:
        """Select an option from a dropdown using AI via API call.

        Returns:
            The server's result, or ``value`` when the result is empty.
        """
        result = await self._run_sdk_action(
            SelectOptionAction(
                selector=selector,
                value=value,
                intention=intention,
                data=data,
                timeout=timeout,
            )
        )
        return result if result else value

    async def ai_upload_file(
        self,
        selector: str,
        files: str,
        intention: str,
        data: str | dict[str, Any] | None = None,
        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
    ) -> str:
        """Not available through the SDK API yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("Upload is not supported yet")

    async def ai_extract(
        self,
        prompt: str,
        schema: dict[str, Any] | list | str | None = None,
        error_code_mapping: dict[str, str] | None = None,
        intention: str | None = None,
        data: str | dict[str, Any] | None = None,
    ) -> dict[str, Any] | list | str | None:
        """Extract information from the page using AI via API call.

        The ``/sdk/run_action`` route JSON-encodes extract results before
        returning them, so a string result is decoded back into the structure
        it represents (for example the text ``null`` becomes ``None``).

        Returns:
            The decoded extraction result; the raw string when it is not valid
            JSON; ``None`` when the server returned an empty result.
        """
        import json  # local import: only this method needs it

        raw = await self._run_sdk_action(
            ExtractAction(
                prompt=prompt,
                extract_schema=schema,
                error_code_mapping=error_code_mapping,
                intention=intention,
                data=data,
            )
        )
        if not raw:
            return None
        if isinstance(raw, str):
            try:
                return json.loads(raw)
            except ValueError:
                # Not JSON after all: hand the text back unchanged.
                return raw
        return raw

View File

@@ -1,7 +1,7 @@
from playwright.async_api import BrowserContext, Page
from skyvern.client import AsyncSkyvern
from skyvern.library.skyvern_browser_page import SkyvernBrowserPage, SkyvernPageRun
from skyvern.library.skyvern_browser_page import SkyvernBrowserPage
class SkyvernBrowser:
@@ -44,6 +44,20 @@ class SkyvernBrowser:
self._browser_address = browser_address
self._client = client
self.workflow_run_id: None | str = None
@property
def browser_session_id(self) -> str | None:
    """Return the browser session ID held by this browser, or ``None`` if there is none."""
    return self._browser_session_id
@property
def browser_address(self) -> str | None:
    """Return the browser address held by this browser, or ``None`` if there is none."""
    return self._browser_address
@property
def client(self) -> AsyncSkyvern:
    """Return the ``AsyncSkyvern`` API client this browser was created with."""
    return self._client
async def get_working_page(self) -> SkyvernBrowserPage:
"""Get the most recent page or create a new one if none exists.
@@ -73,5 +87,4 @@ class SkyvernBrowser:
return await self._create_skyvern_page(page)
async def _create_skyvern_page(self, page: Page) -> SkyvernBrowserPage:
page_ai = SkyvernPageRun(page, self._browser_session_id, self._browser_address, self._client)
return SkyvernBrowserPage(page, page_ai)
return SkyvernBrowserPage(self, page)

View File

@@ -1,11 +1,18 @@
import asyncio
from typing import Any
from typing import TYPE_CHECKING, Any
from playwright.async_api import Page
from skyvern.client import AsyncSkyvern, GetRunResponse
from skyvern.client import GetRunResponse
from skyvern.client.types.workflow_run_response import WorkflowRunResponse
from skyvern.config import settings
from skyvern.library.constants import DEFAULT_AGENT_HEARTBEAT_INTERVAL, DEFAULT_AGENT_TIMEOUT
from skyvern.library.SdkSkyvernPageAi import SdkSkyvernPageAi
from skyvern.webeye.actions import handler_utils
if TYPE_CHECKING:
from skyvern.library.skyvern_browser import SkyvernBrowser
from skyvern.schemas.run_blocks import CredentialType
from skyvern.schemas.runs import RunEngine, RunStatus, TaskRunResponse
@@ -18,13 +25,9 @@ class SkyvernPageRun:
and pre-defined workflows with automatic waiting for completion.
"""
def __init__(
self, page: Page, browser_session_id: str | None, browser_address: str | None, client: AsyncSkyvern
) -> None:
def __init__(self, browser: "SkyvernBrowser", page: Page) -> None:
self._browser = browser
self._page = page
self._browser_session_id = browser_session_id
self._browser_address = browser_address
self._client = client
async def run_task(
self,
@@ -63,7 +66,7 @@ class SkyvernPageRun:
TaskRunResponse containing the task execution results.
"""
task_run = await self._client.run_task(
task_run = await self._browser.client.run_task(
prompt=prompt,
engine=engine,
model=model,
@@ -75,8 +78,8 @@ class SkyvernPageRun:
error_code_mapping=error_code_mapping,
data_extraction_schema=data_extraction_schema,
max_steps=max_steps,
browser_session_id=self._browser_session_id,
browser_address=self._browser_address,
browser_session_id=self._browser.browser_session_id,
browser_address=self._browser.browser_address,
user_agent=user_agent,
)
@@ -121,7 +124,7 @@ class SkyvernPageRun:
WorkflowRunResponse containing the login workflow execution results.
"""
workflow_run = await self._client.login(
workflow_run = await self._browser.client.login(
credential_type=credential_type,
url=url or self._get_page_url(),
credential_id=credential_id,
@@ -133,8 +136,8 @@ class SkyvernPageRun:
webhook_url=webhook_url,
totp_identifier=totp_identifier,
totp_url=totp_url,
browser_session_id=self._browser_session_id,
browser_address=self._browser_address,
browser_session_id=self._browser.browser_session_id,
browser_address=self._browser.browser_address,
extra_http_headers=extra_http_headers,
)
@@ -167,7 +170,7 @@ class SkyvernPageRun:
Returns:
WorkflowRunResponse containing the workflow execution results.
"""
workflow_run = await self._client.run_workflow(
workflow_run = await self._browser.client.run_workflow(
workflow_id=workflow_id,
parameters=parameters,
template=template,
@@ -175,8 +178,8 @@ class SkyvernPageRun:
webhook_url=webhook_url,
totp_url=totp_url,
totp_identifier=totp_identifier,
browser_session_id=self._browser_session_id,
browser_address=self._browser_address,
browser_session_id=self._browser.browser_session_id,
browser_address=self._browser.browser_address,
)
workflow_run = await self._wait_for_run_completion(workflow_run.run_id, timeout)
@@ -185,7 +188,7 @@ class SkyvernPageRun:
async def _wait_for_run_completion(self, run_id: str, timeout: float) -> GetRunResponse:
async with asyncio.timeout(timeout):
while True:
task_run = await self._client.get_run(run_id)
task_run = await self._browser.client.get_run(run_id)
if RunStatus(task_run.status).is_final():
break
await asyncio.sleep(DEFAULT_AGENT_HEARTBEAT_INTERVAL)
@@ -221,28 +224,150 @@ class SkyvernBrowserPage:
run: SkyvernPageRun instance for executing AI-powered tasks and workflows.
"""
def __init__(self, page: Page, run: SkyvernPageRun):
self.run = run
self._playwright_page = page
def __init__(self, browser: "SkyvernBrowser", page: Page):
self._browser = browser
self._page = page
self._ai = SdkSkyvernPageAi(browser, page)
self.run = SkyvernPageRun(browser, page)
async def click(self, selector: str, **kwargs: Any) -> None:
"""Click an element matching the selector.
async def click(
self,
*,
selector: str | None = None,
intention: str | None = None,
ai: str | None = "fallback",
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
) -> str | None:
"""Click an element identified by ``selector``.
Args:
selector: A selector to search for an element to click.
**kwargs: Additional options like timeout, force, position, etc.
When ``intention`` and ``data`` are provided a new click action is
generated via the ``single-click-action`` prompt. The model returns a
fresh "xpath=..." selector based on the current DOM and the updated data for this run.
The browser then clicks the element using this newly generated xpath selector.
If the prompt generation or parsing fails for any reason we fall back to
clicking the originally supplied ``selector``.
"""
await self._playwright_page.click(selector, **kwargs)
async def fill(self, selector: str, value: str, **kwargs: Any) -> None:
"""Fill an input field with the given value.
if ai == "fallback":
# try to click the element with the original selector first
error_to_raise = None
if selector:
try:
locator = self._page.locator(selector)
await locator.click(timeout=timeout)
return selector
except Exception as e:
error_to_raise = e
Args:
selector: A selector to search for an element to fill.
value: Value to fill for the input field.
**kwargs: Additional options like timeout, force, no_wait_after, etc.
# if the original selector doesn't work, try to click the element with the ai generated selector
if intention:
return await self._ai.ai_click(
selector=selector or "",
intention=intention,
data=data,
timeout=timeout,
)
if error_to_raise:
raise error_to_raise
else:
return selector
elif ai == "proactive":
if intention:
return await self._ai.ai_click(
selector=selector or "",
intention=intention,
data=data,
timeout=timeout,
)
if selector:
locator = self._page.locator(selector)
await locator.click(timeout=timeout)
return selector
async def _input_text(
self,
selector: str,
value: str,
ai: str | None = "fallback",
intention: str | None = None,
data: str | dict[str, Any] | None = None,
totp_identifier: str | None = None,
totp_url: str | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
) -> str:
"""Input text into an element identified by ``selector``.
When ``intention`` and ``data`` are provided a new input text action is
generated via the `script-generation-input-text-generation` prompt. The model returns a
fresh text based on the current DOM and the updated data for this run.
The browser then inputs the text using this newly generated text.
If the prompt generation or parsing fails for any reason we fall back to
inputting the originally supplied ``value``.
"""
await self._playwright_page.fill(selector, value, **kwargs)
# In "fallback" mode, first try typing the supplied value with the original selector; the AI path is used only if that fails and an intention was given.
if ai == "fallback":
error_to_raise = None
try:
locator = self._page.locator(selector)
await handler_utils.input_sequentially(locator, value, timeout=timeout)
return value
except Exception as e:
error_to_raise = e
if intention:
return await self._ai.ai_input_text(
selector=selector,
value=value,
intention=intention,
data=data,
totp_identifier=totp_identifier,
totp_url=totp_url,
timeout=timeout,
)
if error_to_raise:
raise error_to_raise
else:
return value
elif ai == "proactive" and intention:
return await self._ai.ai_input_text(
selector=selector,
value=value,
intention=intention,
data=data,
totp_identifier=totp_identifier,
totp_url=totp_url,
timeout=timeout,
)
locator = self._page.locator(selector)
await handler_utils.input_sequentially(locator, value, timeout=timeout)
return value
async def fill(
    self,
    selector: str,
    value: str,
    ai: str | None = "fallback",
    intention: str | None = None,
    data: str | dict[str, Any] | None = None,
    timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
    totp_identifier: str | None = None,
    totp_url: str | None = None,
) -> str:
    """Fill the element matching ``selector`` with ``value``.

    Public entry point that forwards every argument unchanged to
    ``_input_text`` and returns whatever it returns.
    """
    entered_text = await self._input_text(
        selector=selector,
        value=value,
        ai=ai,
        intention=intention,
        data=data,
        totp_identifier=totp_identifier,
        totp_url=totp_url,
        timeout=timeout,
    )
    return entered_text
async def goto(self, url: str, **kwargs: Any) -> None:
"""Navigate to the given URL.
@@ -251,7 +376,7 @@ class SkyvernBrowserPage:
url: URL to navigate page to.
**kwargs: Additional options like timeout, wait_until, referer, etc.
"""
await self._playwright_page.goto(url, **kwargs)
await self._page.goto(url, **kwargs)
async def type(self, selector: str, text: str, **kwargs: Any) -> None:
"""Type text into an element character by character.
@@ -261,7 +386,7 @@ class SkyvernBrowserPage:
text: Text to type into the element.
**kwargs: Additional options like delay, timeout, no_wait_after, etc.
"""
await self._playwright_page.type(selector, text, **kwargs)
await self._page.type(selector, text, **kwargs)
async def select_option(self, selector: str, value: Any = None, **kwargs: Any) -> list[str]:
"""Select option(s) in a <select> element.
@@ -274,7 +399,7 @@ class SkyvernBrowserPage:
Returns:
List of option values that have been successfully selected.
"""
return await self._playwright_page.select_option(selector, value, **kwargs)
return await self._page.select_option(selector, value, **kwargs)
async def reload(self, **kwargs: Any) -> None:
"""Reload the current page.
@@ -282,7 +407,7 @@ class SkyvernBrowserPage:
Args:
**kwargs: Additional options like timeout, wait_until, etc.
"""
await self._playwright_page.reload(**kwargs)
await self._page.reload(**kwargs)
async def screenshot(self, **kwargs: Any) -> bytes:
"""Take a screenshot of the page.
@@ -293,4 +418,4 @@ class SkyvernBrowserPage:
Returns:
bytes: The screenshot as bytes (unless path is specified, then saves to file).
"""
return await self._playwright_page.screenshot(**kwargs)
return await self._page.screenshot(**kwargs)