Files
Dorod-Sky/skyvern/forge/sdk/schemas/sdk_actions.py

170 lines
6.6 KiB
Python
Raw Normal View History

from enum import Enum
from typing import Annotated, Any, Literal, Union
from pydantic import BaseModel, Field
from skyvern.config import settings
class SdkActionType(str, Enum):
    """Enum for SDK action types that can be executed."""

    # Members subclass ``str`` so they compare equal to their wire values.
    # The values match the ``type`` literals declared on the action models.
    AI_CLICK = "ai_click"
    AI_INPUT_TEXT = "ai_input_text"
    AI_SELECT_OPTION = "ai_select_option"
    AI_UPLOAD_FILE = "ai_upload_file"
    AI_ACT = "ai_act"
    EXTRACT = "extract"
# Base action class
class SdkActionBase(BaseModel):
    """Base class for SDK actions."""

    # Required on the base; there is no default value here.
    type: str = Field(..., description="The type of action")

    def get_navigation_goal(self) -> str | None:
        """Return the navigation goal; the base action has none."""
        return None

    def get_navigation_payload(self) -> dict[str, Any] | None:
        """Return the navigation payload; the base action has none."""
        return None
# Specific action types
class ClickAction(SdkActionBase):
    """Click action parameters."""

    type: Literal["ai_click"] = "ai_click"
    selector: str | None = Field(default="", description="CSS selector for the element")
    intention: str = Field(default="", description="The intention or goal of the click")
    data: str | dict[str, Any] | None = Field(default=None, description="Additional context data")
    timeout: float = Field(
        default=settings.BROWSER_ACTION_TIMEOUT_MS,
        description="Timeout in milliseconds",
    )

    def get_navigation_goal(self) -> str | None:
        """Return the click intention as the navigation goal."""
        return self.intention

    def get_navigation_payload(self) -> dict[str, Any] | None:
        """Return ``data`` when it is a dict; otherwise return ``None``."""
        if isinstance(self.data, dict):
            return self.data
        return None
class InputTextAction(SdkActionBase):
    """Input text action parameters."""

    type: Literal["ai_input_text"] = "ai_input_text"
    selector: str | None = Field(default="", description="CSS selector for the element")
    value: str | None = Field(default="", description="Value to input")
    intention: str = Field(default="", description="The intention or goal of the input")
    data: str | dict[str, Any] | None = Field(default=None, description="Additional context data")
    totp_identifier: str | None = Field(default=None, description="TOTP identifier for input_text actions")
    totp_url: str | None = Field(default=None, description="TOTP URL for input_text actions")
    timeout: float = Field(
        default=settings.BROWSER_ACTION_TIMEOUT_MS,
        description="Timeout in milliseconds",
    )

    def get_navigation_goal(self) -> str | None:
        """Return the input intention as the navigation goal."""
        return self.intention

    def get_navigation_payload(self) -> dict[str, Any] | None:
        """Return ``data`` when it is a dict; otherwise return ``None``."""
        if isinstance(self.data, dict):
            return self.data
        return None
class SelectOptionAction(SdkActionBase):
    """Select option action parameters."""

    type: Literal["ai_select_option"] = "ai_select_option"
    selector: str | None = Field(default="", description="CSS selector for the element")
    value: str | None = Field(default="", description="Value to select")
    intention: str = Field(default="", description="The intention or goal of the selection")
    data: str | dict[str, Any] | None = Field(default=None, description="Additional context data")
    timeout: float = Field(
        default=settings.BROWSER_ACTION_TIMEOUT_MS,
        description="Timeout in milliseconds",
    )

    def get_navigation_goal(self) -> str | None:
        """Return the selection intention as the navigation goal."""
        return self.intention

    def get_navigation_payload(self) -> dict[str, Any] | None:
        """Return ``data`` when it is a dict; otherwise return ``None``."""
        if isinstance(self.data, dict):
            return self.data
        return None
class UploadFileAction(SdkActionBase):
    """Upload file action parameters."""

    type: Literal["ai_upload_file"] = "ai_upload_file"
    selector: str | None = Field(default="", description="CSS selector for the element")
    file_url: str | None = Field(default="", description="File URL for upload")
    intention: str = Field(default="", description="The intention or goal of the upload")
    data: str | dict[str, Any] | None = Field(None, description="Additional context data")
    timeout: float = Field(default=settings.BROWSER_ACTION_TIMEOUT_MS, description="Timeout in milliseconds")

    def get_navigation_goal(self) -> str | None:
        """Return the upload intention as the navigation goal."""
        return self.intention

    def get_navigation_payload(self) -> dict[str, Any] | None:
        """Build the navigation payload, ensuring it carries a ``"files"`` entry.

        Returns:
            ``None`` when ``data`` is a non-empty non-dict value (a string).
            Otherwise a new dict holding the entries of ``data`` (if any),
            with ``"files"`` set to ``file_url`` unless ``data`` already
            provides a ``"files"`` key.
        """
        if self.data and not isinstance(self.data, dict):
            return None
        # Work on a shallow copy: adding "files" must not mutate the model's
        # own ``data`` field (or a dict the caller still holds a reference to).
        payload: dict[str, Any] = dict(self.data) if isinstance(self.data, dict) else {}
        payload.setdefault("files", self.file_url)
        return payload
2025-11-04 11:28:43 -07:00
class ActAction(SdkActionBase):
    """AI act action parameters."""

    type: Literal["ai_act"] = "ai_act"
    intention: str = Field(default="", description="Natural language prompt for the action")
    data: str | dict[str, Any] | None = Field(default=None, description="Additional context data")

    def get_navigation_goal(self) -> str | None:
        """Return the natural-language prompt as the navigation goal."""
        return self.intention

    def get_navigation_payload(self) -> dict[str, Any] | None:
        """Return ``data`` when it is a dict; otherwise return ``None``."""
        if isinstance(self.data, dict):
            return self.data
        return None
2025-11-04 11:28:43 -07:00
class ExtractAction(SdkActionBase):
    """Extract data action parameters."""

    type: Literal["extract"] = "extract"
    prompt: str = Field(default="", description="Extraction prompt")
    extract_schema: dict[str, Any] | list | str | None = Field(
        default=None,
        description="Schema for extraction",
    )
    error_code_mapping: dict[str, str] | None = Field(
        default=None,
        description="Error code mapping for extraction",
    )
    # Unlike the other actions in this module, ``intention`` defaults to None here.
    intention: str | None = Field(default=None, description="The intention or goal of the extraction")
    data: str | dict[str, Any] | None = Field(default=None, description="Additional context data")

    def get_navigation_goal(self) -> str | None:
        """Return the extraction intention, which may be ``None``."""
        return self.intention

    def get_navigation_payload(self) -> dict[str, Any] | None:
        """Return ``data`` when it is a dict; otherwise return ``None``."""
        if isinstance(self.data, dict):
            return self.data
        return None
# Discriminated union of all action types
# The ``type`` field of each member selects which model is used for validation.
SdkAction = Annotated[
    Union[
        ClickAction,
        InputTextAction,
        SelectOptionAction,
        UploadFileAction,
        ActAction,
        ExtractAction,
    ],
    Field(discriminator="type"),
]
class RunActionResponse(BaseModel):
    """Response from running an action."""

    # Required: no default is provided.
    workflow_run_id: str = Field(
        ...,
        description="The workflow run ID used for this action",
    )
class RunSdkActionRequest(BaseModel):
    """Request to run a single SDK action."""

    # Required fields: ``url`` and ``action``. Everything else defaults to None.
    url: str = Field(..., description="The URL where the action should be executed")
    browser_session_id: str | None = Field(default=None, description="The browser session ID")
    browser_address: str | None = Field(default=None, description="The browser address")
    workflow_run_id: str | None = Field(
        default=None,
        description="Optional workflow run ID to continue an existing workflow run",
    )
    action: SdkAction = Field(
        ...,
        description="The action to execute with its specific parameters",
    )
class RunSdkActionResponse(BaseModel):
    """Response from running an SDK action."""

    workflow_run_id: str = Field(
        ...,
        description="The workflow run ID used for this action",
    )
    # Untyped on purpose in the annotation (``Any``); defaults to None.
    result: Any | None = Field(
        default=None,
        description="The result from the action (e.g., selector, value, extracted data)",
    )