Files
Dorod-Sky/skyvern/webeye/actions/actions.py
2025-03-06 12:18:42 -08:00

268 lines
8.5 KiB
Python

from datetime import datetime
from enum import StrEnum
from typing import Annotated, Any, Type, TypeVar
import structlog
from litellm import ConfigDict
from pydantic import BaseModel, Field
LOG = structlog.get_logger()
T = TypeVar("T", bound="Action")
class ActionType(StrEnum):
CLICK = "click"
INPUT_TEXT = "input_text"
UPLOAD_FILE = "upload_file"
# This action is not used in the current implementation. Click actions are used instead."
DOWNLOAD_FILE = "download_file"
SELECT_OPTION = "select_option"
CHECKBOX = "checkbox"
WAIT = "wait"
NULL_ACTION = "null_action"
SOLVE_CAPTCHA = "solve_captcha"
TERMINATE = "terminate"
COMPLETE = "complete"
RELOAD_PAGE = "reload_page"
EXTRACT = "extract"
def is_web_action(self) -> bool:
return self in [
ActionType.CLICK,
ActionType.INPUT_TEXT,
ActionType.UPLOAD_FILE,
ActionType.DOWNLOAD_FILE,
ActionType.SELECT_OPTION,
ActionType.CHECKBOX,
]
class ActionStatus(StrEnum):
pending = "pending"
skipped = "skipped"
failed = "failed"
completed = "completed"
class UserDefinedError(BaseModel):
error_code: str
reasoning: str
confidence_float: float = Field(..., ge=0, le=1)
def __repr__(self) -> str:
return f"{self.reasoning}(error_code={self.error_code}, confidence_float={self.confidence_float})"
class SelectOption(BaseModel):
label: str | None = None
value: str | None = None
index: int | None = None
def __repr__(self) -> str:
return f"SelectOption(label={self.label}, value={self.value}, index={self.index})"
class CompleteVerifyResult(BaseModel):
user_goal_achieved: bool
thoughts: str
page_info: str | None = None
def __repr__(self) -> str:
return f"CompleteVerifyResponse(thoughts={self.thoughts}, user_goal_achieved={self.user_goal_achieved}, page_info={self.page_info})"
class InputOrSelectContext(BaseModel):
intention: str | None = None
field: str | None = None
is_required: bool | None = None
is_search_bar: bool | None = None # don't trigger custom-selection logic when it's a search bar
is_location_input: bool | None = None # address input usually requires auto completion
is_date_related: bool | None = None # date picker mini agent requires some special logic
def __repr__(self) -> str:
return f"InputOrSelectContext(field={self.field}, is_required={self.is_required}, is_search_bar={self.is_search_bar}, is_location_input={self.is_location_input}, intention={self.intention})"
class Action(BaseModel):
model_config = ConfigDict(from_attributes=True)
action_type: ActionType
status: ActionStatus = ActionStatus.pending
action_id: str | None = None
source_action_id: str | None = None
organization_id: str | None = None
workflow_run_id: str | None = None
task_id: str | None = None
step_id: str | None = None
step_order: int | None = None
action_order: int | None = None
confidence_float: float | None = None
description: str | None = None
reasoning: str | None = None
intention: str | None = None
response: str | None = None
element_id: Annotated[str, Field(coerce_numbers_to_str=True)] | None = None
skyvern_element_hash: str | None = None
skyvern_element_data: dict[str, Any] | None = None
# DecisiveAction (CompleteAction, TerminateAction) fields
errors: list[UserDefinedError] | None = None
data_extraction_goal: str | None = None
# WebAction fields
file_name: str | None = None
file_url: str | None = None
download: bool | None = None
is_upload_file_tag: bool | None = None
text: str | None = None
option: SelectOption | None = None
is_checked: bool | None = None
verified: bool = False
created_at: datetime | None = None
modified_at: datetime | None = None
@classmethod
def validate(cls: Type[T], value: Any) -> T:
if isinstance(value, dict):
action_type = value["action_type"]
if action_type is ActionType.CLICK:
return ClickAction.model_validate(value)
elif action_type is ActionType.INPUT_TEXT:
return InputTextAction.model_validate(value)
elif action_type is ActionType.UPLOAD_FILE:
return UploadFileAction.model_validate(value)
elif action_type is ActionType.DOWNLOAD_FILE:
return DownloadFileAction.model_validate(value)
elif action_type is ActionType.NULL_ACTION:
return NullAction.model_validate(value)
elif action_type is ActionType.TERMINATE:
return TerminateAction.model_validate(value)
elif action_type is ActionType.COMPLETE:
return CompleteAction.model_validate(value)
elif action_type is ActionType.SELECT_OPTION:
return SelectOptionAction.model_validate(value)
elif action_type is ActionType.CHECKBOX:
return CheckboxAction.model_validate(value)
elif action_type is ActionType.WAIT:
return WaitAction.model_validate(value)
elif action_type is ActionType.SOLVE_CAPTCHA:
return SolveCaptchaAction.model_validate(value)
elif action_type is ActionType.RELOAD_PAGE:
return ReloadPageAction.model_validate(value)
else:
raise ValueError(f"Unsupported action type: {action_type}")
else:
raise ValueError("Invalid action data")
class WebAction(Action):
element_id: Annotated[str, Field(coerce_numbers_to_str=True)]
class DecisiveAction(Action):
errors: list[UserDefinedError] = []
# TODO: consider to implement this as a WebAction in the future
class ReloadPageAction(Action):
action_type: ActionType = ActionType.RELOAD_PAGE
class ClickAction(WebAction):
action_type: ActionType = ActionType.CLICK
file_url: str | None = None
download: bool = False
def __repr__(self) -> str:
return f"ClickAction(element_id={self.element_id}, file_url={self.file_url}, download={self.download})"
class InputTextAction(WebAction):
action_type: ActionType = ActionType.INPUT_TEXT
text: str
def __repr__(self) -> str:
return f"InputTextAction(element_id={self.element_id}, text={self.text})"
class UploadFileAction(WebAction):
action_type: ActionType = ActionType.UPLOAD_FILE
file_url: str
is_upload_file_tag: bool = True
def __repr__(self) -> str:
return f"UploadFileAction(element_id={self.element_id}, file={self.file_url}, is_upload_file_tag={self.is_upload_file_tag})"
# this is a deprecated action type
class DownloadFileAction(WebAction):
action_type: ActionType = ActionType.DOWNLOAD_FILE
file_name: str
def __repr__(self) -> str:
return f"DownloadFileAction(element_id={self.element_id}, file_name={self.file_name})"
class NullAction(Action):
action_type: ActionType = ActionType.NULL_ACTION
class SolveCaptchaAction(Action):
action_type: ActionType = ActionType.SOLVE_CAPTCHA
class SelectOptionAction(WebAction):
action_type: ActionType = ActionType.SELECT_OPTION
option: SelectOption
def __repr__(self) -> str:
return f"SelectOptionAction(element_id={self.element_id}, option={self.option})"
###
# This action causes more harm than it does good.
# It frequently mis-behaves, or gets stuck in click loops.
# Treating checkbox actions as click actions seem to perform way more reliably
# Developers who tried this and failed: 2 (Suchintan and Shu 😂)
###
class CheckboxAction(WebAction):
action_type: ActionType = ActionType.CHECKBOX
is_checked: bool
def __repr__(self) -> str:
return f"CheckboxAction(element_id={self.element_id}, is_checked={self.is_checked})"
class WaitAction(Action):
action_type: ActionType = ActionType.WAIT
class TerminateAction(DecisiveAction):
action_type: ActionType = ActionType.TERMINATE
class CompleteAction(DecisiveAction):
action_type: ActionType = ActionType.COMPLETE
verified: bool = False
data_extraction_goal: str | None = None
class ExtractAction(Action):
action_type: ActionType = ActionType.EXTRACT
data_extraction_goal: str | None = None
data_extraction_schema: dict[str, Any] | None = None
class ScrapeResult(BaseModel):
"""
Scraped response from a webpage, including:
1. JSON representation of what the user is seeing
"""
scraped_data: dict[str, Any] | list | str | None