Reviewer notes (free text placed before the first "diff --git" header):
  * exceptions.py, agent.py, agent_functions.py: StepUnableToExecuteError is removed and its
    raise site / except clause now use StepTerminationError.
  * actions.py, models.py, agent.py: the ActionTypeUnion alias is removed; annotations use Action,
    which now declares the optional fields commented as DecisiveAction / WebAction fields.
  * db/client.py, agent_protocol.py: step serialization passes exclude_none=True.
  * Review fix: the added raise in agent_functions.py keeps the f-string prefix so that
    {reasons} is interpolated, as it was on the removed line.
  * NOTE(review): indentation and blank context lines were rebuilt from the hunk line counts;
    confirm against the target tree or apply with whitespace tolerance.

diff --git a/skyvern/exceptions.py b/skyvern/exceptions.py
index f3f80ce7..f3db518d 100644
--- a/skyvern/exceptions.py
+++ b/skyvern/exceptions.py
@@ -266,12 +266,7 @@ class UnknownElementTreeFormat(SkyvernException):
 
 class StepTerminationError(SkyvernException):
     def __init__(self, step_id: str, reason: str) -> None:
-        super().__init__(f"Step {step_id} cannot be executed and task is failed. Reason: {reason}")
-
-
-class StepUnableToExecuteError(SkyvernException):
-    def __init__(self, step_id: str, reason: str) -> None:
-        super().__init__(f"Step {step_id} cannot be executed and task execution is stopped. Reason: {reason}")
+        super().__init__(f"Step {step_id} cannot be executed and task is terminated. Reason: {reason}")
 
 
 class UnsupportedActionType(SkyvernException):
diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py
index be728e1e..c7aac774 100644
--- a/skyvern/forge/agent.py
+++ b/skyvern/forge/agent.py
@@ -18,7 +18,6 @@ from skyvern.exceptions import (
     InvalidWorkflowTaskURLState,
     MissingBrowserStatePage,
     StepTerminationError,
-    StepUnableToExecuteError,
     TaskNotFound,
 )
 from skyvern.forge import app
@@ -37,7 +36,6 @@ from skyvern.forge.sdk.workflow.models.workflow import Workflow, WorkflowRun
 from skyvern.webeye.actions.actions import (
     Action,
     ActionType,
-    ActionTypeUnion,
     CompleteAction,
     UserDefinedError,
     WebAction,
@@ -53,7 +51,7 @@ LOG = structlog.get_logger()
 
 
 class ActionLinkedNode:
-    def __init__(self, action: ActionTypeUnion) -> None:
+    def __init__(self, action: Action) -> None:
         self.action = action
         self.next: ActionLinkedNode | None = None
 
@@ -330,16 +328,9 @@ class ForgeAgent:
 
             return step, detailed_output, next_step
         # TODO (kerem): Let's add other exceptions that we know about here as custom exceptions as well
-        except StepUnableToExecuteError:
-            LOG.error(
-                "Step cannot be executed. Task execution stopped",
-                task_id=task.task_id,
-                step_id=step.step_id,
-            )
-            raise
         except StepTerminationError as e:
             LOG.error(
-                "Step cannot be executed. Task failed.",
+                "Step cannot be executed. Task terminated",
                 task_id=task.task_id,
                 step_id=step.step_id,
             )
@@ -841,7 +832,7 @@ class ForgeAgent:
         # Get action results from the last app.SETTINGS.PROMPT_ACTION_HISTORY_WINDOW steps
         steps = await app.DATABASE.get_task_steps(task_id=task.task_id, organization_id=task.organization_id)
         window_steps = steps[-1 * SettingsManager.get_settings().PROMPT_ACTION_HISTORY_WINDOW :]
-        actions_and_results: list[tuple[ActionTypeUnion, list[ActionResult]]] = []
+        actions_and_results: list[tuple[Action, list[ActionResult]]] = []
         for window_step in window_steps:
             if window_step.output and window_step.output.actions_and_results:
                 actions_and_results.extend(window_step.output.actions_and_results)
diff --git a/skyvern/forge/agent_functions.py b/skyvern/forge/agent_functions.py
index 9e704ffa..cc51a561 100644
--- a/skyvern/forge/agent_functions.py
+++ b/skyvern/forge/agent_functions.py
@@ -1,6 +1,6 @@
 from playwright.async_api import Page
 
-from skyvern.exceptions import StepUnableToExecuteError
+from skyvern.exceptions import StepTerminationError
 from skyvern.forge import app
 from skyvern.forge.async_operations import AsyncOperation
 from skyvern.forge.sdk.models import Organization, Step, StepStatus
@@ -34,7 +34,7 @@ class AgentFunction:
 
         can_execute = has_valid_task_status and has_valid_step_status and has_no_running_steps
         if not can_execute:
-            raise StepUnableToExecuteError(step_id=step.step_id, reason=f"Cannot execute step. Reasons: {reasons}")
+            raise StepTerminationError(step_id=step.step_id, reason=f"Cannot execute step. Reasons: {reasons}")
 
     def generate_async_operations(
         self,
diff --git a/skyvern/forge/sdk/db/client.py b/skyvern/forge/sdk/db/client.py
index 52632fa1..ad42ca61 100644
--- a/skyvern/forge/sdk/db/client.py
+++ b/skyvern/forge/sdk/db/client.py
@@ -309,7 +309,7 @@ class AgentDB:
                     if status is not None:
                         step.status = status
                     if output is not None:
-                        step.output = output.model_dump()
+                        step.output = output.model_dump(exclude_none=True)
                     if is_last is not None:
                         step.is_last = is_last
                     if retry_index is not None:
diff --git a/skyvern/forge/sdk/routes/agent_protocol.py b/skyvern/forge/sdk/routes/agent_protocol.py
index a72485d6..1bb97f55 100644
--- a/skyvern/forge/sdk/routes/agent_protocol.py
+++ b/skyvern/forge/sdk/routes/agent_protocol.py
@@ -195,7 +195,7 @@ async def execute_agent_task_step(
             )
     step, _, _ = await app.agent.execute_step(current_org, task, step)
     return Response(
-        content=step.model_dump_json() if step else "",
+        content=step.model_dump_json(exclude_none=True) if step else "",
         status_code=200,
         media_type="application/json",
     )
@@ -402,7 +402,7 @@ async def get_agent_task_steps(
     """
     analytics.capture("skyvern-oss-agent-task-steps-get")
     steps = await app.DATABASE.get_task_steps(task_id, organization_id=current_org.organization_id)
-    return ORJSONResponse([step.model_dump() for step in steps])
+    return ORJSONResponse([step.model_dump(exclude_none=True) for step in steps])
 
 
 @base_router.get(
diff --git a/skyvern/webeye/actions/actions.py b/skyvern/webeye/actions/actions.py
index 9b534090..fce6f485 100644
--- a/skyvern/webeye/actions/actions.py
+++ b/skyvern/webeye/actions/actions.py
@@ -1,6 +1,5 @@
-import abc
 from enum import StrEnum
-from typing import Any, Dict, List
+from typing import Any, Dict
 
 import structlog
 from deprecation import deprecated
@@ -27,17 +26,6 @@ class ActionType(StrEnum):
     SOLVE_CAPTCHA = "solve_captcha"
     TERMINATE = "terminate"
     COMPLETE = "complete"
-    # Note: Remember to update ActionTypeUnion with new actions
-
-
-class Action(BaseModel):
-    action_type: ActionType
-    description: str | None = None
-    reasoning: str | None = None
-
-
-class WebAction(Action, abc.ABC):
-    element_id: str
 
 
 class UserDefinedError(BaseModel):
@@ -46,8 +34,41 @@ class UserDefinedError(BaseModel):
     confidence_float: float = Field(..., ge=0, le=1)
 
 
-class DecisiveAction(Action, abc.ABC):
-    errors: List[UserDefinedError] = []
+class SelectOption(BaseModel):
+    label: str | None
+    value: str | None
+    index: int | None
+
+    def __repr__(self) -> str:
+        return f"SelectOption(label={self.label}, value={self.value}, index={self.index})"
+
+
+class Action(BaseModel):
+    action_type: ActionType
+    description: str | None = None
+    reasoning: str | None = None
+    element_id: str | None = None
+
+    # DecisiveAction (CompleteAction, TerminateAction) fields
+    errors: list[UserDefinedError] | None = None
+    data_extraction_goal: str | None = None
+
+    # WebAction fields
+    file_name: str | None = None
+    file_url: str | None = None
+    download: bool | None = None
+    is_upload_file_tag: bool | None = None
+    text: str | None = None
+    option: SelectOption | None = None
+    is_checked: bool | None = None
+
+
+class WebAction(Action):
+    element_id: str
+
+
+class DecisiveAction(Action):
+    errors: list[UserDefinedError] = []
 
 
 class ClickAction(WebAction):
@@ -55,6 +76,9 @@ class ClickAction(WebAction):
     file_url: str | None = None
     download: bool = False
 
+    def __repr__(self) -> str:
+        return f"ClickAction(element_id={self.element_id}, file_url={self.file_url}, download={self.download})"
+
 
 class InputTextAction(WebAction):
     action_type: ActionType = ActionType.INPUT_TEXT
@@ -90,15 +114,6 @@ class SolveCaptchaAction(Action):
     action_type: ActionType = ActionType.SOLVE_CAPTCHA
 
 
-class SelectOption(BaseModel):
-    label: str | None
-    value: str | None
-    index: int | None
-
-    def __repr__(self) -> str:
-        return f"SelectOption(label={self.label}, value={self.value}, index={self.index})"
-
-
 class SelectOptionAction(WebAction):
     action_type: ActionType = ActionType.SELECT_OPTION
     option: SelectOption
@@ -221,8 +236,8 @@ def parse_action(action: Dict[str, Any], data_extraction_goal: str | None = None
     raise UnsupportedActionType(action_type=action_type)
 
 
-def parse_actions(task: Task, json_response: List[Dict[str, Any]]) -> List[Action]:
-    actions: List[Action] = []
+def parse_actions(task: Task, json_response: list[Dict[str, Any]]) -> list[Action]:
+    actions: list[Action] = []
     for action in json_response:
         try:
             action_instance = parse_action(action=action, data_extraction_goal=task.data_extraction_goal)
@@ -257,7 +272,6 @@ def parse_actions(task: Task, json_response: List[Dict[str, Any]]) -> List[Actio
                 raw_action=action,
                 exc_info=True,
             )
-
     return actions
 
 
@@ -268,20 +282,3 @@ class ScrapeResult(BaseModel):
     """
 
     scraped_data: dict[str, Any] | list[dict[str, Any]]
-
-
-# https://blog.devgenius.io/deserialize-child-classes-with-pydantic-that-gonna-work-784230e1cf83
-ActionTypeUnion = (
-    ClickAction
-    | InputTextAction
-    | UploadFileAction
-    # Deprecated
-    # | DownloadFileAction
-    | SelectOptionAction
-    | CheckboxAction
-    | WaitAction
-    | NullAction
-    | SolveCaptchaAction
-    | TerminateAction
-    | CompleteAction
-)
diff --git a/skyvern/webeye/actions/models.py b/skyvern/webeye/actions/models.py
index b480d7ea..59a92344 100644
--- a/skyvern/webeye/actions/models.py
+++ b/skyvern/webeye/actions/models.py
@@ -5,7 +5,7 @@ from typing import Any
 from pydantic import BaseModel
 
 from skyvern.forge.sdk.settings_manager import SettingsManager
-from skyvern.webeye.actions.actions import Action, ActionTypeUnion, DecisiveAction, UserDefinedError
+from skyvern.webeye.actions.actions import Action, DecisiveAction, UserDefinedError
 from skyvern.webeye.actions.responses import ActionResult
 from skyvern.webeye.scraper.scraper import ScrapedPage
 
@@ -18,7 +18,7 @@ class AgentStepOutput(BaseModel):
     # Will be deprecated once we move to the new format below
     action_results: list[ActionResult] | None = None
     # Nullable for backwards compatibility, once backfill is done, this won't be nullable anymore
-    actions_and_results: list[tuple[ActionTypeUnion, list[ActionResult]]] | None = None
+    actions_and_results: list[tuple[Action, list[ActionResult]]] | None = None
     errors: list[UserDefinedError] = []
 
     def __repr__(self) -> str:
@@ -38,7 +38,7 @@ class DetailedAgentStepOutput(BaseModel):
     llm_response: dict[str, Any] | None
     actions: list[Action] | None
     action_results: list[ActionResult] | None
-    actions_and_results: list[tuple[ActionTypeUnion, list[ActionResult]]] | None
+    actions_and_results: list[tuple[Action, list[ActionResult]]] | None
 
     class Config:
         exclude = ["scraped_page", "extract_action_prompt"]