fix input text action with no text in GET steps API (#421)

This commit is contained in:
Kerem Yilmaz
2024-06-05 13:18:35 -07:00
committed by GitHub
parent cf3fb71012
commit 3f3fbbc63d
7 changed files with 53 additions and 70 deletions

View File

@@ -266,12 +266,7 @@ class UnknownElementTreeFormat(SkyvernException):
class StepTerminationError(SkyvernException): class StepTerminationError(SkyvernException):
def __init__(self, step_id: str, reason: str) -> None: def __init__(self, step_id: str, reason: str) -> None:
super().__init__(f"Step {step_id} cannot be executed and task is failed. Reason: {reason}") super().__init__(f"Step {step_id} cannot be executed and task is terminated. Reason: {reason}")
class StepUnableToExecuteError(SkyvernException):
def __init__(self, step_id: str, reason: str) -> None:
super().__init__(f"Step {step_id} cannot be executed and task execution is stopped. Reason: {reason}")
class UnsupportedActionType(SkyvernException): class UnsupportedActionType(SkyvernException):

View File

@@ -18,7 +18,6 @@ from skyvern.exceptions import (
InvalidWorkflowTaskURLState, InvalidWorkflowTaskURLState,
MissingBrowserStatePage, MissingBrowserStatePage,
StepTerminationError, StepTerminationError,
StepUnableToExecuteError,
TaskNotFound, TaskNotFound,
) )
from skyvern.forge import app from skyvern.forge import app
@@ -37,7 +36,6 @@ from skyvern.forge.sdk.workflow.models.workflow import Workflow, WorkflowRun
from skyvern.webeye.actions.actions import ( from skyvern.webeye.actions.actions import (
Action, Action,
ActionType, ActionType,
ActionTypeUnion,
CompleteAction, CompleteAction,
UserDefinedError, UserDefinedError,
WebAction, WebAction,
@@ -53,7 +51,7 @@ LOG = structlog.get_logger()
class ActionLinkedNode: class ActionLinkedNode:
def __init__(self, action: ActionTypeUnion) -> None: def __init__(self, action: Action) -> None:
self.action = action self.action = action
self.next: ActionLinkedNode | None = None self.next: ActionLinkedNode | None = None
@@ -330,16 +328,9 @@ class ForgeAgent:
return step, detailed_output, next_step return step, detailed_output, next_step
# TODO (kerem): Let's add other exceptions that we know about here as custom exceptions as well # TODO (kerem): Let's add other exceptions that we know about here as custom exceptions as well
except StepUnableToExecuteError:
LOG.error(
"Step cannot be executed. Task execution stopped",
task_id=task.task_id,
step_id=step.step_id,
)
raise
except StepTerminationError as e: except StepTerminationError as e:
LOG.error( LOG.error(
"Step cannot be executed. Task failed.", "Step cannot be executed. Task terminated",
task_id=task.task_id, task_id=task.task_id,
step_id=step.step_id, step_id=step.step_id,
) )
@@ -841,7 +832,7 @@ class ForgeAgent:
# Get action results from the last app.SETTINGS.PROMPT_ACTION_HISTORY_WINDOW steps # Get action results from the last app.SETTINGS.PROMPT_ACTION_HISTORY_WINDOW steps
steps = await app.DATABASE.get_task_steps(task_id=task.task_id, organization_id=task.organization_id) steps = await app.DATABASE.get_task_steps(task_id=task.task_id, organization_id=task.organization_id)
window_steps = steps[-1 * SettingsManager.get_settings().PROMPT_ACTION_HISTORY_WINDOW :] window_steps = steps[-1 * SettingsManager.get_settings().PROMPT_ACTION_HISTORY_WINDOW :]
actions_and_results: list[tuple[ActionTypeUnion, list[ActionResult]]] = [] actions_and_results: list[tuple[Action, list[ActionResult]]] = []
for window_step in window_steps: for window_step in window_steps:
if window_step.output and window_step.output.actions_and_results: if window_step.output and window_step.output.actions_and_results:
actions_and_results.extend(window_step.output.actions_and_results) actions_and_results.extend(window_step.output.actions_and_results)

View File

@@ -1,6 +1,6 @@
from playwright.async_api import Page from playwright.async_api import Page
from skyvern.exceptions import StepUnableToExecuteError from skyvern.exceptions import StepTerminationError
from skyvern.forge import app from skyvern.forge import app
from skyvern.forge.async_operations import AsyncOperation from skyvern.forge.async_operations import AsyncOperation
from skyvern.forge.sdk.models import Organization, Step, StepStatus from skyvern.forge.sdk.models import Organization, Step, StepStatus
@@ -34,7 +34,7 @@ class AgentFunction:
can_execute = has_valid_task_status and has_valid_step_status and has_no_running_steps can_execute = has_valid_task_status and has_valid_step_status and has_no_running_steps
if not can_execute: if not can_execute:
raise StepUnableToExecuteError(step_id=step.step_id, reason=f"Cannot execute step. Reasons: {reasons}") raise StepTerminationError(step_id=step.step_id, reason=f"Cannot execute step. Reasons: {reasons}")
def generate_async_operations( def generate_async_operations(
self, self,

View File

@@ -309,7 +309,7 @@ class AgentDB:
if status is not None: if status is not None:
step.status = status step.status = status
if output is not None: if output is not None:
step.output = output.model_dump() step.output = output.model_dump(exclude_none=True)
if is_last is not None: if is_last is not None:
step.is_last = is_last step.is_last = is_last
if retry_index is not None: if retry_index is not None:

View File

@@ -195,7 +195,7 @@ async def execute_agent_task_step(
) )
step, _, _ = await app.agent.execute_step(current_org, task, step) step, _, _ = await app.agent.execute_step(current_org, task, step)
return Response( return Response(
content=step.model_dump_json() if step else "", content=step.model_dump_json(exclude_none=True) if step else "",
status_code=200, status_code=200,
media_type="application/json", media_type="application/json",
) )
@@ -402,7 +402,7 @@ async def get_agent_task_steps(
""" """
analytics.capture("skyvern-oss-agent-task-steps-get") analytics.capture("skyvern-oss-agent-task-steps-get")
steps = await app.DATABASE.get_task_steps(task_id, organization_id=current_org.organization_id) steps = await app.DATABASE.get_task_steps(task_id, organization_id=current_org.organization_id)
return ORJSONResponse([step.model_dump() for step in steps]) return ORJSONResponse([step.model_dump(exclude_none=True) for step in steps])
@base_router.get( @base_router.get(

View File

@@ -1,6 +1,5 @@
import abc
from enum import StrEnum from enum import StrEnum
from typing import Any, Dict, List from typing import Any, Dict
import structlog import structlog
from deprecation import deprecated from deprecation import deprecated
@@ -27,17 +26,6 @@ class ActionType(StrEnum):
SOLVE_CAPTCHA = "solve_captcha" SOLVE_CAPTCHA = "solve_captcha"
TERMINATE = "terminate" TERMINATE = "terminate"
COMPLETE = "complete" COMPLETE = "complete"
# Note: Remember to update ActionTypeUnion with new actions
class Action(BaseModel):
action_type: ActionType
description: str | None = None
reasoning: str | None = None
class WebAction(Action, abc.ABC):
element_id: str
class UserDefinedError(BaseModel): class UserDefinedError(BaseModel):
@@ -46,8 +34,41 @@ class UserDefinedError(BaseModel):
confidence_float: float = Field(..., ge=0, le=1) confidence_float: float = Field(..., ge=0, le=1)
class DecisiveAction(Action, abc.ABC): class SelectOption(BaseModel):
errors: List[UserDefinedError] = [] label: str | None
value: str | None
index: int | None
def __repr__(self) -> str:
return f"SelectOption(label={self.label}, value={self.value}, index={self.index})"
class Action(BaseModel):
action_type: ActionType
description: str | None = None
reasoning: str | None = None
element_id: str | None = None
# DecisiveAction (CompleteAction, TerminateAction) fields
errors: list[UserDefinedError] | None = None
data_extraction_goal: str | None = None
# WebAction fields
file_name: str | None = None
file_url: str | None = None
download: bool | None = None
is_upload_file_tag: bool | None = None
text: str | None = None
option: SelectOption | None = None
is_checked: bool | None = None
class WebAction(Action):
element_id: str
class DecisiveAction(Action):
errors: list[UserDefinedError] = []
class ClickAction(WebAction): class ClickAction(WebAction):
@@ -55,6 +76,9 @@ class ClickAction(WebAction):
file_url: str | None = None file_url: str | None = None
download: bool = False download: bool = False
def __repr__(self) -> str:
return f"ClickAction(element_id={self.element_id}, file_url={self.file_url}, download={self.download})"
class InputTextAction(WebAction): class InputTextAction(WebAction):
action_type: ActionType = ActionType.INPUT_TEXT action_type: ActionType = ActionType.INPUT_TEXT
@@ -90,15 +114,6 @@ class SolveCaptchaAction(Action):
action_type: ActionType = ActionType.SOLVE_CAPTCHA action_type: ActionType = ActionType.SOLVE_CAPTCHA
class SelectOption(BaseModel):
label: str | None
value: str | None
index: int | None
def __repr__(self) -> str:
return f"SelectOption(label={self.label}, value={self.value}, index={self.index})"
class SelectOptionAction(WebAction): class SelectOptionAction(WebAction):
action_type: ActionType = ActionType.SELECT_OPTION action_type: ActionType = ActionType.SELECT_OPTION
option: SelectOption option: SelectOption
@@ -221,8 +236,8 @@ def parse_action(action: Dict[str, Any], data_extraction_goal: str | None = None
raise UnsupportedActionType(action_type=action_type) raise UnsupportedActionType(action_type=action_type)
def parse_actions(task: Task, json_response: List[Dict[str, Any]]) -> List[Action]: def parse_actions(task: Task, json_response: list[Dict[str, Any]]) -> list[Action]:
actions: List[Action] = [] actions: list[Action] = []
for action in json_response: for action in json_response:
try: try:
action_instance = parse_action(action=action, data_extraction_goal=task.data_extraction_goal) action_instance = parse_action(action=action, data_extraction_goal=task.data_extraction_goal)
@@ -257,7 +272,6 @@ def parse_actions(task: Task, json_response: List[Dict[str, Any]]) -> List[Actio
raw_action=action, raw_action=action,
exc_info=True, exc_info=True,
) )
return actions return actions
@@ -268,20 +282,3 @@ class ScrapeResult(BaseModel):
""" """
scraped_data: dict[str, Any] | list[dict[str, Any]] scraped_data: dict[str, Any] | list[dict[str, Any]]
# https://blog.devgenius.io/deserialize-child-classes-with-pydantic-that-gonna-work-784230e1cf83
ActionTypeUnion = (
ClickAction
| InputTextAction
| UploadFileAction
# Deprecated
# | DownloadFileAction
| SelectOptionAction
| CheckboxAction
| WaitAction
| NullAction
| SolveCaptchaAction
| TerminateAction
| CompleteAction
)

View File

@@ -5,7 +5,7 @@ from typing import Any
from pydantic import BaseModel from pydantic import BaseModel
from skyvern.forge.sdk.settings_manager import SettingsManager from skyvern.forge.sdk.settings_manager import SettingsManager
from skyvern.webeye.actions.actions import Action, ActionTypeUnion, DecisiveAction, UserDefinedError from skyvern.webeye.actions.actions import Action, DecisiveAction, UserDefinedError
from skyvern.webeye.actions.responses import ActionResult from skyvern.webeye.actions.responses import ActionResult
from skyvern.webeye.scraper.scraper import ScrapedPage from skyvern.webeye.scraper.scraper import ScrapedPage
@@ -18,7 +18,7 @@ class AgentStepOutput(BaseModel):
# Will be deprecated once we move to the new format below # Will be deprecated once we move to the new format below
action_results: list[ActionResult] | None = None action_results: list[ActionResult] | None = None
# Nullable for backwards compatibility, once backfill is done, this won't be nullable anymore # Nullable for backwards compatibility, once backfill is done, this won't be nullable anymore
actions_and_results: list[tuple[ActionTypeUnion, list[ActionResult]]] | None = None actions_and_results: list[tuple[Action, list[ActionResult]]] | None = None
errors: list[UserDefinedError] = [] errors: list[UserDefinedError] = []
def __repr__(self) -> str: def __repr__(self) -> str:
@@ -38,7 +38,7 @@ class DetailedAgentStepOutput(BaseModel):
llm_response: dict[str, Any] | None llm_response: dict[str, Any] | None
actions: list[Action] | None actions: list[Action] | None
action_results: list[ActionResult] | None action_results: list[ActionResult] | None
actions_and_results: list[tuple[ActionTypeUnion, list[ActionResult]]] | None actions_and_results: list[tuple[Action, list[ActionResult]]] | None
class Config: class Config:
exclude = ["scraped_page", "extract_action_prompt"] exclude = ["scraped_page", "extract_action_prompt"]