skip invalid actions during parsing (#381)

This commit is contained in:
LawyZheng
2024-05-30 09:23:58 +08:00
committed by GitHub
parent 0b86f9fb38
commit 6445fb93b0
2 changed files with 123 additions and 95 deletions

View File

@@ -267,3 +267,8 @@ class UnknownElementTreeFormat(SkyvernException):
class StepTerminationError(SkyvernException):
    """Error stating that a step cannot be executed and its task is terminated."""

    def __init__(self, step_id: str, reason: str) -> None:
        # Build the message first so the super() call stays short and readable.
        message = f"Step {step_id} cannot be executed and task is terminated. Reason: {reason}"
        super().__init__(message)
class UnsupportedActionType(SkyvernException):
    """Error reporting an action type that is not supported.

    Args:
        action_type: The offending action type; it is embedded in the message.
    """

    def __init__(self, action_type: str) -> None:
        super().__init__(f"Unsupported action type: {action_type}")

View File

@@ -4,8 +4,9 @@ from typing import Any, Dict, List
import structlog import structlog
from deprecation import deprecated from deprecation import deprecated
from pydantic import BaseModel, Field from pydantic import BaseModel, Field, ValidationError
from skyvern.exceptions import UnsupportedActionType
from skyvern.forge.sdk.schemas.tasks import Task from skyvern.forge.sdk.schemas.tasks import Task
LOG = structlog.get_logger() LOG = structlog.get_logger()
@@ -133,9 +134,7 @@ class CompleteAction(DecisiveAction):
data_extraction_goal: str | None = None data_extraction_goal: str | None = None
def parse_actions(task: Task, json_response: List[Dict[str, Any]]) -> List[Action]: def parse_action(action: Dict[str, Any], data_extraction_goal: str | None = None) -> Action:
actions = []
for action in json_response:
if "id" in action: if "id" in action:
element_id = action["id"] element_id = action["id"]
elif "element_id" in action: elif "element_id" in action:
@@ -144,59 +143,49 @@ def parse_actions(task: Task, json_response: List[Dict[str, Any]]) -> List[Actio
element_id = None element_id = None
reasoning = action["reasoning"] if "reasoning" in action else None reasoning = action["reasoning"] if "reasoning" in action else None
if "action_type" not in action or action["action_type"] is None: if "action_type" not in action or action["action_type"] is None:
actions.append(NullAction(reasoning=reasoning)) return NullAction(reasoning=reasoning)
continue
# `.upper()` handles the case where the LLM returns a lowercase action type (e.g. "click" instead of "CLICK") # `.upper()` handles the case where the LLM returns a lowercase action type (e.g. "click" instead of "CLICK")
action_type = ActionType[action["action_type"].upper()] action_type = ActionType[action["action_type"].upper()]
if action_type == ActionType.TERMINATE: if action_type == ActionType.TERMINATE:
LOG.warning( return TerminateAction(
"Agent decided to terminate",
task_id=task.task_id,
llm_response=json_response,
reasoning=reasoning,
actions=actions,
)
actions.append(
TerminateAction(
reasoning=reasoning, reasoning=reasoning,
errors=action["errors"] if "errors" in action else [], errors=action["errors"] if "errors" in action else [],
) )
)
elif action_type == ActionType.CLICK: if action_type == ActionType.CLICK:
file_url = action["file_url"] if "file_url" in action else None file_url = action["file_url"] if "file_url" in action else None
actions.append( return ClickAction(
ClickAction(
element_id=element_id, element_id=element_id,
reasoning=reasoning, reasoning=reasoning,
file_url=file_url, file_url=file_url,
download=action.get("download", False), download=action.get("download", False),
) )
)
elif action_type == ActionType.INPUT_TEXT:
actions.append(InputTextAction(element_id=element_id, text=action["text"], reasoning=reasoning))
elif action_type == ActionType.UPLOAD_FILE:
# TODO: see if the element is a file input element. if it's not, convert this action into a click action
actions.append( if action_type == ActionType.INPUT_TEXT:
UploadFileAction( return InputTextAction(element_id=element_id, text=action["text"], reasoning=reasoning)
if action_type == ActionType.UPLOAD_FILE:
# TODO: see if the element is a file input element. if it's not, convert this action into a click action
return UploadFileAction(
element_id=element_id, element_id=element_id,
file_url=action["file_url"], file_url=action["file_url"],
reasoning=reasoning, reasoning=reasoning,
) )
)
# This action is not used in the current implementation. Click actions are used instead. # This action is not used in the current implementation. Click actions are used instead.
elif action_type == ActionType.DOWNLOAD_FILE: if action_type == ActionType.DOWNLOAD_FILE:
actions.append( return DownloadFileAction(
DownloadFileAction(
element_id=element_id, element_id=element_id,
file_name=action["file_name"], file_name=action["file_name"],
reasoning=reasoning, reasoning=reasoning,
) )
)
elif action_type == ActionType.SELECT_OPTION: if action_type == ActionType.SELECT_OPTION:
actions.append( return SelectOptionAction(
SelectOptionAction(
element_id=element_id, element_id=element_id,
option=SelectOption( option=SelectOption(
label=action["option"]["label"], label=action["option"]["label"],
@@ -205,36 +194,70 @@ def parse_actions(task: Task, json_response: List[Dict[str, Any]]) -> List[Actio
), ),
reasoning=reasoning, reasoning=reasoning,
) )
)
elif action_type == ActionType.CHECKBOX: if action_type == ActionType.CHECKBOX:
actions.append( return CheckboxAction(
CheckboxAction(
element_id=element_id, element_id=element_id,
is_checked=action["is_checked"], is_checked=action["is_checked"],
reasoning=reasoning, reasoning=reasoning,
) )
)
elif action_type == ActionType.WAIT: if action_type == ActionType.WAIT:
actions.append(WaitAction(reasoning=reasoning)) return WaitAction(reasoning=reasoning)
elif action_type == ActionType.COMPLETE:
actions.append( if action_type == ActionType.COMPLETE:
CompleteAction( return CompleteAction(
reasoning=reasoning, reasoning=reasoning,
data_extraction_goal=task.data_extraction_goal, data_extraction_goal=data_extraction_goal,
errors=action["errors"] if "errors" in action else [], errors=action["errors"] if "errors" in action else [],
) )
if action_type == "null":
return NullAction(reasoning=reasoning)
if action_type == ActionType.SOLVE_CAPTCHA:
return SolveCaptchaAction(reasoning=reasoning)
raise UnsupportedActionType(action_type=action_type)
def parse_actions(task: Task, json_response: List[Dict[str, Any]]) -> List[Action]:
    """Convert the raw action dicts in ``json_response`` into ``Action`` objects.

    Each entry is handed to ``parse_action`` together with the task's
    ``data_extraction_goal``. An entry whose parsing raises is logged with its
    traceback and left out of the result; the remaining entries are still
    processed.

    Args:
        task: Supplies ``task_id`` for logging and ``data_extraction_goal``
            for ``parse_action``.
        json_response: The raw action dicts to parse.

    Returns:
        The successfully parsed actions, in input order.
    """
    parsed_actions: List[Action] = []
    for raw in json_response:
        try:
            parsed = parse_action(action=raw, data_extraction_goal=task.data_extraction_goal)
            if isinstance(parsed, TerminateAction):
                # Logged before the append, so `actions` holds only what preceded the terminate.
                LOG.warning(
                    "Agent decided to terminate",
                    task_id=task.task_id,
                    llm_response=json_response,
                    reasoning=parsed.reasoning,
                    actions=parsed_actions,
                )
            parsed_actions.append(parsed)
        except Exception as exc:
            # Single handler for every failure; only the log message depends on
            # the error kind, checked in the same precedence order as before.
            if isinstance(exc, UnsupportedActionType):
                message = "Unsupported action type when parsing actions"
            elif isinstance(exc, ValidationError):
                message = "Invalid action"
            else:
                message = "Failed to marshal action"
            LOG.error(
                message,
                task_id=task.task_id,
                raw_action=raw,
                exc_info=True,
            )
    return parsed_actions