From 90f0b25f289bd7506671f76b511d662dd9a3e636 Mon Sep 17 00:00:00 2001 From: LawyZheng Date: Fri, 22 Nov 2024 01:32:46 +0800 Subject: [PATCH] move action type validate in workflow runtime (#1235) --- skyvern/forge/sdk/workflow/models/block.py | 34 ++++++++++++++++++++++ skyvern/forge/sdk/workflow/service.py | 30 ------------------- 2 files changed, 34 insertions(+), 30 deletions(-) diff --git a/skyvern/forge/sdk/workflow/models/block.py b/skyvern/forge/sdk/workflow/models/block.py index ed5269ed..fd3a44c0 100644 --- a/skyvern/forge/sdk/workflow/models/block.py +++ b/skyvern/forge/sdk/workflow/models/block.py @@ -45,6 +45,7 @@ from skyvern.forge.sdk.schemas.tasks import Task, TaskOutput, TaskStatus from skyvern.forge.sdk.settings_manager import SettingsManager from skyvern.forge.sdk.workflow.context_manager import WorkflowRunContext from skyvern.forge.sdk.workflow.exceptions import ( + FailedToParseActionInstruction, InvalidEmailClientConfiguration, InvalidFileType, NoValidEmailRecipient, @@ -56,6 +57,7 @@ from skyvern.forge.sdk.workflow.models.parameter import ( OutputParameter, WorkflowParameter, ) +from skyvern.webeye.actions.actions import ActionType from skyvern.webeye.browser_factory import BrowserState LOG = structlog.get_logger() @@ -1299,6 +1301,38 @@ class ActionBlock(BaseTaskBlock): block_type: Literal[BlockType.ACTION] = BlockType.ACTION async def execute(self, workflow_run_id: str, **kwargs: dict) -> BlockResult: + try: + prompt = prompt_engine.load_prompt("infer-action-type", navigation_goal=self.navigation_goal) + # TODO: no step here, so LLM call won't be saved as an artifact + json_response = await app.LLM_API_HANDLER(prompt=prompt) + if json_response.get("error"): + raise FailedToParseActionInstruction( + reason=json_response.get("thought"), error_type=json_response.get("error") + ) + + action_type: str = json_response.get("action_type") or "" + action_type = ActionType[action_type.upper()] + + prompt_template = "" + if action_type == ActionType.CLICK: + prompt_template = TaskPromptTemplate.SingleClickAction + elif action_type == ActionType.INPUT_TEXT: + prompt_template = TaskPromptTemplate.SingleInputAction + elif action_type == ActionType.UPLOAD_FILE: + prompt_template = TaskPromptTemplate.SingleUploadAction + elif action_type == ActionType.SELECT_OPTION: + prompt_template = TaskPromptTemplate.SingleSelectAction + + if not prompt_template: + raise Exception( + f"Not supported action for action block. Currently we only support [click, input_text, upload_file, select_option], but got [{action_type}]" + ) + except Exception as e: + return self.build_block_result( + success=False, failure_reason=str(e), output_parameter_value=None, status=BlockStatus.failed + ) + + self.prompt_template = prompt_template return await super().execute(workflow_run_id=workflow_run_id, kwargs=kwargs) diff --git a/skyvern/forge/sdk/workflow/service.py b/skyvern/forge/sdk/workflow/service.py index bad0f3b4..c6cc90ee 100644 --- a/skyvern/forge/sdk/workflow/service.py +++ b/skyvern/forge/sdk/workflow/service.py @@ -14,7 +14,6 @@ from skyvern.exceptions import ( WorkflowRunNotFound, ) from skyvern.forge import app -from skyvern.forge.prompts import prompt_engine from skyvern.forge.sdk.artifact.models import ArtifactType from skyvern.forge.sdk.core import skyvern_context from skyvern.forge.sdk.core.security import generate_skyvern_signature @@ -24,7 +23,6 @@ from skyvern.forge.sdk.models import Organization, Step from skyvern.forge.sdk.schemas.tasks import ProxyLocation, Task from skyvern.forge.sdk.workflow.exceptions import ( ContextParameterSourceNotDefined, - FailedToParseActionInstruction, InvalidWorkflowDefinition, WorkflowDefinitionHasDuplicateParameterKeys, WorkflowDefinitionHasReservedParameterKeys, @@ -66,7 +64,6 @@ from skyvern.forge.sdk.workflow.models.workflow import ( WorkflowRunStatusResponse, ) from skyvern.forge.sdk.workflow.models.yaml import BLOCK_YAML_TYPES, ForLoopBlockYAML, WorkflowCreateYAMLRequest -from skyvern.webeye.actions.actions import ActionType from skyvern.webeye.browser_factory import BrowserState LOG = structlog.get_logger() @@ -1372,34 +1369,7 @@ class WorkflowService: if not block_yaml.navigation_goal: raise Exception("empty action instruction") - prompt = prompt_engine.load_prompt("infer-action-type", navigation_goal=block_yaml.navigation_goal) - # TODO: no step here, so LLM call won't be saved as an artifact - json_response = await app.LLM_API_HANDLER(prompt=prompt) - if json_response.get("error"): - raise FailedToParseActionInstruction( - reason=json_response.get("thought"), error_type=json_response.get("error") - ) - - action_type: str = json_response.get("action_type") or "" - action_type = ActionType[action_type.upper()] - - prompt_template = "" - if action_type == ActionType.CLICK: - prompt_template = TaskPromptTemplate.SingleClickAction - elif action_type == ActionType.INPUT_TEXT: - prompt_template = TaskPromptTemplate.SingleInputAction - elif action_type == ActionType.UPLOAD_FILE: - prompt_template = TaskPromptTemplate.SingleUploadAction - elif action_type == ActionType.SELECT_OPTION: - prompt_template = TaskPromptTemplate.SingleSelectAction - - if not prompt_template: - raise Exception( - f"Not supported action for action block. Currently we only support [click, input_text, upload_file, select_option], but got [{action_type}]" - ) - return ActionBlock( - prompt_template=prompt_template, label=block_yaml.label, url=block_yaml.url, title=block_yaml.title,