infer action type from instruction (#1231)

This commit is contained in:
LawyZheng
2024-11-21 17:38:42 +08:00
committed by GitHub
parent 9cd1f15763
commit bb6d3e6a37
4 changed files with 47 additions and 7 deletions

View File

@@ -107,3 +107,10 @@ class WorkflowParameterMissingRequiredValue(BaseWorkflowHTTPException):
f"Missing required value for workflow parameter. Workflow parameter type: {workflow_parameter_type}. workflow_parameter_key: {workflow_parameter_key}. Required value: {required_value}",
status_code=status.HTTP_400_BAD_REQUEST,
)
class FailedToParseActionInstruction(SkyvernException):
    """Error signalling that an action instruction could not be parsed.

    Args:
        reason: Explanation text interpolated into the error message; may be
            ``None``, in which case the literal ``None`` is rendered.
        error_type: Error label rendered in parentheses right after the
            reason; may also be ``None``.
    """

    def __init__(self, reason: str | None, error_type: str | None):
        # Build the message first so the text passed to the base class is easy to spot.
        message = f"Failed to parse the action instruction as '{reason}({error_type})'"
        super().__init__(message)

View File

@@ -6,7 +6,6 @@ from pydantic import BaseModel, Field
from skyvern.forge.sdk.schemas.tasks import ProxyLocation
from skyvern.forge.sdk.workflow.models.block import BlockType, FileType
from skyvern.forge.sdk.workflow.models.parameter import ParameterType, WorkflowParameterType
from skyvern.webeye.actions.actions import ActionType
class ParameterYAML(BaseModel, abc.ABC):
@@ -219,7 +218,6 @@ class ValidationBlockYAML(BlockYAML):
class ActionBlockYAML(BlockYAML):
action_type: ActionType
block_type: Literal[BlockType.ACTION] = BlockType.ACTION # type: ignore
url: str | None = None

View File

@@ -14,6 +14,7 @@ from skyvern.exceptions import (
WorkflowRunNotFound,
)
from skyvern.forge import app
from skyvern.forge.prompts import prompt_engine
from skyvern.forge.sdk.artifact.models import ArtifactType
from skyvern.forge.sdk.core import skyvern_context
from skyvern.forge.sdk.core.security import generate_skyvern_signature
@@ -23,6 +24,7 @@ from skyvern.forge.sdk.models import Organization, Step
from skyvern.forge.sdk.schemas.tasks import ProxyLocation, Task
from skyvern.forge.sdk.workflow.exceptions import (
ContextParameterSourceNotDefined,
FailedToParseActionInstruction,
InvalidWorkflowDefinition,
WorkflowDefinitionHasDuplicateParameterKeys,
WorkflowDefinitionHasReservedParameterKeys,
@@ -1366,18 +1368,35 @@ class WorkflowService:
if block_yaml.parameter_keys
else []
)
if not block_yaml.navigation_goal:
raise Exception("empty action instruction")
prompt = prompt_engine.load_prompt("infer-action-type", navigation_goal=block_yaml.navigation_goal)
# TODO: no step here, so LLM call won't be saved as an artifact
json_response = await app.LLM_API_HANDLER(prompt=prompt)
if json_response.get("error"):
raise FailedToParseActionInstruction(
reason=json_response.get("thought"), error_type=json_response.get("error")
)
action_type: str = json_response.get("action_type") or ""
action_type = action_type.lower()
prompt_template = ""
if block_yaml.action_type == ActionType.CLICK:
if action_type == ActionType.CLICK:
prompt_template = TaskPromptTemplate.SingleClickAction
elif block_yaml.action_type == ActionType.INPUT_TEXT:
elif action_type == ActionType.INPUT_TEXT:
prompt_template = TaskPromptTemplate.SingleInputAction
elif block_yaml.action_type == ActionType.UPLOAD_FILE:
elif action_type == ActionType.UPLOAD_FILE:
prompt_template = TaskPromptTemplate.SingleUploadAction
elif block_yaml.action_type == ActionType.SELECT_OPTION:
elif action_type == ActionType.SELECT_OPTION:
prompt_template = TaskPromptTemplate.SingleSelectAction
if not prompt_template:
raise Exception("not supported action type for action block")
raise Exception(
f"Not supported action for action block. Currently we only support [click, input_text, upload_file, select_option], but got [{action_type}]"
)
return ActionBlock(
prompt_template=prompt_template,