infer action type from instruction (#1231)
This commit is contained in:
16
skyvern/forge/prompts/skyvern/infer-action-type.j2
Normal file
16
skyvern/forge/prompts/skyvern/infer-action-type.j2
Normal file
@@ -0,0 +1,16 @@
|
||||
You are a browser agent performing actions on the web. You are instructed to take a single action. Identify which action type should be taken based on the action instruction.
|
||||
|
||||
MAKE SURE YOU OUTPUT VALID JSON. No text before or after JSON, no trailing commas, no comments (//), no unnecessary quotes, etc.
|
||||
|
||||
Reply in the following JSON format:
|
||||
{
|
||||
"thought": str, // A string to describe how to infer the action type from the action instruction.
|
||||
"confidence_float": float, // The confidence of the action. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence
|
||||
"action_type": str, // It's a string enum: "CLICK", "INPUT_TEXT", "UPLOAD_FILE", "SELECT_OPTION". "CLICK" means user wants to click. "INPUT_TEXT" means user wants to input text. "UPLOAD_FILE" means user wants to upload a file. "SELECT_OPTION" means user wants to select an option.
|
||||
"error": str, // It's a string enum to describe error. Null if you can identify the action as one of the defined action type. Use "UNKNOWN_ACTION" if none of the defined action type matched. Use "MULTIPLE_ACTIONS" if the instruction includes multiple actions.
|
||||
}
|
||||
|
||||
Action instruction
|
||||
```
|
||||
{{ navigation_goal }}
|
||||
```
|
||||
@@ -107,3 +107,10 @@ class WorkflowParameterMissingRequiredValue(BaseWorkflowHTTPException):
|
||||
f"Missing required value for workflow parameter. Workflow parameter type: {workflow_parameter_type}. workflow_parameter_key: {workflow_parameter_key}. Required value: {required_value}",
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
)
|
||||
|
||||
|
||||
class FailedToParseActionInstruction(SkyvernException):
    """Signal that an action instruction could not be parsed.

    Args:
        reason: Free-text explanation of the failure; may be None.
        error_type: Short label for the kind of failure; may be None.

    Both values are interpolated verbatim into the exception message, so a
    None value shows up as the literal text "None".
    """

    def __init__(self, reason: str | None, error_type: str | None):
        # Build the message first so the base-class call stays a one-liner.
        message = f"Failed to parse the action instruction as '{reason}({error_type})'"
        super().__init__(message)
|
||||
|
||||
@@ -6,7 +6,6 @@ from pydantic import BaseModel, Field
|
||||
from skyvern.forge.sdk.schemas.tasks import ProxyLocation
|
||||
from skyvern.forge.sdk.workflow.models.block import BlockType, FileType
|
||||
from skyvern.forge.sdk.workflow.models.parameter import ParameterType, WorkflowParameterType
|
||||
from skyvern.webeye.actions.actions import ActionType
|
||||
|
||||
|
||||
class ParameterYAML(BaseModel, abc.ABC):
|
||||
@@ -219,7 +218,6 @@ class ValidationBlockYAML(BlockYAML):
|
||||
|
||||
|
||||
class ActionBlockYAML(BlockYAML):
|
||||
action_type: ActionType
|
||||
block_type: Literal[BlockType.ACTION] = BlockType.ACTION # type: ignore
|
||||
|
||||
url: str | None = None
|
||||
|
||||
@@ -14,6 +14,7 @@ from skyvern.exceptions import (
|
||||
WorkflowRunNotFound,
|
||||
)
|
||||
from skyvern.forge import app
|
||||
from skyvern.forge.prompts import prompt_engine
|
||||
from skyvern.forge.sdk.artifact.models import ArtifactType
|
||||
from skyvern.forge.sdk.core import skyvern_context
|
||||
from skyvern.forge.sdk.core.security import generate_skyvern_signature
|
||||
@@ -23,6 +24,7 @@ from skyvern.forge.sdk.models import Organization, Step
|
||||
from skyvern.forge.sdk.schemas.tasks import ProxyLocation, Task
|
||||
from skyvern.forge.sdk.workflow.exceptions import (
|
||||
ContextParameterSourceNotDefined,
|
||||
FailedToParseActionInstruction,
|
||||
InvalidWorkflowDefinition,
|
||||
WorkflowDefinitionHasDuplicateParameterKeys,
|
||||
WorkflowDefinitionHasReservedParameterKeys,
|
||||
@@ -1366,18 +1368,35 @@ class WorkflowService:
|
||||
if block_yaml.parameter_keys
|
||||
else []
|
||||
)
|
||||
|
||||
if not block_yaml.navigation_goal:
|
||||
raise Exception("empty action instruction")
|
||||
|
||||
prompt = prompt_engine.load_prompt("infer-action-type", navigation_goal=block_yaml.navigation_goal)
|
||||
# TODO: no step here, so LLM call won't be saved as an artifact
|
||||
json_response = await app.LLM_API_HANDLER(prompt=prompt)
|
||||
if json_response.get("error"):
|
||||
raise FailedToParseActionInstruction(
|
||||
reason=json_response.get("thought"), error_type=json_response.get("error")
|
||||
)
|
||||
|
||||
action_type: str = json_response.get("action_type") or ""
|
||||
action_type = action_type.lower()
|
||||
|
||||
prompt_template = ""
|
||||
if block_yaml.action_type == ActionType.CLICK:
|
||||
if action_type == ActionType.CLICK:
|
||||
prompt_template = TaskPromptTemplate.SingleClickAction
|
||||
elif block_yaml.action_type == ActionType.INPUT_TEXT:
|
||||
elif action_type == ActionType.INPUT_TEXT:
|
||||
prompt_template = TaskPromptTemplate.SingleInputAction
|
||||
elif block_yaml.action_type == ActionType.UPLOAD_FILE:
|
||||
elif action_type == ActionType.UPLOAD_FILE:
|
||||
prompt_template = TaskPromptTemplate.SingleUploadAction
|
||||
elif block_yaml.action_type == ActionType.SELECT_OPTION:
|
||||
elif action_type == ActionType.SELECT_OPTION:
|
||||
prompt_template = TaskPromptTemplate.SingleSelectAction
|
||||
|
||||
if not prompt_template:
|
||||
raise Exception("not supported action type for action block")
|
||||
raise Exception(
|
||||
f"Not supported action for action block. Currently we only support [click, input_text, upload_file, select_option], but got [{action_type}]"
|
||||
)
|
||||
|
||||
return ActionBlock(
|
||||
prompt_template=prompt_template,
|
||||
|
||||
Reference in New Issue
Block a user