infer action type from instruction (#1231)
This commit is contained in:
16
skyvern/forge/prompts/skyvern/infer-action-type.j2
Normal file
16
skyvern/forge/prompts/skyvern/infer-action-type.j2
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
You are a browser agent performing actions on the web. You are instructed to take a single action. Help identify which action type should be taken according to the action instruction.
|
||||||
|
|
||||||
|
MAKE SURE YOU OUTPUT VALID JSON. No text before or after JSON, no trailing commas, no comments (//), no unnecessary quotes, etc.
|
||||||
|
|
||||||
|
Reply in the following JSON format:
|
||||||
|
{
|
||||||
|
"thought": str, // A string to describe how to infer the action type from the action instruction.
|
||||||
|
"confidence_float": float, // The confidence of the action. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence
|
||||||
|
"action_type": str, // It's a string enum: "CLICK", "INPUT_TEXT", "UPLOAD_FILE", "SELECT_OPTION". "CLICK" means user wants to click. "INPUT_TEXT" means user wants to input text. "UPLOAD_FILE" means user wants to upload a file. "SELECT_OPTION" means user wants to select an option.
|
||||||
|
"error": str, // A string enum describing the error. Null if you can identify the action as one of the defined action types. Use "UNKNOWN_ACTION" if none of the defined action types match. Use "MULTIPLE_ACTIONS" if the instruction includes multiple actions.
|
||||||
|
}
|
||||||
|
|
||||||
|
Action instruction
|
||||||
|
```
|
||||||
|
{{ navigation_goal }}
|
||||||
|
```
|
||||||
@@ -107,3 +107,10 @@ class WorkflowParameterMissingRequiredValue(BaseWorkflowHTTPException):
|
|||||||
f"Missing required value for workflow parameter. Workflow parameter type: {workflow_parameter_type}. workflow_parameter_key: {workflow_parameter_key}. Required value: {required_value}",
|
f"Missing required value for workflow parameter. Workflow parameter type: {workflow_parameter_type}. workflow_parameter_key: {workflow_parameter_key}. Required value: {required_value}",
|
||||||
status_code=status.HTTP_400_BAD_REQUEST,
|
status_code=status.HTTP_400_BAD_REQUEST,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class FailedToParseActionInstruction(SkyvernException):
    """Error for an action instruction that could not be parsed into an action type.

    The message embeds both arguments as ``'{reason}({error_type})'``. Either
    argument may be ``None``, in which case the literal text ``None`` appears
    in the message.
    """

    def __init__(self, reason: str | None, error_type: str | None):
        # Build the message first so the base-class call stays on one line.
        message = f"Failed to parse the action instruction as '{reason}({error_type})'"
        super().__init__(message)
|
||||||
|
|||||||
@@ -6,7 +6,6 @@ from pydantic import BaseModel, Field
|
|||||||
from skyvern.forge.sdk.schemas.tasks import ProxyLocation
|
from skyvern.forge.sdk.schemas.tasks import ProxyLocation
|
||||||
from skyvern.forge.sdk.workflow.models.block import BlockType, FileType
|
from skyvern.forge.sdk.workflow.models.block import BlockType, FileType
|
||||||
from skyvern.forge.sdk.workflow.models.parameter import ParameterType, WorkflowParameterType
|
from skyvern.forge.sdk.workflow.models.parameter import ParameterType, WorkflowParameterType
|
||||||
from skyvern.webeye.actions.actions import ActionType
|
|
||||||
|
|
||||||
|
|
||||||
class ParameterYAML(BaseModel, abc.ABC):
|
class ParameterYAML(BaseModel, abc.ABC):
|
||||||
@@ -219,7 +218,6 @@ class ValidationBlockYAML(BlockYAML):
|
|||||||
|
|
||||||
|
|
||||||
class ActionBlockYAML(BlockYAML):
|
class ActionBlockYAML(BlockYAML):
|
||||||
action_type: ActionType
|
|
||||||
block_type: Literal[BlockType.ACTION] = BlockType.ACTION # type: ignore
|
block_type: Literal[BlockType.ACTION] = BlockType.ACTION # type: ignore
|
||||||
|
|
||||||
url: str | None = None
|
url: str | None = None
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ from skyvern.exceptions import (
|
|||||||
WorkflowRunNotFound,
|
WorkflowRunNotFound,
|
||||||
)
|
)
|
||||||
from skyvern.forge import app
|
from skyvern.forge import app
|
||||||
|
from skyvern.forge.prompts import prompt_engine
|
||||||
from skyvern.forge.sdk.artifact.models import ArtifactType
|
from skyvern.forge.sdk.artifact.models import ArtifactType
|
||||||
from skyvern.forge.sdk.core import skyvern_context
|
from skyvern.forge.sdk.core import skyvern_context
|
||||||
from skyvern.forge.sdk.core.security import generate_skyvern_signature
|
from skyvern.forge.sdk.core.security import generate_skyvern_signature
|
||||||
@@ -23,6 +24,7 @@ from skyvern.forge.sdk.models import Organization, Step
|
|||||||
from skyvern.forge.sdk.schemas.tasks import ProxyLocation, Task
|
from skyvern.forge.sdk.schemas.tasks import ProxyLocation, Task
|
||||||
from skyvern.forge.sdk.workflow.exceptions import (
|
from skyvern.forge.sdk.workflow.exceptions import (
|
||||||
ContextParameterSourceNotDefined,
|
ContextParameterSourceNotDefined,
|
||||||
|
FailedToParseActionInstruction,
|
||||||
InvalidWorkflowDefinition,
|
InvalidWorkflowDefinition,
|
||||||
WorkflowDefinitionHasDuplicateParameterKeys,
|
WorkflowDefinitionHasDuplicateParameterKeys,
|
||||||
WorkflowDefinitionHasReservedParameterKeys,
|
WorkflowDefinitionHasReservedParameterKeys,
|
||||||
@@ -1366,18 +1368,35 @@ class WorkflowService:
|
|||||||
if block_yaml.parameter_keys
|
if block_yaml.parameter_keys
|
||||||
else []
|
else []
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if not block_yaml.navigation_goal:
|
||||||
|
raise Exception("empty action instruction")
|
||||||
|
|
||||||
|
prompt = prompt_engine.load_prompt("infer-action-type", navigation_goal=block_yaml.navigation_goal)
|
||||||
|
# TODO: no step here, so LLM call won't be saved as an artifact
|
||||||
|
json_response = await app.LLM_API_HANDLER(prompt=prompt)
|
||||||
|
if json_response.get("error"):
|
||||||
|
raise FailedToParseActionInstruction(
|
||||||
|
reason=json_response.get("thought"), error_type=json_response.get("error")
|
||||||
|
)
|
||||||
|
|
||||||
|
action_type: str = json_response.get("action_type") or ""
|
||||||
|
action_type = action_type.lower()
|
||||||
|
|
||||||
prompt_template = ""
|
prompt_template = ""
|
||||||
if block_yaml.action_type == ActionType.CLICK:
|
if action_type == ActionType.CLICK:
|
||||||
prompt_template = TaskPromptTemplate.SingleClickAction
|
prompt_template = TaskPromptTemplate.SingleClickAction
|
||||||
elif block_yaml.action_type == ActionType.INPUT_TEXT:
|
elif action_type == ActionType.INPUT_TEXT:
|
||||||
prompt_template = TaskPromptTemplate.SingleInputAction
|
prompt_template = TaskPromptTemplate.SingleInputAction
|
||||||
elif block_yaml.action_type == ActionType.UPLOAD_FILE:
|
elif action_type == ActionType.UPLOAD_FILE:
|
||||||
prompt_template = TaskPromptTemplate.SingleUploadAction
|
prompt_template = TaskPromptTemplate.SingleUploadAction
|
||||||
elif block_yaml.action_type == ActionType.SELECT_OPTION:
|
elif action_type == ActionType.SELECT_OPTION:
|
||||||
prompt_template = TaskPromptTemplate.SingleSelectAction
|
prompt_template = TaskPromptTemplate.SingleSelectAction
|
||||||
|
|
||||||
if not prompt_template:
|
if not prompt_template:
|
||||||
raise Exception("not supported action type for action block")
|
raise Exception(
|
||||||
|
f"Not supported action for action block. Currently we only support [click, input_text, upload_file, select_option], but got [{action_type}]"
|
||||||
|
)
|
||||||
|
|
||||||
return ActionBlock(
|
return ActionBlock(
|
||||||
prompt_template=prompt_template,
|
prompt_template=prompt_template,
|
||||||
|
|||||||
Reference in New Issue
Block a user