add ClickContext to support click action ai="fallback" in generated code (#3892)
This commit is contained in:
@@ -27,7 +27,8 @@ from skyvern.schemas.workflows import FileStorageType
|
||||
from skyvern.webeye.actions.action_types import ActionType
|
||||
|
||||
LOG = structlog.get_logger(__name__)
|
||||
GENERATE_CODE_AI_MODE = "proactive"
|
||||
GENERATE_CODE_AI_MODE_PROACTIVE = "proactive"
|
||||
GENERATE_CODE_AI_MODE_FALLBACK = "fallback"
|
||||
|
||||
|
||||
# --------------------------------------------------------------------- #
|
||||
@@ -250,10 +251,14 @@ def _action_to_stmt(act: dict[str, Any], task: dict[str, Any], assign_to_output:
|
||||
)
|
||||
|
||||
if method == "click":
|
||||
ai_mode = GENERATE_CODE_AI_MODE_PROACTIVE
|
||||
click_context = act.get("click_context")
|
||||
if click_context and isinstance(click_context, dict) and click_context.get("single_option_click"):
|
||||
ai_mode = GENERATE_CODE_AI_MODE_FALLBACK
|
||||
args.append(
|
||||
cst.Arg(
|
||||
keyword=cst.Name("ai"),
|
||||
value=_value(GENERATE_CODE_AI_MODE),
|
||||
value=_value(ai_mode),
|
||||
whitespace_after_arg=cst.ParenthesizedWhitespace(
|
||||
indent=True,
|
||||
last_line=cst.SimpleWhitespace(INDENT),
|
||||
@@ -286,7 +291,7 @@ def _action_to_stmt(act: dict[str, Any], task: dict[str, Any], assign_to_output:
|
||||
args.append(
|
||||
cst.Arg(
|
||||
keyword=cst.Name("ai"),
|
||||
value=_value(GENERATE_CODE_AI_MODE),
|
||||
value=_value(GENERATE_CODE_AI_MODE_PROACTIVE),
|
||||
whitespace_after_arg=cst.ParenthesizedWhitespace(
|
||||
indent=True,
|
||||
last_line=cst.SimpleWhitespace(INDENT),
|
||||
@@ -343,7 +348,7 @@ def _action_to_stmt(act: dict[str, Any], task: dict[str, Any], assign_to_output:
|
||||
args.append(
|
||||
cst.Arg(
|
||||
keyword=cst.Name("ai"),
|
||||
value=_value(GENERATE_CODE_AI_MODE),
|
||||
value=_value(GENERATE_CODE_AI_MODE_PROACTIVE),
|
||||
whitespace_after_arg=cst.ParenthesizedWhitespace(
|
||||
indent=True,
|
||||
last_line=cst.SimpleWhitespace(INDENT),
|
||||
@@ -374,7 +379,7 @@ def _action_to_stmt(act: dict[str, Any], task: dict[str, Any], assign_to_output:
|
||||
args.append(
|
||||
cst.Arg(
|
||||
keyword=cst.Name("ai"),
|
||||
value=_value(GENERATE_CODE_AI_MODE),
|
||||
value=_value(GENERATE_CODE_AI_MODE_PROACTIVE),
|
||||
whitespace_after_arg=cst.ParenthesizedWhitespace(
|
||||
indent=True,
|
||||
last_line=cst.SimpleWhitespace(INDENT),
|
||||
|
||||
@@ -27,6 +27,10 @@ Reply in JSON format with the following keys:
|
||||
"label": str, // the label of the option if any. MAKE SURE YOU USE THIS LABEL TO SELECT THE OPTION. DO NOT PUT ANYTHING OTHER THAN A VALID OPTION LABEL HERE
|
||||
"index": int, // the index corresponding to the option index under the select element.
|
||||
"value": str // the value of the option. MAKE SURE YOU USE THIS VALUE TO SELECT THE OPTION. DO NOT PUT ANYTHING OTHER THAN A VALID OPTION VALUE HERE
|
||||
},
|
||||
"click_context": { // The context for CLICK action only. null if not CLICK action
|
||||
"thought": str, // Describe how you decided that this action is a single choice option or multi-choice option.
|
||||
"single_option_click": bool, // True if the click is the only choice to proceed towards the goal, regardless of different user context or input. False if there are multiple valid options that depend on user input. Examples: clicking a login button to login is True (it's the only way to login); clicking a radio button for a multi-choice question (e.g., selecting "male", "female", or "other" for gender) is False (the choice depends on user input). When clicking on radio buttons, dropdown options, or any element that represents one of multiple possible selections, this should be False.
|
||||
}{% if parse_select_feature_enabled %},
|
||||
"context": { // The context for INPUT_TEXT or SELECT_OPTION action only. null if not INPUT_TEXT or SELECT_OPTION action. Extract the following detailed information from the "reasoning", and double-check the information by analysing the HTML elements.
|
||||
"thought": str, // A string to describe how you double-check the context information to ensure the accuracy.
|
||||
|
||||
@@ -50,6 +50,11 @@ class InputOrSelectContext(BaseModel):
|
||||
return f"InputOrSelectContext(field={self.field}, is_required={self.is_required}, is_search_bar={self.is_search_bar}, is_location_input={self.is_location_input}, intention={self.intention})"
|
||||
|
||||
|
||||
# Context reported by the model for CLICK actions only (the "click_context"
# key of the action JSON). parse_action validates that dict into this model
# and attaches it to the resulting ClickAction.
class ClickContext(BaseModel):
|
||||
# The model's explanation of how it decided whether the click is a
# single-choice or a multi-choice option. Defaults to None when omitted.
thought: str | None = None
|
||||
# True when the click is the only way to proceed towards the goal regardless
# of user input (e.g. a login button); False when it is one of several valid
# choices that depend on user input (e.g. a radio button or dropdown option).
# Code generation emits ai="fallback" instead of ai="proactive" for a click
# when this is truthy. Defaults to None when omitted.
single_option_click: bool | None = None
|
||||
|
||||
|
||||
class Action(BaseModel):
|
||||
model_config = ConfigDict(from_attributes=True)
|
||||
|
||||
@@ -88,6 +93,7 @@ class Action(BaseModel):
|
||||
option: SelectOption | None = None
|
||||
is_checked: bool | None = None
|
||||
verified: bool = False
|
||||
click_context: ClickContext | None = None
|
||||
|
||||
# TOTP timing information for multi-field TOTP sequences
|
||||
totp_timing_info: dict[str, Any] | None = None
|
||||
|
||||
@@ -21,6 +21,7 @@ from skyvern.webeye.actions.actions import (
|
||||
Action,
|
||||
CheckboxAction,
|
||||
ClickAction,
|
||||
ClickContext,
|
||||
ClosePageAction,
|
||||
CompleteAction,
|
||||
DownloadFileAction,
|
||||
@@ -97,7 +98,15 @@ def parse_action(
|
||||
|
||||
if action_type == ActionType.CLICK:
|
||||
file_url = action["file_url"] if "file_url" in action else None
|
||||
return ClickAction(**base_action_dict, file_url=file_url, download=action.get("download", False))
|
||||
click_context = action.get("click_context", None)
|
||||
if click_context:
|
||||
click_context = ClickContext.model_validate(click_context)
|
||||
return ClickAction(
|
||||
**base_action_dict,
|
||||
file_url=file_url,
|
||||
download=action.get("download", False),
|
||||
click_context=click_context,
|
||||
)
|
||||
|
||||
if action_type == ActionType.INPUT_TEXT:
|
||||
context_dict = action.get("context", {})
|
||||
|
||||
Reference in New Issue
Block a user