Add PRESS_ENTER action type to extract-action prompt (#SKY-7792) (#4669)
This commit is contained in:
@@ -52,4 +52,5 @@ POST_ACTION_EXECUTION_ACTION_TYPES = [
|
||||
ActionType.WAIT,
|
||||
ActionType.SOLVE_CAPTCHA,
|
||||
ActionType.EXTRACT,
|
||||
ActionType.KEYPRESS,
|
||||
]
|
||||
|
||||
@@ -87,7 +87,11 @@ def parse_action(
|
||||
return NullAction(**base_action_dict)
|
||||
|
||||
# `.upper()` handles the case where the LLM returns a lowercase action type (e.g. "click" instead of "CLICK")
|
||||
action_type = ActionType[action["action_type"].upper()]
|
||||
action_type_str = action["action_type"].upper()
|
||||
# Backward compat: map PRESS_ENTER to KEYPRESS (old prompt used PRESS_ENTER)
|
||||
if action_type_str == "PRESS_ENTER":
|
||||
action_type_str = "KEYPRESS"
|
||||
action_type = ActionType[action_type_str]
|
||||
|
||||
if not action_type.is_web_action():
|
||||
# LLM sometimes hallucinates and returns element id for non-web actions such as WAIT, TERMINATE, COMPLETE etc.
|
||||
@@ -187,6 +191,23 @@ def parse_action(
|
||||
**base_action_dict, captcha_type=CaptchaType[captcha_type.upper()] if captcha_type else None
|
||||
)
|
||||
|
||||
if action_type == ActionType.KEYPRESS:
|
||||
# KEYPRESS is a global keyboard action, not element-targeted
|
||||
base_action_dict["skyvern_element_hash"] = None
|
||||
base_action_dict["skyvern_element_data"] = None
|
||||
# Support both "key" (single key from prompt) and "keys" (list, from code/legacy)
|
||||
# Limited to navigation/submission keys to prevent misuse on regular form fields
|
||||
allowed_keys = {"Enter", "Tab", "Escape", "ArrowDown", "ArrowUp"}
|
||||
key = action.get("key")
|
||||
if key:
|
||||
if key not in allowed_keys:
|
||||
LOG.warning("KEYPRESS action has unsupported key, skipping action", key=key)
|
||||
return NullAction(**base_action_dict)
|
||||
keys = [key]
|
||||
else:
|
||||
keys = action.get("keys", ["Enter"])
|
||||
return KeypressAction(**base_action_dict, keys=keys)
|
||||
|
||||
if action_type == ActionType.CLOSE_PAGE:
|
||||
return ClosePageAction(**base_action_dict)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user