diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py index 8d68bec6..91dd3189 100644 --- a/skyvern/forge/agent.py +++ b/skyvern/forge/agent.py @@ -117,6 +117,7 @@ from skyvern.webeye.actions.actions import ( DownloadFileAction, ExtractAction, GotoUrlAction, + KeypressAction, ReloadPageAction, TerminateAction, WebAction, @@ -1285,6 +1286,16 @@ class ForgeAgent: "is_retry": step.retry_index > 0, } + # Tell the handler to skip the auto-completion Tab hack when the + # next batched action would be broken by a focus change — e.g. a + # KEYPRESS Enter or another action on the same element. + if action.action_type == ActionType.INPUT_TEXT and action_idx + 1 < len(action_linked_list): + next_action = action_linked_list[action_idx + 1].action + if isinstance(next_action, KeypressAction) or ( + isinstance(next_action, WebAction) and next_action.element_id == action.element_id + ): + action.skip_auto_complete_tab = True + results = await ActionHandler.handle_action( scraped_page=scraped_page, task=task, diff --git a/skyvern/webeye/actions/actions.py b/skyvern/webeye/actions/actions.py index 65fdd126..1fea08e7 100644 --- a/skyvern/webeye/actions/actions.py +++ b/skyvern/webeye/actions/actions.py @@ -152,6 +152,11 @@ class Action(BaseModel): # flag indicating whether the action requires mini-agent mode has_mini_agent: bool | None = None + # When True, the auto-completion Tab hack is skipped because a follow-up + # action in the same batch targets the same element or presses a key (e.g. Enter). + # Pressing Tab would move focus away and break that next action. + skip_auto_complete_tab: bool = False + created_at: datetime | None = None modified_at: datetime | None = None created_by: str | None = None diff --git a/skyvern/webeye/actions/handler.py b/skyvern/webeye/actions/handler.py index 5859f5cb..14983fbc 100644 --- a/skyvern/webeye/actions/handler.py +++ b/skyvern/webeye/actions/handler.py @@ -1512,7 +1512,12 @@ async def handle_input_text_action( raise e finally: # HACK: force to finish missing auto completion input - if auto_complete_hacky_flag and await skyvern_element.is_visible() and not await skyvern_element.is_raw_input(): + if ( + auto_complete_hacky_flag + and await skyvern_element.is_visible() + and not await skyvern_element.is_raw_input() + and not action.skip_auto_complete_tab + ): LOG.debug( "Trigger input-selection hack, pressing Tab to choose one", action=action,