From 3cbfda57bd4d3b94f8e346768473a80f3d4143be Mon Sep 17 00:00:00 2001 From: Shuchang Zheng Date: Sun, 13 Apr 2025 00:22:46 -0700 Subject: [PATCH] add CUA MoveAction (#2144) --- skyvern/webeye/actions/actions.py | 7 +++++++ skyvern/webeye/actions/handler.py | 12 ++++++++++++ skyvern/webeye/actions/parse_actions.py | 8 ++++++++ 3 files changed, 27 insertions(+) diff --git a/skyvern/webeye/actions/actions.py b/skyvern/webeye/actions/actions.py index 0d360475..e91d6cc3 100644 --- a/skyvern/webeye/actions/actions.py +++ b/skyvern/webeye/actions/actions.py @@ -31,6 +31,7 @@ class ActionType(StrEnum): SCROLL = "scroll" KEYPRESS = "keypress" TYPE = "type" + MOVE = "move" def is_web_action(self) -> bool: return self in [ @@ -278,6 +279,12 @@ class KeypressAction(Action): keys: list[str] = [] +class MoveAction(Action): + action_type: ActionType = ActionType.MOVE + x: int + y: int + + class ScrapeResult(BaseModel): """ Scraped response from a webpage, including: diff --git a/skyvern/webeye/actions/handler.py b/skyvern/webeye/actions/handler.py index b7cdf049..36033063 100644 --- a/skyvern/webeye/actions/handler.py +++ b/skyvern/webeye/actions/handler.py @@ -1493,6 +1493,17 @@ async def handle_keypress_action( return [ActionSuccess()] +async def handle_move_action( + action: actions.MoveAction, + page: Page, + scraped_page: ScrapedPage, + task: Task, + step: Step, +) -> list[ActionResult]: + await page.mouse.move(action.x, action.y) + return [ActionSuccess()] + + ActionHandler.register_action_type(ActionType.SOLVE_CAPTCHA, handle_solve_captcha_action) ActionHandler.register_action_type(ActionType.CLICK, handle_click_action) ActionHandler.register_action_type(ActionType.INPUT_TEXT, handle_input_text_action) @@ -1506,6 +1517,7 @@ ActionHandler.register_action_type(ActionType.COMPLETE, handle_complete_action) ActionHandler.register_action_type(ActionType.EXTRACT, handle_extract_action) ActionHandler.register_action_type(ActionType.SCROLL, handle_scroll_action) ActionHandler.register_action_type(ActionType.KEYPRESS, handle_keypress_action) +ActionHandler.register_action_type(ActionType.MOVE, handle_move_action) async def get_actual_value_of_parameter_if_secret(task: Task, parameter: str) -> Any: diff --git a/skyvern/webeye/actions/parse_actions.py b/skyvern/webeye/actions/parse_actions.py index 8383750f..4017abd1 100644 --- a/skyvern/webeye/actions/parse_actions.py +++ b/skyvern/webeye/actions/parse_actions.py @@ -18,6 +18,7 @@ from skyvern.webeye.actions.actions import ( DownloadFileAction, InputTextAction, KeypressAction, + MoveAction, NullAction, ScrollAction, SelectOption, @@ -281,6 +282,13 @@ async def parse_cua_actions( reasoning=reasoning, intention=reasoning, ) + case "move": + action = MoveAction( + x=cua_action.x, + y=cua_action.y, + reasoning=reasoning, + intention=reasoning, + ) case _: raise ValueError(f"Unsupported action type: {action_type}") action.organization_id = task.organization_id