From 242a87bb24435c93c61271219d2ef73989e7ad90 Mon Sep 17 00:00:00 2001 From: LawyZheng Date: Fri, 5 Apr 2024 21:47:18 +0800 Subject: [PATCH] add action setup/teardown stage (#159) --- .../forge/prompts/skyvern/extract-action.j2 | 4 -- skyvern/webeye/actions/handler.py | 62 ++++++++++++++----- 2 files changed, 48 insertions(+), 18 deletions(-) diff --git a/skyvern/forge/prompts/skyvern/extract-action.j2 b/skyvern/forge/prompts/skyvern/extract-action.j2 index 10e226da..69663386 100644 --- a/skyvern/forge/prompts/skyvern/extract-action.j2 +++ b/skyvern/forge/prompts/skyvern/extract-action.j2 @@ -6,10 +6,6 @@ Each element is tagged with an ID. If you see any information in red in the page screenshot, this means a condition wasn't satisfied. prioritize actions with the red information. If you see a popup in the page screenshot, prioritize actions on the popup. -{% if "lever" in url %} -DO NOT UPDATE ANY LOCATION FIELDS -{% endif %} - Reply in JSON format with the following keys: { "actions": array // An array of actions. Here's the format of each action: diff --git a/skyvern/webeye/actions/handler.py b/skyvern/webeye/actions/handler.py index f7c7280c..b2ce2b54 100644 --- a/skyvern/webeye/actions/handler.py +++ b/skyvern/webeye/actions/handler.py @@ -30,6 +30,14 @@ class ActionHandler: ActionType, Callable[[Action, Page, ScrapedPage, Task, Step], Awaitable[list[ActionResult]]] ] = {} + _setup_action_types: dict[ + ActionType, Callable[[Action, Page, ScrapedPage, Task, Step], Awaitable[list[ActionResult]]] + ] = {} + + _teardown_action_types: dict[ + ActionType, Callable[[Action, Page, ScrapedPage, Task, Step], Awaitable[list[ActionResult]]] + ] = {} + @classmethod def register_action_type( cls, @@ -38,6 +46,22 @@ class ActionHandler: ) -> None: cls._handled_action_types[action_type] = handler + @classmethod + def register_setup_for_action_type( + cls, + action_type: ActionType, + handler: Callable[[Action, Page, ScrapedPage, Task, Step], Awaitable[list[ActionResult]]], + ) -> None: + cls._setup_action_types[action_type] = handler + + @classmethod + def register_teardown_for_action_type( + cls, + action_type: ActionType, + handler: Callable[[Action, Page, ScrapedPage, Task, Step], Awaitable[list[ActionResult]]], + ) -> None: + cls._teardown_action_types[action_type] = handler + @staticmethod async def handle_action( scraped_page: ScrapedPage, @@ -50,8 +74,31 @@ class ActionHandler: page = await browser_state.get_or_create_page() try: if action.action_type in ActionHandler._handled_action_types: + actions_result: list[ActionResult] = [] + + # do setup before action handler + if setup := ActionHandler._setup_action_types.get(action.action_type): + results = await setup(action, page, scraped_page, task, step) + actions_result.extend(results) + if results and results[-1] != ActionSuccess: + return actions_result + + # do the handler handler = ActionHandler._handled_action_types[action.action_type] - return await handler(action, page, scraped_page, task, step) + results = await handler(action, page, scraped_page, task, step) + actions_result.extend(results) + if not results or type(actions_result[-1]) != ActionSuccess: + return actions_result + + # do the teardown + teardown = ActionHandler._teardown_action_types.get(action.action_type) + if not teardown: + return actions_result + + results = await teardown(action, page, scraped_page, task, step) + actions_result.extend(results) + return actions_result + else: LOG.error("Unsupported action type in handler", action=action, type=type(action)) return [ActionFailure(Exception(f"Unsupported action type: {type(action)}"))] @@ -98,19 +145,6 @@ async def handle_input_text_action( await locator.clear() text = get_actual_value_of_parameter_if_secret(task, action.text) await locator.fill(text, timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS) - - # This is a hack that gets dropdowns to select the "best" option based on what's typed - # Fixes situations like tsk_228671423990405776 where the location isn't being autocompleted - await locator.press("Tab", timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS) - input_value = await locator.input_value(timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS) - if not input_value: - LOG.info("Failed to input the text, trying to press sequentially with an enter click", action=action) - await locator.clear() - await locator.press_sequentially(text, timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS) - await locator.press("Enter", timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS) - input_value = await locator.input_value(timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS) - LOG.info("Input value", input_value=input_value, action=action) - return [ActionSuccess()]