From 22566f01ce73f126de8d0f1d334519a7c68b70a9 Mon Sep 17 00:00:00 2001 From: Kerem Yilmaz Date: Wed, 3 Jul 2024 01:38:50 -0700 Subject: [PATCH] Mark options of disabled select non-interactable (#540) --- skyvern/forge/agent.py | 12 +++++++ .../forge/prompts/skyvern/extract-action.j2 | 2 +- skyvern/webeye/actions/handler.py | 16 +++++++++ skyvern/webeye/actions/responses.py | 3 ++ skyvern/webeye/scraper/domUtils.js | 34 ++++++++++++++++++- 5 files changed, 65 insertions(+), 2 deletions(-) diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py index 78d619a1..689ff334 100644 --- a/skyvern/forge/agent.py +++ b/skyvern/forge/agent.py @@ -678,6 +678,17 @@ class ForgeAgent: ) # stop executing the rest actions break + elif results and not results[-1].success and not results[-1].stop_execution_on_failure: + LOG.warning( + "Action failed, but not stopping execution", + task_id=task.task_id, + step_id=step.step_id, + step_order=step.order, + step_retry=step.retry_index, + action_idx=action_idx, + action=action, + action_result=results, + ) else: if action_node.next is not None: LOG.warning( @@ -1004,6 +1015,7 @@ class ForgeAgent: "interacted_with_parent", "step_retry_number", "step_order", + "stop_execution_on_failure", }, ) for result in results diff --git a/skyvern/forge/prompts/skyvern/extract-action.j2 b/skyvern/forge/prompts/skyvern/extract-action.j2 index f06a2780..c0fcd93c 100644 --- a/skyvern/forge/prompts/skyvern/extract-action.j2 +++ b/skyvern/forge/prompts/skyvern/extract-action.j2 @@ -9,7 +9,7 @@ If you see a popup in the page screenshot, prioritize actions on the popup. Reply in JSON format with the following keys: { - "action_plan": str, // A string that describes the plan of actions you're going to take. Be specific and to the point. Use this as a quick summary of the actions you're going to take, and what order you're going to take them in + "action_plan": str, // A string that describes the plan of actions you're going to take. 
Be specific and to the point. Use this as a quick summary of the actions you're going to take, and what order you're going to take them in, and how that moves you towards your overall goal "actions": array // An array of actions. Here's the format of each action: [{ "reasoning": str, // The reasoning behind the action. Be specific, referencing any user information and their fields and element ids in your reasoning. Mention why you chose the action type, and why you chose the element id. Keep the reasoning short and to the point. diff --git a/skyvern/webeye/actions/handler.py b/skyvern/webeye/actions/handler.py index 6f01b08d..8d0d2ea6 100644 --- a/skyvern/webeye/actions/handler.py +++ b/skyvern/webeye/actions/handler.py @@ -109,6 +109,9 @@ class ActionHandler: if action.action_type in ActionHandler._handled_action_types: actions_result: list[ActionResult] = [] + if invalid_web_action_check := check_for_invalid_web_action(action, page, scraped_page, task, step): + return invalid_web_action_check + # do setup before action handler if setup := ActionHandler._setup_action_types.get(action.action_type): results = await setup(action, page, scraped_page, task, step) @@ -158,6 +161,19 @@ class ActionHandler: return [ActionFailure(e)] +def check_for_invalid_web_action( + action: actions.Action, + page: Page, + scraped_page: ScrapedPage, + task: Task, + step: Step, +) -> list[ActionResult]: + if isinstance(action, WebAction) and action.element_id not in scraped_page.id_to_element_dict: + return [ActionFailure(MissingElement(element_id=action.element_id), stop_execution_on_failure=False)] + + return [] + + async def handle_solve_captcha_action( action: actions.SolveCaptchaAction, page: Page, diff --git a/skyvern/webeye/actions/responses.py b/skyvern/webeye/actions/responses.py index eec15918..766aa4f3 100644 --- a/skyvern/webeye/actions/responses.py +++ b/skyvern/webeye/actions/responses.py @@ -7,6 +7,7 @@ from skyvern.webeye.string_util import remove_whitespace class 
ActionResult(BaseModel): success: bool + stop_execution_on_failure: bool = True exception_type: str | None = None exception_message: str | None = None data: dict[str, Any] | list | str | None = None @@ -67,6 +68,7 @@ class ActionFailure(ActionResult): def __init__( self, exception: Exception, + stop_execution_on_failure: bool = True, javascript_triggered: bool = False, download_triggered: bool | None = None, interacted_with_sibling: bool = False, @@ -75,6 +77,7 @@ class ActionFailure(ActionResult): super().__init__( success=False, exception_type=type(exception).__name__, + stop_execution_on_failure=stop_execution_on_failure, exception_message=remove_whitespace(str(exception)), javascript_triggered=javascript_triggered, download_triggered=download_triggered, diff --git a/skyvern/webeye/scraper/domUtils.js b/skyvern/webeye/scraper/domUtils.js index e6676733..f5a222a9 100644 --- a/skyvern/webeye/scraper/domUtils.js +++ b/skyvern/webeye/scraper/domUtils.js @@ -357,6 +357,11 @@ function isInteractable(element) { return true; } + // Check if the option's parent (select) is hidden or disabled + if (tagName === "option" && isHiddenOrDisabled(element.parentElement)) { + return false; + } + if ( tagName === "button" || tagName === "select" || @@ -718,7 +723,11 @@ async function buildTreeFromBody(frame = "main.frame", open_select = false) { } if (elementTagNameLower === "input" || elementTagNameLower === "textarea") { - attrs["value"] = element.value; + if (element.type === "radio") { + attrs["value"] = "" + element.checked + ""; + } else { + attrs["value"] = element.value; + } } let elementObj = { @@ -906,6 +915,11 @@ async function buildTreeFromBody(frame = "main.frame", open_select = false) { const children = getChildElements(element); for (let i = 0; i < children.length; i++) { const childElement = children[i]; + + // Skip processing option-children of a non-interactable select element as they are already added to the select.options + if 
(childElement.tagName.toLowerCase() === "option") { + continue; + } await processElement(childElement, parentId); } } @@ -1367,3 +1381,21 @@ async function scrollToNextPage(draw_boxes) { async function sleep(ms) { return new Promise((resolve) => setTimeout(resolve, ms)); } + +// Helper method for debugging +function findNodeById(arr, targetId, path = []) { + for (let i = 0; i < arr.length; i++) { + const currentPath = [...path, arr[i].id]; + if (arr[i].id === targetId) { + console.log("Lineage:", currentPath.join(" -> ")); + return arr[i]; + } + if (arr[i].children && arr[i].children.length > 0) { + const result = findNodeById(arr[i].children, targetId, currentPath); + if (result) { + return result; + } + } + } + return null; +}