From 03cc8a5a52470380a64290bb9a5361b67d3c7c7c Mon Sep 17 00:00:00 2001 From: LawyZheng Date: Tue, 22 Jul 2025 14:25:47 +0800 Subject: [PATCH] extend auto completion agent logic (#3010) --- .../skyvern/auto-completion-choose-option.j2 | 9 ++++- skyvern/forge/sdk/db/polls.py | 4 +- skyvern/webeye/actions/handler.py | 38 +++++++++++++++++-- 3 files changed, 43 insertions(+), 8 deletions(-) diff --git a/skyvern/forge/prompts/skyvern/auto-completion-choose-option.j2 b/skyvern/forge/prompts/skyvern/auto-completion-choose-option.j2 index 9c97d566..367d524f 100644 --- a/skyvern/forge/prompts/skyvern/auto-completion-choose-option.j2 +++ b/skyvern/forge/prompts/skyvern/auto-completion-choose-option.j2 @@ -23,7 +23,7 @@ Reply in JSON format with the following keys: "confidence_float": float, // The confidence of the action. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence. "relevance_float": float, // The relative between the selected element and the provided information. You should consider how much the selected option is related to the user goal, the user details and the context. Pick a number between 0.00 and 1.00. 0.00 means no relevance, 1.00 means full relevance, the precision is 0.01. "value": str, // The value to select. - "id": str, // The id of the most relevant and interactable element to take the action. The id must be from "HTML elements". It should be null if no element is relative or there's no auto completion suggestion. + "id": str, // The id of the most relevant and interactable element to take the action. The id must be from {{'"IDs for emerging HTML elements"' if new_elements_ids else '"HTML elements"'}}. It should be null if no element is relative or there's no auto completion suggestion. } Context: @@ -45,7 +45,12 @@ User details: ``` {{ navigation_payload_str }} ``` - +{% if new_elements_ids %} +IDs for emerging HTML elements +``` +{{ new_elements_ids }} +``` +{% endif %} HTML elements: ``` {{ elements }} diff --git a/skyvern/forge/sdk/db/polls.py b/skyvern/forge/sdk/db/polls.py index addee714..9f9634f3 100644 --- a/skyvern/forge/sdk/db/polls.py +++ b/skyvern/forge/sdk/db/polls.py @@ -13,7 +13,7 @@ async def wait_on_persistent_browser_address( session_id: str, organization_id: str, timeout: int = 600, - poll_interval: int = 2, + poll_interval: float = 2, ) -> str | None: persistent_browser_session = await await_browser_session(db, session_id, organization_id, timeout, poll_interval) return persistent_browser_session.browser_address if persistent_browser_session else None @@ -24,7 +24,7 @@ async def await_browser_session( session_id: str, organization_id: str, timeout: int = 600, - poll_interval: int = 2, + poll_interval: float = 2, ) -> PersistentBrowserSession | None: try: async with asyncio.timeout(timeout): diff --git a/skyvern/webeye/actions/handler.py b/skyvern/webeye/actions/handler.py index 1863dcac..35936c95 100644 --- a/skyvern/webeye/actions/handler.py +++ b/skyvern/webeye/actions/handler.py @@ -1096,6 +1096,7 @@ async def handle_input_text_action( if await skyvern_element.is_auto_completion_input() or input_or_select_context.is_location_input: if result := await input_or_auto_complete_input( input_or_select_context=input_or_select_context, + scraped_page=scraped_page, page=page, dom=dom, text=text, @@ -2132,6 +2133,7 @@ async def chain_click( async def choose_auto_completion_dropdown( context: InputOrSelectContext, page: Page, + scraped_page: ScrapedPage, dom: DomUtil, text: str, skyvern_element: SkyvernElement, @@ -2190,11 +2192,34 @@ async def choose_auto_completion_dropdown( incremental_element.extend(confirmed_preserved_list) result.incremental_elements = copy.deepcopy(incremental_element) - if len(incremental_element) == 0: - raise NoIncrementalElementFoundForAutoCompletion(element_id=skyvern_element.get_id(), text=text) + html = "" + new_interactable_element_ids = [] + if len(incremental_element) > 0: + cleaned_incremental_element = remove_duplicated_HTML_element(incremental_element) + html = incremental_scraped.build_html_tree(cleaned_incremental_element) + else: + scraped_page_after_open = await scraped_page.generate_scraped_page_without_screenshots() + new_element_ids = set(scraped_page_after_open.id_to_css_dict.keys()) - set( + scraped_page.id_to_css_dict.keys() + ) + + dom_after_open = DomUtil(scraped_page=scraped_page_after_open, page=page) + new_interactable_element_ids = [ + element_id + for element_id in new_element_ids + if (await dom_after_open.get_skyvern_element_by_id(element_id)).is_interactable() + ] + if len(new_interactable_element_ids) == 0: + raise NoIncrementalElementFoundForAutoCompletion(element_id=skyvern_element.get_id(), text=text) + LOG.info( + "New elements detected after the input", + new_elements_ids=new_interactable_element_ids, + ) + result.incremental_elements = copy.deepcopy( + [scraped_page_after_open.id_to_element_dict[element_id] for element_id in new_interactable_element_ids] + ) + html = scraped_page_after_open.build_element_tree() - cleaned_incremental_element = remove_duplicated_HTML_element(incremental_element) - html = incremental_scraped.build_html_tree(cleaned_incremental_element) auto_completion_confirm_prompt = prompt_engine.load_prompt( "auto-completion-choose-option", is_search=context.is_search_bar, @@ -2203,6 +2228,7 @@ async def choose_auto_completion_dropdown( navigation_goal=task.navigation_goal, navigation_payload_str=json.dumps(task.navigation_payload), elements=html, + new_elements_ids=new_interactable_element_ids, local_datetime=datetime.now(skyvern_context.ensure_context().tz_info).isoformat(), ) LOG.info( @@ -2257,6 +2283,7 @@ async def choose_auto_completion_dropdown( await locator.click(timeout=settings.BROWSER_ACTION_TIMEOUT_MS) clear_input = False return result + except Exception as e: LOG.info( "Failed to choose the auto completion dropdown", @@ -2287,6 +2314,7 @@ def remove_duplicated_HTML_element(elements: list[dict]) -> list[dict]: async def input_or_auto_complete_input( input_or_select_context: InputOrSelectContext, + scraped_page: ScrapedPage, page: Page, dom: DomUtil, text: str, @@ -2326,6 +2354,7 @@ async def input_or_auto_complete_input( result = await choose_auto_completion_dropdown( context=input_or_select_context, page=page, + scraped_page=scraped_page, dom=dom, text=current_value, preserved_elements=result.incremental_elements, @@ -2395,6 +2424,7 @@ async def input_or_auto_complete_input( result = await choose_auto_completion_dropdown( context=input_or_select_context, page=page, + scraped_page=scraped_page, dom=dom, text=value, preserved_elements=result.incremental_elements,