From 9245a329eefe5bd5ef07395315be6977cf7aa8f5 Mon Sep 17 00:00:00 2001
From: LawyZheng
Date: Tue, 23 Jul 2024 14:23:15 +0800
Subject: [PATCH] add cleanup function (#631)

---
 skyvern/forge/agent.py            |  1 +
 skyvern/forge/agent_functions.py  | 33 +++++++++++++++++++++++++++
 skyvern/webeye/scraper/scraper.py | 37 +++++--------------------------
 3 files changed, 39 insertions(+), 32 deletions(-)

diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py
index dd36fdcc..1dd0a5a0 100644
--- a/skyvern/forge/agent.py
+++ b/skyvern/forge/agent.py
@@ -899,6 +899,7 @@ class ForgeAgent:
         return await scrape_website(
             browser_state,
             task.url,
+            app.AGENT_FUNCTION.cleanup_element_tree,
             scrape_exclude=app.scrape_exclude,
         )
 
diff --git a/skyvern/forge/agent_functions.py b/skyvern/forge/agent_functions.py
index 6af808d0..480d1f22 100644
--- a/skyvern/forge/agent_functions.py
+++ b/skyvern/forge/agent_functions.py
@@ -1,3 +1,5 @@
+from typing import Dict, List
+
 from playwright.async_api import Page
 
 from skyvern.exceptions import StepUnableToExecuteError
@@ -8,6 +10,11 @@ from skyvern.forge.sdk.schemas.tasks import Task, TaskStatus
 from skyvern.webeye.browser_factory import BrowserState
 
 
+def _remove_rect(element: dict) -> None:
+    if "rect" in element:
+        del element["rect"]
+
+
 class AgentFunction:
     async def validate_step_execution(
         self,
@@ -56,3 +63,29 @@
         page: Page,
     ) -> list[AsyncOperation]:
         return []
+
+    async def cleanup_element_tree(
+        self,
+        url: str,
+        element_tree: List[Dict],
+    ) -> List[Dict]:
+        """
+        Remove rect and attribute.unique_id from the elements.
+        The reason we're doing it is to
+        1. reduce unnecessary data so that llm get less distraction
+        TODO later: 2. reduce tokens sent to llm to save money
+        :param element_tree: List of elements to clean up.
+        :return: List of cleaned-up elements.
+        """
+        queue = []
+        for element in element_tree:
+            queue.append(element)
+        while queue:
+            queue_ele = queue.pop(0)
+            _remove_rect(queue_ele)
+            # TODO: we can come back to test removing the unique_id
+            # from element attributes to make sure this won't increase hallucination
+            # _remove_unique_id(queue_ele)
+            if "children" in queue_ele:
+                queue.extend(queue_ele["children"])
+        return element_tree
diff --git a/skyvern/webeye/scraper/scraper.py b/skyvern/webeye/scraper/scraper.py
index f479d8f9..92aebb41 100644
--- a/skyvern/webeye/scraper/scraper.py
+++ b/skyvern/webeye/scraper/scraper.py
@@ -153,6 +153,7 @@ class ScrapedPage(BaseModel):
 async def scrape_website(
     browser_state: BrowserState,
     url: str,
+    cleanup_element_tree: Callable[[str, list[dict]], Awaitable[list[dict]]],
     num_retry: int = 0,
     scrape_exclude: Callable[[Page, Frame], Awaitable[bool]] | None = None,
 ) -> ScrapedPage:
@@ -179,7 +180,7 @@
     """
     try:
         num_retry += 1
-        return await scrape_web_unsafe(browser_state, url, scrape_exclude)
+        return await scrape_web_unsafe(browser_state, url, cleanup_element_tree, scrape_exclude)
     except Exception as e:
         # NOTE: MAX_SCRAPING_RETRIES is set to 0 in both staging and production
         if num_retry > SettingsManager.get_settings().MAX_SCRAPING_RETRIES:
@@ -197,6 +198,7 @@
         return await scrape_website(
             browser_state,
             url,
+            cleanup_element_tree,
             num_retry=num_retry,
             scrape_exclude=scrape_exclude,
         )
@@ -231,6 +233,7 @@ async def get_frame_text(iframe: Frame) -> str:
 async def scrape_web_unsafe(
     browser_state: BrowserState,
     url: str,
+    cleanup_element_tree: Callable[[str, list[dict]], Awaitable[list[dict]]],
     scrape_exclude: Callable[[Page, Frame], Awaitable[bool]] | None = None,
 ) -> ScrapedPage:
     """
@@ -261,9 +264,7 @@
     screenshots = await SkyvernFrame.take_split_screenshots(page=page, url=url, draw_boxes=True)
 
     elements, element_tree = await get_interactable_element_tree(page, scrape_exclude)
-    element_tree = cleanup_elements(copy.deepcopy(element_tree))
-
-    _build_element_links(elements)
+    element_tree = await cleanup_element_tree(url, copy.deepcopy(element_tree))
 
     id_to_css_dict = {}
     id_to_element_dict = {}
@@ -377,29 +378,6 @@ async def get_interactable_element_tree(
     return elements, element_tree
 
 
-def cleanup_elements(elements: list[dict]) -> list[dict]:
-    """
-    Remove rect and attribute.unique_id from the elements.
-    The reason we're doing it is to
-    1. reduce unnecessary data so that llm get less distrction
-    # TODO later: 2. reduce tokens sent to llm to save money
-    :param elements: List of elements to remove xpaths from.
-    :return: List of elements without xpaths.
-    """
-    queue = []
-    for element in elements:
-        queue.append(element)
-    while queue:
-        queue_ele = queue.pop(0)
-        _remove_rect(queue_ele)
-        # TODO: we can come back to test removing the unique_id
-        # from element attributes to make sure this won't increase hallucination
-        # _remove_unique_id(queue_ele)
-        if "children" in queue_ele:
-            queue.extend(queue_ele["children"])
-    return elements
-
-
 def trim_element_tree(elements: list[dict]) -> list[dict]:
     queue = []
     for element in elements:
@@ -466,11 +444,6 @@ def _trimmed_attributes(tag_name: str, attributes: dict) -> dict:
     return new_attributes
 
 
-def _remove_rect(element: dict) -> None:
-    if "rect" in element:
-        del element["rect"]
-
-
 def _remove_unique_id(element: dict) -> None:
     if "attributes" not in element:
         return