def clean_element_before_hashing(element: dict) -> dict:
    """Return a cleaned copy of *element* with position/identity fields removed.

    The top-level keys ``"id"``, ``"rect"`` and ``"frame_index"`` are dropped,
    the ``SKYVERN_ID_ATTR`` entry is removed from the ``"attributes"`` mapping,
    and every entry of ``"children"`` is cleaned recursively the same way.
    Presumably these fields vary between otherwise identical elements and
    would destabilize the hash -- confirm against ``hash_element``.

    The input is never mutated: new dicts/lists are built for the element
    itself, its ``"attributes"`` and its ``"children"``. Values stored under
    any other key are reused by reference rather than deep-copied, so callers
    must not mutate them through the returned dict.

    Args:
        element: Element tree node; ``"attributes"`` (if present) must be a
            mapping and ``"children"`` (if present) an iterable of nodes.

    Returns:
        A new dict with the same key order as *element*, minus the removed keys.
    """
    cleaned: dict = {}
    for key, value in element.items():
        if key in ("id", "rect", "frame_index"):
            continue
        if key == "attributes":
            # Rebuild rather than pop so the caller's attributes stay intact.
            value = {name: attr for name, attr in value.items() if name != SKYVERN_ID_ATTR}
        elif key == "children":
            value = [clean_element_before_hashing(child) for child in value]
        # Insert in original iteration order so key order is preserved.
        cleaned[key] = value
    return cleaned