integration with CUA (#2126)

This commit is contained in:
Shuchang Zheng
2025-04-11 11:18:53 -07:00
committed by GitHub
parent 2ac65c4a9b
commit f883b91180
13 changed files with 420 additions and 53 deletions

View File

@@ -326,13 +326,14 @@ class ScrapedPage(BaseModel):
element["children"] = new_children
return element
async def refresh(self, draw_boxes: bool = True) -> Self:
async def refresh(self, draw_boxes: bool = True, scroll: bool = True) -> Self:
refreshed_page = await scrape_website(
browser_state=self._browser_state,
url=self.url,
cleanup_element_tree=self._clean_up_func,
scrape_exclude=self._scrape_exclude,
draw_boxes=draw_boxes,
scroll=scroll,
)
self.elements = refreshed_page.elements
self.id_to_css_dict = refreshed_page.id_to_css_dict
@@ -366,6 +367,8 @@ async def scrape_website(
scrape_exclude: ScrapeExcludeFunc | None = None,
take_screenshots: bool = True,
draw_boxes: bool = True,
max_screenshot_number: int = settings.MAX_NUM_SCREENSHOTS,
scroll: bool = True,
) -> ScrapedPage:
"""
************************************************************************************************
@@ -397,6 +400,8 @@ async def scrape_website(
scrape_exclude=scrape_exclude,
take_screenshots=take_screenshots,
draw_boxes=draw_boxes,
max_screenshot_number=max_screenshot_number,
scroll=scroll,
)
except Exception as e:
# NOTE: MAX_SCRAPING_RETRIES is set to 0 in both staging and production
@@ -420,6 +425,8 @@ async def scrape_website(
scrape_exclude=scrape_exclude,
take_screenshots=take_screenshots,
draw_boxes=draw_boxes,
max_screenshot_number=max_screenshot_number,
scroll=scroll,
)
@@ -469,6 +476,8 @@ async def scrape_web_unsafe(
scrape_exclude: ScrapeExcludeFunc | None = None,
take_screenshots: bool = True,
draw_boxes: bool = True,
max_screenshot_number: int = settings.MAX_NUM_SCREENSHOTS,
scroll: bool = True,
) -> ScrapedPage:
"""
Asynchronous function that performs web scraping without any built-in error handling. This function is intended
@@ -503,7 +512,6 @@ async def scrape_web_unsafe(
json_to_html(element, need_skyvern_attrs=False) for element in element_tree_trimmed
)
token_count = count_tokens(element_tree_trimmed_html_str)
max_screenshot_number = settings.MAX_NUM_SCREENSHOTS
if token_count > DEFAULT_MAX_TOKENS:
max_screenshot_number = min(max_screenshot_number, 1)
@@ -512,6 +520,7 @@ async def scrape_web_unsafe(
url=url,
draw_boxes=draw_boxes,
max_number=max_screenshot_number,
scroll=scroll,
)
id_to_css_dict, id_to_element_dict, id_to_frame_dict, id_to_element_hash, hash_to_element_ids = build_element_dict(
elements