diff --git a/skyvern/exceptions.py b/skyvern/exceptions.py
index 26603002..9e17b076 100644
--- a/skyvern/exceptions.py
+++ b/skyvern/exceptions.py
@@ -749,3 +749,8 @@ class ScriptNotFound(SkyvernHTTPException):
 class NoTOTPSecretFound(SkyvernException):
     def __init__(self) -> None:
         super().__init__("No TOTP secret found")
+
+
+class NoElementFound(SkyvernException):
+    def __init__(self) -> None:
+        super().__init__("No element found.")
diff --git a/skyvern/webeye/scraper/scraper.py b/skyvern/webeye/scraper/scraper.py
index d1a88c0a..d7c26b81 100644
--- a/skyvern/webeye/scraper/scraper.py
+++ b/skyvern/webeye/scraper/scraper.py
@@ -13,7 +13,13 @@ from pydantic import BaseModel, PrivateAttr
 
 from skyvern.config import settings
 from skyvern.constants import DEFAULT_MAX_TOKENS, SKYVERN_DIR, SKYVERN_ID_ATTR
-from skyvern.exceptions import FailedToTakeScreenshot, ScrapingFailed, ScrapingFailedBlankPage, UnknownElementTreeFormat
+from skyvern.exceptions import (
+    FailedToTakeScreenshot,
+    NoElementFound,
+    ScrapingFailed,
+    ScrapingFailedBlankPage,
+    UnknownElementTreeFormat,
+)
 from skyvern.forge.sdk.api.crypto import calculate_sha256
 from skyvern.forge.sdk.core import skyvern_context
 from skyvern.forge.sdk.settings_manager import SettingsManager
@@ -524,7 +530,7 @@ async def scrape_web_unsafe(
     max_screenshot_number: int = settings.MAX_NUM_SCREENSHOTS,
     scroll: bool = True,
     support_empty_page: bool = False,
-    wait_seconds: float = 3,
+    wait_seconds: float = 0,
 ) -> ScrapedPage:
     """
     Asynchronous function that performs web scraping without any built-in error handling. This function is intended
@@ -549,10 +555,22 @@ async def scrape_web_unsafe(
     if url == "about:blank" and not support_empty_page:
         raise ScrapingFailedBlankPage()
 
-    LOG.info(f"Waiting for {wait_seconds} seconds before scraping the website.")
-    await asyncio.sleep(wait_seconds)
+    try:
+        await page.wait_for_load_state("load", timeout=3000)
+        await SkyvernFrame.wait_for_animation_end(page)
+    except Exception:
+        LOG.warning("Failed to wait for load state, will continue scraping", exc_info=True)
+
+    if wait_seconds > 0:
+        LOG.info(f"Waiting for {wait_seconds} seconds before scraping the website.", wait_seconds=wait_seconds)
+        await asyncio.sleep(wait_seconds)
 
     elements, element_tree = await get_interactable_element_tree(page, scrape_exclude)
+    if not elements and not support_empty_page:
+        LOG.warning("No elements found on the page, wait for 3 seconds and retry")
+        await asyncio.sleep(3)
+        elements, element_tree = await get_interactable_element_tree(page, scrape_exclude)
+
     element_tree = await cleanup_element_tree(page, url, copy.deepcopy(element_tree))
     element_tree_trimmed = trim_element_tree(copy.deepcopy(element_tree))
 
@@ -576,9 +594,9 @@ async def scrape_web_unsafe(
         elements
     )
 
-    # if there are no elements, fail the scraping
+    # if there are no elements, fail the scraping unless support_empty_page is True
     if not elements and not support_empty_page:
-        raise Exception("No elements found on the page")
+        raise NoElementFound()
 
     text_content = await get_frame_text(page.main_frame)
 
diff --git a/skyvern/webeye/utils/page.py b/skyvern/webeye/utils/page.py
index 450ffb3d..885d9df5 100644
--- a/skyvern/webeye/utils/page.py
+++ b/skyvern/webeye/utils/page.py
@@ -161,8 +161,7 @@ async def _scrolling_screenshots_helper(
 
             if mode == ScreenshotMode.DETAILED:
                 # wait until animation ends, which is triggered by scrolling
-                LOG.debug("Waiting for 2 seconds until animation ends.")
-                await asyncio.sleep(2)
+                await SkyvernFrame.wait_for_animation_end(skyvern_page.frame)
             else:
                 if draw_boxes:
                     await skyvern_page.build_elements_and_draw_bounding_boxes(frame=frame, frame_index=frame_index)
@@ -215,6 +214,27 @@ def _merge_images_by_position(images: list[Image.Image], positions: list[int]) -
 
 
 class SkyvernFrame:
+    @staticmethod
+    async def wait_for_animation_end(page: Page | Frame, timeout: float = 3000) -> None:
+        """
+        Best-effort wait until no animation returned by document.getAnimations() is still running.
+
+        Exceptions (including the Playwright timeout, given in milliseconds) are logged as a warning and swallowed.
+        """
+        try:
+            await page.wait_for_function(
+                """
+                () => {
+                    // a paused animation never reaches 'finished', so only wait while something is running
+                    const animations = document.getAnimations();
+                    return animations.every(a => a.playState !== 'running');
+                }
+                """,
+                timeout=timeout,
+            )
+        except Exception:
+            LOG.warning("Failed to wait for animation end, but continue", exc_info=True)
+
     @staticmethod
     async def evaluate(
         frame: Page | Frame,