From 434bbff459f105aa26cc1cedad26c89edc59514c Mon Sep 17 00:00:00 2001
From: Shuchang Zheng
Date: Wed, 13 Aug 2025 19:22:50 -0700
Subject: [PATCH] add support_empty_page and wait_seconds to the
 scrape_website interface (#3181)

---
 skyvern/webeye/scraper/scraper.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/skyvern/webeye/scraper/scraper.py b/skyvern/webeye/scraper/scraper.py
index 1f10e8e9..08e509c6 100644
--- a/skyvern/webeye/scraper/scraper.py
+++ b/skyvern/webeye/scraper/scraper.py
@@ -405,6 +405,8 @@ async def scrape_website(
     draw_boxes: bool = True,
     max_screenshot_number: int = settings.MAX_NUM_SCREENSHOTS,
     scroll: bool = True,
+    support_empty_page: bool = False,
+    wait_seconds: float = 3,
 ) -> ScrapedPage:
     """
     ************************************************************************************************
@@ -439,6 +441,8 @@ async def scrape_website(
             draw_boxes=draw_boxes,
             max_screenshot_number=max_screenshot_number,
             scroll=scroll,
+            support_empty_page=support_empty_page,
+            wait_seconds=wait_seconds,
         )
     except ScrapingFailedBlankPage:
         raise
@@ -517,6 +521,8 @@ async def scrape_web_unsafe(
     draw_boxes: bool = True,
     max_screenshot_number: int = settings.MAX_NUM_SCREENSHOTS,
     scroll: bool = True,
+    support_empty_page: bool = False,
+    wait_seconds: float = 3,
 ) -> ScrapedPage:
     """
     Asynchronous function that performs web scraping without any built-in error handling. This function is intended
@@ -538,11 +544,11 @@ async def scrape_web_unsafe(
     # This also solves the issue where we can't scroll due to a popup.(e.g. geico first popup on the homepage after
     # clicking start my quote)
     url = page.url
-    if url == "about:blank":
+    if url == "about:blank" and not support_empty_page:
         raise ScrapingFailedBlankPage()
 
-    LOG.info("Waiting for 3 seconds before scraping the website.")
-    await asyncio.sleep(3)
+    LOG.info(f"Waiting for {wait_seconds} seconds before scraping the website.")
+    await asyncio.sleep(wait_seconds)
 
     elements, element_tree = await get_interactable_element_tree(page, scrape_exclude)
     element_tree = await cleanup_element_tree(page, url, copy.deepcopy(element_tree))
@@ -569,7 +575,7 @@ async def scrape_web_unsafe(
     )
 
     # if there are no elements, fail the scraping
-    if not elements and not support_empty_page:
+    if not elements and not support_empty_page:
         raise Exception("No elements found on the page")
 
     text_content = await get_frame_text(page.main_frame)
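
For reviewers, a minimal sketch of how a call site might exercise the two new keyword arguments. Only `support_empty_page` and `wait_seconds` are introduced by this patch; the leading arguments here are assumptions for illustration, since `scrape_website`'s full signature is not shown in the hunks:

```python
# Hypothetical call site (assumed leading arguments, not part of this patch).
scraped_page = await scrape_website(
    browser_state,            # assumed: existing argument providing the Playwright page
    url,                      # assumed: existing target-URL argument
    support_empty_page=True,  # skip raising on about:blank and on an empty element tree
    wait_seconds=0.5,         # replaces the previously hard-coded 3-second pre-scrape sleep
)
```

One flag gates both failure paths (the `about:blank` check and the "no elements found" check), so callers that legitimately scrape empty or not-yet-navigated pages opt in once rather than catching two different exceptions.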