make sure we do a web scrape before data extraction at the end of task to ensure the freshness of the scraped data (#1152)

This commit is contained in:
Shuchang Zheng
2024-11-06 22:55:21 -08:00
committed by GitHub
parent f7c9dc2279
commit c80597e7a5
4 changed files with 12 additions and 17 deletions

View File

@@ -907,17 +907,19 @@ class ForgeAgent:
step_id=step.step_id,
workflow_run_id=task.workflow_run_id,
)
scraped_page_without_screenshots = await scraped_page.refresh(with_screenshot=False)
scraped_page_refreshed = await scraped_page.refresh()
verification_prompt = prompt_engine.load_prompt(
"check-user-goal",
navigation_goal=task.navigation_goal,
navigation_payload=task.navigation_payload,
elements=scraped_page_without_screenshots.build_element_tree(ElementTreeFormat.HTML),
elements=scraped_page_refreshed.build_element_tree(ElementTreeFormat.HTML),
)
# this prompt is critical to our agent so let's use the primary LLM API handler
verification_response = await app.LLM_API_HANDLER(prompt=verification_prompt, step=step, screenshots=None)
verification_response = await app.LLM_API_HANDLER(
prompt=verification_prompt, step=step, screenshots=scraped_page_refreshed.screenshots
)
if "user_goal_achieved" not in verification_response or "thoughts" not in verification_response:
LOG.error(
"Invalid LLM response for user goal success verification, skipping verification",

View File

@@ -1,4 +1,4 @@
Your are here to help the user determine if the user has completed their goal on the web. Use the content of the elements parsed from the page, the user goal and user details to determine whether the user goal has been completed or not.
Your are here to help the user determine if the user has completed their goal on the web. Use the content of the elements parsed from the page, the screenshots of the page, the user goal and user details to determine whether the user goal has been completed or not.
Make sure to ONLY return the JSON object in this format with no additional text before or after it:
```json

View File

@@ -2237,6 +2237,7 @@ async def extract_information_for_navigation_goal(
element_tree_format = ElementTreeFormat.HTML
element_tree_in_prompt: str = scraped_page.build_element_tree(element_tree_format)
scraped_page_refreshed = await scraped_page.refresh()
extract_information_prompt = prompt_engine.load_prompt(
prompt_template,
navigation_goal=task.navigation_goal,
@@ -2244,8 +2245,8 @@ async def extract_information_for_navigation_goal(
elements=element_tree_in_prompt,
data_extraction_goal=task.data_extraction_goal,
extracted_information_schema=task.extracted_information_schema,
current_url=scraped_page.url,
extracted_text=scraped_page.extracted_text,
current_url=scraped_page_refreshed.url,
extracted_text=scraped_page_refreshed.extracted_text,
error_code_mapping_str=(json.dumps(task.error_code_mapping) if task.error_code_mapping else None),
utc_datetime=datetime.utcnow().strftime("%Y-%m-%d %H:%M"),
)

View File

@@ -241,13 +241,12 @@ class ScrapedPage(BaseModel):
raise UnknownElementTreeFormat(fmt=fmt)
async def refresh(self, with_screenshot: bool = True) -> Self:
async def refresh(self) -> Self:
refreshed_page = await scrape_website(
browser_state=self._browser_state,
url=self.url,
cleanup_element_tree=self._clean_up_func,
scrape_exclude=self._scrape_exclude,
with_screenshot=with_screenshot,
)
self.elements = refreshed_page.elements
self.id_to_css_dict = refreshed_page.id_to_css_dict
@@ -260,6 +259,7 @@ class ScrapedPage(BaseModel):
self.screenshots = refreshed_page.screenshots or self.screenshots
self.html = refreshed_page.html
self.extracted_text = refreshed_page.extracted_text
self.url = refreshed_page.url
return self
@@ -269,7 +269,6 @@ async def scrape_website(
cleanup_element_tree: CleanupElementTreeFunc,
num_retry: int = 0,
scrape_exclude: ScrapeExcludeFunc | None = None,
with_screenshot: bool = True,
) -> ScrapedPage:
"""
************************************************************************************************
@@ -299,7 +298,6 @@ async def scrape_website(
url=url,
cleanup_element_tree=cleanup_element_tree,
scrape_exclude=scrape_exclude,
with_screenshot=with_screenshot,
)
except Exception as e:
# NOTE: MAX_SCRAPING_RETRIES is set to 0 in both staging and production
@@ -321,7 +319,6 @@ async def scrape_website(
cleanup_element_tree,
num_retry=num_retry,
scrape_exclude=scrape_exclude,
with_screenshot=with_screenshot,
)
@@ -369,7 +366,6 @@ async def scrape_web_unsafe(
url: str,
cleanup_element_tree: CleanupElementTreeFunc,
scrape_exclude: ScrapeExcludeFunc | None = None,
with_screenshot: bool = True,
) -> ScrapedPage:
"""
Asynchronous function that performs web scraping without any built-in error handling. This function is intended
@@ -394,11 +390,7 @@ async def scrape_web_unsafe(
LOG.info("Waiting for 5 seconds before scraping the website.")
await asyncio.sleep(5)
screenshots: list[bytes] = []
# TODO: do we need to scroll to the button when we scrape without screenshots?
if with_screenshot:
screenshots = await SkyvernFrame.take_split_screenshots(page=page, url=url, draw_boxes=True)
screenshots = await SkyvernFrame.take_split_screenshots(page=page, url=url, draw_boxes=True)
elements, element_tree = await get_interactable_element_tree(page, scrape_exclude)
element_tree = await cleanup_element_tree(page, url, copy.deepcopy(element_tree))