Make sure we do a web scrape before data extraction at the end of a task to ensure the freshness of the scraped data (#1152)

2024-11-06 22:55:21 -08:00
parent f7c9dc2279
commit c80597e7a5
4 changed files with 12 additions and 17 deletions
--- a/skyvern/forge/agent.py
+++ b/skyvern/forge/agent.py
@@ -907,17 +907,19 @@ class ForgeAgent:
                step_id=step.step_id,
                workflow_run_id=task.workflow_run_id,
            )
-            scraped_page_without_screenshots = await scraped_page.refresh(with_screenshot=False)
+            scraped_page_refreshed = await scraped_page.refresh()

            verification_prompt = prompt_engine.load_prompt(
                "check-user-goal",
                navigation_goal=task.navigation_goal,
                navigation_payload=task.navigation_payload,
-                elements=scraped_page_without_screenshots.build_element_tree(ElementTreeFormat.HTML),
+                elements=scraped_page_refreshed.build_element_tree(ElementTreeFormat.HTML),
            )

            # this prompt is critical to our agent so let's use the primary LLM API handler
-            verification_response = await app.LLM_API_HANDLER(prompt=verification_prompt, step=step, screenshots=None)
+            verification_response = await app.LLM_API_HANDLER(
+                prompt=verification_prompt, step=step, screenshots=scraped_page_refreshed.screenshots
+            )
            if "user_goal_achieved" not in verification_response or "thoughts" not in verification_response:
                LOG.error(
                    "Invalid LLM response for user goal success verification, skipping verification",
--- a/skyvern/forge/prompts/skyvern/check-user-goal.j2
+++ b/skyvern/forge/prompts/skyvern/check-user-goal.j2
@@ -1,4 +1,4 @@
-Your are here to help the user determine if the user has completed their goal on the web. Use the content of the elements parsed from the page, the user goal and user details to determine whether the user goal has been completed or not.
+You are here to help the user determine if the user has completed their goal on the web. Use the content of the elements parsed from the page, the screenshots of the page, the user goal and user details to determine whether the user goal has been completed or not.

 Make sure to ONLY return the JSON object in this format with no additional text before or after it:
 ```json