From d19ff2bd6904272c680c9b0132f210ab770e5c54 Mon Sep 17 00:00:00 2001
From: Kerem Yilmaz <kerem@skyvern.com>
Date: Tue, 17 Sep 2024 18:59:40 -0700
Subject: [PATCH] Add complete action verification (#845)

---
 skyvern/exceptions.py                         |  6 ++
 skyvern/forge/agent.py                        | 81 ++++++++++++++++++-
 .../forge/prompts/skyvern/check-user-goal.j2  | 29 +++++++
 3 files changed, 115 insertions(+), 1 deletion(-)
 create mode 100644 skyvern/forge/prompts/skyvern/check-user-goal.j2

diff --git a/skyvern/exceptions.py b/skyvern/exceptions.py
index f6154602..8bf3a75d 100644
--- a/skyvern/exceptions.py
+++ b/skyvern/exceptions.py
@@ -502,3 +502,9 @@ class FailToFindAutocompleteOption(SkyvernException):
         super().__init__(
             f"Can't find a suitable auto completion for the current value, maybe retry with another reasonable value. current_value={current_value}"
         )
+
+
+class IllegitComplete(SkyvernException):
+    def __init__(self, data: dict | None = None) -> None:
+        # Mention the payload only when one was given; None and an empty dict both add nothing.
+        super().__init__("Illegit complete" + (f", data={data}" if data else ""))
diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py
index b41f7279..2f96c9b8 100644
--- a/skyvern/forge/agent.py
+++ b/skyvern/forge/agent.py
@@ -50,7 +50,7 @@ from skyvern.webeye.actions.actions import (
     WebAction,
     parse_actions,
 )
-from skyvern.webeye.actions.handler import ActionHandler, poll_verification_code
+from skyvern.webeye.actions.handler import ActionHandler, handle_complete_action, poll_verification_code
 from skyvern.webeye.actions.models import AgentStepOutput, DetailedAgentStepOutput
 from skyvern.webeye.actions.responses import ActionResult
 from skyvern.webeye.browser_factory import BrowserState
@@ -773,6 +773,36 @@ class ForgeAgent:
                 step_retry=step.retry_index,
                 action_results=action_results,
             )
+            if app.EXPERIMENTATION_PROVIDER.is_feature_enabled_cached(
+                "CHECK_USER_GOAL_SUCCESS_EVERY_STEP",
+                task.workflow_run_id or task.task_id,
+                properties={
+                    "organization_id": task.organization_id,
+                    "organization_created_at": str(organization.created_at) if organization else None,
+                },
+            ):
+                LOG.info("Checking if user goal is achieved after re-scraping the page")
+                # Check if navigation goal is achieved after re-scraping the page
+                new_scraped_page = await self._scrape_with_type(
+                    task=task,
+                    step=step,
+                    browser_state=browser_state,
+                    scrape_type=ScrapeType.NORMAL,
+                    organization=organization,
+                )
+                if new_scraped_page is None:
+                    LOG.warning("Failed to scrape the page before checking user goal success, skipping check...")
+                else:
+                    working_page = await browser_state.get_working_page()
+                    result_tuple = await self.check_user_goal_success(
+                        page=working_page,
+                        scraped_page=new_scraped_page,
+                        task=task,
+                        step=step,
+                    )
+                    if result_tuple is not None:
+                        complete_action, action_results = result_tuple
+                        detailed_agent_step_output.actions_and_results.append((complete_action, action_results))
             # If no action errors return the agent state and output
             completed_step = await self.update_step(
                 step=step,
@@ -811,6 +841,55 @@ class ForgeAgent:
             )
             return failed_step, detailed_agent_step_output.get_clean_detailed_output()
 
+    @staticmethod
+    async def check_user_goal_success(
+        page: Page, scraped_page: ScrapedPage, task: Task, step: Step
+    ) -> tuple[CompleteAction, list[ActionResult]] | None:
+        """Ask the secondary LLM whether the user goal is met and, if so, run a complete action.
+
+        Returns the executed ``CompleteAction`` with its results, or ``None`` when the step already
+        holds a complete action, the goal is not confirmed, the reply is malformed, or any error occurs.
+        """
+        try:
+            # Skip verification when this step's recorded output already contains a complete action.
+            prior_actions = (step.output.actions_and_results if step.output else None) or []
+            if any(isinstance(action, CompleteAction) for action, _ in prior_actions):
+                return None
+
+            verification_prompt = prompt_engine.load_prompt(
+                "check-user-goal",
+                navigation_goal=task.navigation_goal,
+                navigation_payload=task.navigation_payload,
+                elements=scraped_page.build_element_tree(ElementTreeFormat.HTML),
+            )
+            screenshots = await SkyvernFrame.take_split_screenshots(page=page, url=page.url)
+            verification_response = await app.SECONDARY_LLM_API_HANDLER(
+                prompt=verification_prompt, step=step, screenshots=screenshots
+            )
+            if "user_goal_achieved" not in verification_response or "reasoning" not in verification_response:
+                LOG.error(
+                    "Invalid LLM response for user goal success verification, skipping verification",
+                    verification_response=verification_response,
+                )
+                return None
+
+            # Runs after every step, so only an explicit JSON ``true`` counts: a non-boolean reply such as
+            # the string "false" is truthy and must not be mistaken for a completed goal.
+            if verification_response["user_goal_achieved"] is not True:
+                return None
+
+            LOG.info("User goal achieved, executing complete action")
+            complete_action = CompleteAction(
+                reasoning=verification_response["reasoning"],
+                data_extraction_goal=task.data_extraction_goal,
+            )
+            action_results = await handle_complete_action(complete_action, page, scraped_page, task, step)
+            return complete_action, action_results
+
+        except Exception:
+            LOG.error("LLM verification failed for complete action, skipping LLM verification", exc_info=True)
+            return None
+
     async def record_artifacts_after_action(self, task: Task, step: Step, browser_state: BrowserState) -> None:
         working_page = await browser_state.get_working_page()
         if not working_page:
diff --git a/skyvern/forge/prompts/skyvern/check-user-goal.j2 b/skyvern/forge/prompts/skyvern/check-user-goal.j2
new file mode 100644
index 00000000..6b2ddb96
--- /dev/null
+++ b/skyvern/forge/prompts/skyvern/check-user-goal.j2
@@ -0,0 +1,29 @@
+Based on the content of the screenshot and the elements on the page, determine whether the user goal has been successfully completed or not.
+
+The JSON object should be in this format:
+```json
+{
+  "reasoning": str, // Describe the state of the user goal and explain why it has been completed or not completed.
+  "user_goal_achieved": bool // True if the user goal has been completed, False otherwise.
+}
+```
+Make sure to ONLY return the JSON object, with no additional text before or after it. Do not make any assumptions based on the screenshot; return a response solely based on what you observe in the screenshot and nothing else.
+
+Examples:
+{
+  "reasoning": "The screenshot shows a success message for a file upload field. Since the user's goal is to upload a file, it has been successfully completed.",
+  "user_goal_achieved": true
+}
+{
+  "reasoning": "The screenshot shows a job application form with fields. Since the user's goal is to submit a job application, it has not been successfully completed.",
+  "user_goal_achieved": false
+}
+
+Elements on the page:
+{{ elements }}
+
+User Goal:
+{{ navigation_goal }}
+
+User Details:
+{{ navigation_payload }}
\ No newline at end of file