better failure reason (#1066)

2024-10-28 09:42:55 +08:00
parent e42aabfc2e
commit d2b79ab5de
4 changed files with 104 additions and 11 deletions
--- a/skyvern/forge/agent.py
+++ b/skyvern/forge/agent.py
@@ -335,7 +335,7 @@ class ForgeAgent:
                    is_task_completed,
                    maybe_last_step,
                    maybe_next_step,
-                ) = await self.handle_completed_step(organization, task, step)
+                ) = await self.handle_completed_step(organization, task, step, await browser_state.get_working_page())
                if is_task_completed is not None and maybe_last_step:
                    last_step = maybe_last_step
                    await self.send_task_response(
@@ -1649,8 +1649,65 @@ class ForgeAgent:
            )
            return next_step

+    async def summary_failure_reason_for_max_steps(
+        self,
+        organization: Organization,
+        task: Task,
+        step: Step,
+        page: Page | None,
+    ) -> str:
+        """Summarize why the task did not finish within its step budget.
+
+        Loads every step recorded for the task. If any step output carries
+        errors, those are returned directly, joined with ";". Otherwise the
+        per-step action outcomes (plus screenshots of ``page`` when one is
+        given) are sent to the LLM and the "reasoning" field of its JSON
+        response is returned.
+
+        Args:
+            organization: Organization whose id scopes the step lookup.
+            task: Task whose failure is being summarized.
+            step: Step handed to the LLM handler together with the prompt.
+            page: Page to screenshot for the LLM, or None to send no screenshots.
+
+        Returns:
+            The failure summary. Best-effort: any exception is logged and ""
+            is returned instead of propagating.
+        """
+        try:
+            steps = await app.DATABASE.get_task_steps(
+                task_id=task.task_id, organization_id=organization.organization_id
+            )
+            steps_results = []
+            # The loop variable must not be called `step`: that would rebind the
+            # `step` parameter, and the LLM call below would then receive the last
+            # step loaded from the database instead of the caller's step.
+            for step_cnt, cur_step in enumerate(steps):
+                if cur_step.output is None:
+                    continue
+
+                # Explicit errors are the most precise reason available, so
+                # return them as-is without asking the LLM.
+                if len(cur_step.output.errors) > 0:
+                    return ";".join([repr(err) for err in cur_step.output.errors])
+
+                if cur_step.output.actions_and_results is None:
+                    continue
+
+                action_result_summary: list[str] = []
+                step_result: dict[str, Any] = {
+                    "order": step_cnt,
+                }
+                for action, action_results in cur_step.output.actions_and_results:
+                    if len(action_results) == 0:
+                        continue
+                    # Only the last result of an action decides its outcome.
+                    outcome = "success" if action_results[-1].success else "failed"
+                    action_result_summary.append(
+                        f"{action.reasoning}(action_type={action.action_type}, result={outcome})"
+                    )
+                step_result["actions_result"] = action_result_summary
+                steps_results.append(step_result)
+
+            screenshots: list[bytes] = []
+            if page is not None:
+                screenshots = await SkyvernFrame.take_split_screenshots(page=page, url=page.url)
+
+            prompt = prompt_engine.load_prompt(
+                "summarize-max-steps-reason",
+                step_count=len(steps),
+                navigation_goal=task.navigation_goal,
+                navigation_payload=task.navigation_payload,
+                steps=steps_results,
+            )
+            json_response = await app.LLM_API_HANDLER(prompt=prompt, screenshots=screenshots, step=step)
+            return json_response.get("reasoning", "")
+
+        except Exception:
+            LOG.exception("Failed to summary the failure reason", task=task.task_id)
+            return ""
+
    async def handle_completed_step(
-        self, organization: Organization, task: Task, step: Step
+        self,
+        organization: Organization,
+        task: Task,
+        step: Step,
+        page: Page | None,
    ) -> tuple[bool | None, Step | None, Step | None]:
        if step.is_goal_achieved():
            LOG.info(
@@ -1701,10 +1758,20 @@ class ForgeAgent:
                max_steps=max_steps_per_run,
            )
            last_step = await self.update_step(step, is_last=True)
+
+            failure_reason = await self.summary_failure_reason_for_max_steps(
+                organization=organization,
+                task=task,
+                step=step,
+                page=page,
+            )
+            if not failure_reason:
+                failure_reason = f"Max steps per task ({max_steps_per_run}) exceeded"
+
            await self.update_task(
                task,
                status=TaskStatus.failed,
-                failure_reason=f"Max steps per task ({max_steps_per_run}) exceeded",
+                failure_reason=failure_reason,
            )
            return False, last_step, None
        else:
--- a/skyvern/forge/prompts/skyvern/summarize-max-steps-reason.j2
+++ b/skyvern/forge/prompts/skyvern/summarize-max-steps-reason.j2
@@ -0,0 +1,18 @@
+User is doing the task step by step on a web page. You are here to help the user summarize the main reason why the user goal has not been achieved within the limit of {{ step_count }} steps. This summary should be based on the provided screenshot, navigation goals, user details, and the results of actions taken step by step.
+
+Make sure to ONLY return the JSON object in this format with no additional text before or after it:
+```json
+{
+  "page_info": str, // Think step by step. Describe all the useful information in the page related to the user goal.
+  "reasoning": str, // Think step by step. Describe the reason you summarized based on 'page_info', screenshots, user goal, user detail and result of actions. Keep the reasoning short and to the point.
+}
+```
+
+User Goal:
+{{ navigation_goal }}
+
+User Details:
+{{ navigation_payload }}
+
+Actions Taken In Each Step:
+{% for step in steps %}Step {{ step.order }} -- {{ step.actions_result }}
+{% endfor %}
--- a/skyvern/forge/sdk/routes/agent_protocol.py
+++ b/skyvern/forge/sdk/routes/agent_protocol.py
@@ -294,17 +294,22 @@ async def get_task(
            task_status=task_obj.status,
        )

-    failure_reason = None
+    failure_reason: str | None = None
    if task_obj.status == TaskStatus.failed and (latest_step.output or task_obj.failure_reason):
        failure_reason = ""
        if task_obj.failure_reason:
-            failure_reason += f"Reasoning: {task_obj.failure_reason or ''}"
-            failure_reason += "\n"
-        if latest_step.output and latest_step.output.action_results:
-            failure_reason += "Exceptions: "
-            failure_reason += str(
-                [f"[{ar.exception_type}]: {ar.exception_message}" for ar in latest_step.output.action_results]
-            )
+            failure_reason += task_obj.failure_reason or ""
+        if latest_step.output is not None and latest_step.output.actions_and_results is not None:
+            action_results_string: list[str] = []
+            for action, results in latest_step.output.actions_and_results:
+                if len(results) == 0:
+                    continue
+                if results[-1].success:
+                    continue
+                action_results_string.append(f"{action.action_type} action failed.")
+
+            if len(action_results_string) > 0:
+                failure_reason += "(Exceptions: " + str(action_results_string) + ")"

    return task_obj.to_task_response(
        action_screenshot_urls=latest_action_screenshot_urls,
--- a/skyvern/webeye/actions/actions.py
+++ b/skyvern/webeye/actions/actions.py
@@ -53,6 +53,9 @@ class UserDefinedError(BaseModel):
    reasoning: str
    confidence_float: float = Field(..., ge=0, le=1)

+    def __repr__(self) -> str:
+        """Render the error as its reasoning followed by code and confidence in parentheses."""
+        details = f"error_code={self.error_code}, confidence_float={self.confidence_float}"
+        return f"{self.reasoning}({details})"
+

 class SelectOption(BaseModel):
    label: str | None = None