better failure reason (#1066)

2024-10-28 09:42:55 +08:00
parent e42aabfc2e
commit d2b79ab5de
4 changed files with 104 additions and 11 deletions
--- a/skyvern/forge/agent.py
+++ b/skyvern/forge/agent.py
@@ -335,7 +335,7 @@ class ForgeAgent:
                    is_task_completed,
                    maybe_last_step,
                    maybe_next_step,
-                ) = await self.handle_completed_step(organization, task, step)
+                ) = await self.handle_completed_step(organization, task, step, await browser_state.get_working_page())
                if is_task_completed is not None and maybe_last_step:
                    last_step = maybe_last_step
                    await self.send_task_response(
@@ -1649,8 +1649,65 @@ class ForgeAgent:
            )
            return next_step
    async def summary_failure_reason_for_max_steps(
        self,
        organization: Organization,
        task: Task,
        step: Step,
        page: Page | None,
    ) -> str:
        """Build a human-readable reason for a task that ran out of steps.

        The task's steps are loaded from the database and walked in order:

        * If a step's output carries errors, the ``repr`` of those errors,
          joined with ``";"``, is returned immediately and no LLM call is made.
        * Otherwise a per-step summary of each action's reasoning, type and
          last result is collected and sent, together with screenshots of
          ``page`` (when one is given), to the LLM using the
          ``summarize-max-steps-reason`` prompt.

        Args:
            organization: Organization whose ``organization_id`` scopes the
                step lookup.
            task: Task being summarized; supplies ``task_id``,
                ``navigation_goal`` and ``navigation_payload``.
            step: Step forwarded to the LLM handler as ``step``.
            page: Page to screenshot, or ``None`` to send no screenshots.

        Returns:
            The ``"reasoning"`` value of the LLM response, the joined step
            errors, or ``""`` when no reason is available or anything fails.
        """
        try:
            steps = await app.DATABASE.get_task_steps(
                task_id=task.task_id, organization_id=organization.organization_id
            )
            steps_results: list[dict[str, Any]] = []
            # The loop variable is deliberately NOT named ``step``: rebinding the
            # parameter here would make the LLM call below receive whichever
            # stored step the loop visited last instead of the caller's step.
            for step_cnt, cur_step in enumerate(steps):
                output = cur_step.output
                if output is None:
                    continue
                if output.errors:
                    # Explicit errors recorded on a step take precedence over
                    # an LLM-generated summary.
                    return ";".join([repr(err) for err in output.errors])
                if output.actions_and_results is None:
                    continue
                # Only the last result of each action is reported; actions
                # without any result are left out of the summary.
                action_result_summary: list[str] = [
                    f"{action.reasoning}(action_type={action.action_type}, result={'success' if action_results[-1].success else 'failed'})"
                    for action, action_results in output.actions_and_results
                    if action_results
                ]
                steps_results.append(
                    {
                        "order": step_cnt,
                        "actions_result": action_result_summary,
                    }
                )

            screenshots: list[bytes] = []
            if page is not None:
                screenshots = await SkyvernFrame.take_split_screenshots(page=page, url=page.url)

            prompt = prompt_engine.load_prompt(
                "summarize-max-steps-reason",
                step_count=len(steps),
                navigation_goal=task.navigation_goal,
                navigation_payload=task.navigation_payload,
                steps=steps_results,
            )
            json_response = await app.LLM_API_HANDLER(prompt=prompt, screenshots=screenshots, step=step)
            # A missing or null "reasoning" collapses to "" so the declared
            # ``str`` return type always holds.
            return json_response.get("reasoning", "") or ""
        except Exception:
            # Best effort: a failed summary must never break the caller, which
            # treats an empty string as "no reason available".
            LOG.exception("Failed to summary the failure reason", task=task.task_id)
            return ""
    async def handle_completed_step(
-        self, organization: Organization, task: Task, step: Step
+        self,
        organization: Organization,
        task: Task,
        step: Step,
        page: Page | None,
    ) -> tuple[bool | None, Step | None, Step | None]:
        if step.is_goal_achieved():
            LOG.info(
@@ -1701,10 +1758,20 @@ class ForgeAgent:
                max_steps=max_steps_per_run,
            )
            last_step = await self.update_step(step, is_last=True)
            failure_reason = await self.summary_failure_reason_for_max_steps(
                organization=organization,
                task=task,
                step=step,
                page=page,
            )
            if not failure_reason:
                failure_reason = f"Max steps per task ({max_steps_per_run}) exceeded"
            await self.update_task(
                task,
                status=TaskStatus.failed,
-                failure_reason=f"Max steps per task ({max_steps_per_run}) exceeded",
+                failure_reason=failure_reason,
            )
            return False, last_step, None
        else:
--- a/skyvern/forge/prompts/skyvern/summarize-max-steps-reason.j2
+++ b/skyvern/forge/prompts/skyvern/summarize-max-steps-reason.j2
@@ -0,0 +1,18 @@
 The user is performing a task step by step on a web page. Your job is to summarize the main reason why the user's goal has not been achieved within the limit of {{ step_count }} steps. Base this summary on the provided screenshots, the navigation goal, the user details, and the results of the actions taken in each step.
 Make sure to ONLY return the JSON object in this format with no additional text before or after it:
 ```json
 {
  "page_info": str, // Think step by step. Describe all the useful information in the page related to the user goal.
  "reasoning": str, // Think step by step. Describe the reason you summarized based on 'page_info', screenshots, user goal, user detail and result of actions. Keep the reasoning short and to the point.
 }
 ```
 User Goal:
 {{ navigation_goal }}
 User Details:
 {{ navigation_payload }}
 Actions Taken In Each Step:
 {% for step in steps %}Step {{ step.order }} -- {{ step.actions_result }}
 {% endfor %}
--- a/skyvern/forge/sdk/routes/agent_protocol.py
+++ b/skyvern/forge/sdk/routes/agent_protocol.py
@@ -294,17 +294,22 @@ async def get_task(
            task_status=task_obj.status,
        )
-    failure_reason = None
+    failure_reason: str | None = None
    if task_obj.status == TaskStatus.failed and (latest_step.output or task_obj.failure_reason):
        failure_reason = ""
        if task_obj.failure_reason:
-            failure_reason += f"Reasoning: {task_obj.failure_reason or ''}"
+            failure_reason += task_obj.failure_reason or ""
-            failure_reason += "\n"
+        if latest_step.output is not None and latest_step.output.actions_and_results is not None:
-        if latest_step.output and latest_step.output.action_results:
+            action_results_string: list[str] = []
-            failure_reason += "Exceptions: "
+            for action, results in latest_step.output.actions_and_results:
-            failure_reason += str(
+                if len(results) == 0:
-                [f"[{ar.exception_type}]: {ar.exception_message}" for ar in latest_step.output.action_results]
+                    continue
-            )
+                if results[-1].success:
                    continue
                action_results_string.append(f"{action.action_type} action failed.")
            if len(action_results_string) > 0:
                failure_reason += "(Exceptions: " + str(action_results_string) + ")"
    return task_obj.to_task_response(
        action_screenshot_urls=latest_action_screenshot_urls,
--- a/skyvern/webeye/actions/actions.py
+++ b/skyvern/webeye/actions/actions.py
@@ -53,6 +53,9 @@ class UserDefinedError(BaseModel):
    reasoning: str
    confidence_float: float = Field(..., ge=0, le=1)
    def __repr__(self) -> str:
        """Render the error as ``<reasoning>(error_code=..., confidence_float=...)``."""
        details = ", ".join(
            (
                f"error_code={self.error_code}",
                f"confidence_float={self.confidence_float}",
            )
        )
        return f"{self.reasoning}({details})"
 class SelectOption(BaseModel):
    label: str | None = None