Generate user-defined errors when summarizing the max-steps failure reason (#3395)

2025-09-10 02:03:00 +08:00
parent 4411ef00fe
commit ceec64d201
7 changed files with 60 additions and 26 deletions
--- a/skyvern/errors/errors.py
+++ b/skyvern/errors/errors.py
@@ -1,4 +1,13 @@
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
+
+
+class UserDefinedError(BaseModel):  # an error reported against the user's own error code list
+    error_code: str
+    reasoning: str
+    confidence_float: float = Field(..., ge=0, le=1)  # required; validated to lie within [0, 1]
+
+    def __repr__(self) -> str:  # reasoning-first text; agent.py joins these reprs into a failure reason
+        return f"{self.reasoning}(error_code={self.error_code}, confidence_float={self.confidence_float})"


 class SkyvernDefinedError(BaseModel):
--- a/skyvern/forge/agent.py
+++ b/skyvern/forge/agent.py
@@ -26,7 +26,7 @@ from skyvern.constants import (
    SPECIAL_FIELD_VERIFICATION_CODE,
    ScrapeType,
 )
-from skyvern.errors.errors import ReachMaxRetriesError, ReachMaxStepsError
+from skyvern.errors.errors import ReachMaxRetriesError, ReachMaxStepsError, UserDefinedError
 from skyvern.exceptions import (
    BrowserSessionNotFound,
    BrowserStateMissingPage,
@@ -82,7 +82,7 @@ from skyvern.schemas.steps import AgentStepOutput
 from skyvern.services import run_service
 from skyvern.services.task_v1_service import is_cua_task
 from skyvern.utils.image_resizer import Resolution
-from skyvern.utils.prompt_engine import load_prompt_with_elements
+from skyvern.utils.prompt_engine import MaxStepsReasonResponse, load_prompt_with_elements
 from skyvern.webeye.actions.action_types import ActionType
 from skyvern.webeye.actions.actions import (
    Action,
@@ -93,7 +93,6 @@ from skyvern.webeye.actions.actions import (
    ExtractAction,
    ReloadPageAction,
    TerminateAction,
-    UserDefinedError,
    WebAction,
 )
 from skyvern.webeye.actions.caching import retrieve_action_plan
@@ -2706,7 +2705,7 @@ class ForgeAgent:
        task: Task,
        step: Step,
        page: Page | None,
-    ) -> str:
+    ) -> MaxStepsReasonResponse:
        steps_results = []
        try:
            steps = await app.DATABASE.get_task_steps(
@@ -2717,7 +2716,12 @@ class ForgeAgent:
                    continue

                if len(step.output.errors) > 0:
-                    return ";".join([repr(err) for err in step.output.errors])
+                    failure_reason = ";".join([repr(err) for err in step.output.errors])
+                    return MaxStepsReasonResponse(
+                        page_info="",
+                        reasoning=failure_reason,
+                        errors=step.output.errors,
+                    )

                if step.output.actions_and_results is None:
                    continue
@@ -2749,18 +2753,27 @@ class ForgeAgent:
                navigation_goal=task.navigation_goal,
                navigation_payload=task.navigation_payload,
                steps=steps_results,
+                error_code_mapping_str=(json.dumps(task.error_code_mapping) if task.error_code_mapping else None),
                local_datetime=datetime.now(skyvern_context.ensure_context().tz_info).isoformat(),
            )
            json_response = await app.LLM_API_HANDLER(
                prompt=prompt, screenshots=screenshots, step=step, prompt_name="summarize-max-steps-reason"
            )
-            return json_response.get("reasoning", "")
+            return MaxStepsReasonResponse.model_validate(json_response)
        except Exception:
            LOG.warning("Failed to summary the failure reason", task_id=task.task_id, step_id=step.step_id)
            if steps_results:
                last_step_result = steps_results[-1]
-                return f"Step {last_step_result['order']}: {last_step_result['actions_result']}"
-            return ""
+                return MaxStepsReasonResponse(
+                    page_info="",
+                    reasoning=f"Step {last_step_result['order']}: {last_step_result['actions_result']}",
+                    errors=[],
+                )
+            return MaxStepsReasonResponse(
+                page_info="",
+                reasoning="",
+                errors=[],
+            )

    async def summary_failure_reason_for_max_retries(
        self,
@@ -2904,21 +2917,22 @@ class ForgeAgent:
            )
            last_step = await self.update_step(step, is_last=True)

-            failure_reason = await self.summary_failure_reason_for_max_steps(
+            generated_failure_reason = await self.summary_failure_reason_for_max_steps(
                organization=organization,
                task=task,
                step=step,
                page=page,
            )
-            failure_reason = (
-                f"Reached the maximum steps ({max_steps_per_run}). Possible failure reasons: {failure_reason}"
-            )
+            failure_reason = f"Reached the maximum steps ({max_steps_per_run}). Possible failure reasons: {generated_failure_reason.reasoning}"
+            errors = [ReachMaxStepsError().model_dump()] + [
+                error.model_dump() for error in generated_failure_reason.errors
+            ]

            await self.update_task(
                task,
                status=TaskStatus.failed,
                failure_reason=failure_reason,
-                errors=[ReachMaxStepsError().model_dump()],
+                errors=errors,
            )
            return False, last_step, None
        else:
--- a/skyvern/forge/prompts/skyvern/summarize-max-steps-reason.j2
+++ b/skyvern/forge/prompts/skyvern/summarize-max-steps-reason.j2
@@ -4,11 +4,21 @@ Make sure to ONLY return the JSON object in this format with no additional text
 ```json
 {
  "page_info": str, // Think step by step. Describe all the useful information in the page related to the user goal.
-  "reasoning": str, // Think step by step. Describe the reason you summarized based on 'page_info', screenshots, user goal, user detail and result of actions. Keep the reasoning short and to the point.
+  "reasoning": str, // Think step by step. Describe the reason you summarized based on 'page_info', screenshots, user goal, user detail and result of actions. Keep the reasoning short and to the point.{% if error_code_mapping_str %}
+  "errors": array // A list of errors. This is used to surface any errors that matches the current situation. If no error description suits the current situation on the screenshots or the action history, return an empty list. You are allowed to return multiple errors if there are multiple errors on the page.
+    [{
+        "error_code": str, // The error code from the user's error code list
+        "reasoning": str, // The reasoning behind the error. Be specific, referencing any user information and their fields in your reasoning. Keep the reasoning short and to the point.
+        "confidence_float": float // The confidence of the error. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence
+    }]{% endif %}
 }

 User Goal:
 {{ navigation_goal }}
+{% if error_code_mapping_str %}
+Use the error codes and their descriptions to surface user-defined errors. Do not return any error that's not defined by the user. User defined errors:
+{{ error_code_mapping_str }}
+{% endif %}

 User Details:
 {{ navigation_payload }}
--- a/skyvern/schemas/steps.py
+++ b/skyvern/schemas/steps.py
@@ -2,7 +2,8 @@ from __future__ import annotations

 from pydantic import BaseModel

-from skyvern.webeye.actions.actions import Action, UserDefinedError
+from skyvern.errors.errors import UserDefinedError
+from skyvern.webeye.actions.actions import Action
 from skyvern.webeye.actions.responses import ActionResult


--- a/skyvern/utils/prompt_engine.py
+++ b/skyvern/utils/prompt_engine.py
@@ -4,6 +4,7 @@ import structlog
 from pydantic import BaseModel

 from skyvern.constants import DEFAULT_MAX_TOKENS
+from skyvern.errors.errors import UserDefinedError
 from skyvern.forge.sdk.prompting import PromptEngine
 from skyvern.utils.token_counter import count_tokens
 from skyvern.webeye.scraper.scraper import ElementTreeBuilder
@@ -30,6 +31,12 @@ class CheckDateFormatResponse(BaseModel):
 HTMLTreeStr = str


+class MaxStepsReasonResponse(BaseModel):  # parsed LLM reply to the "summarize-max-steps-reason" prompt
+    page_info: str
+    reasoning: str
+    errors: list[UserDefinedError] = []  # optional: the prompt only requests "errors" when an error code mapping is set
+
+
 def load_prompt_with_elements(
    element_tree_builder: ElementTreeBuilder,
    prompt_engine: PromptEngine,
--- a/skyvern/webeye/actions/actions.py
+++ b/skyvern/webeye/actions/actions.py
@@ -6,6 +6,7 @@ import structlog
 from litellm import ConfigDict
 from pydantic import BaseModel, Field

+from skyvern.errors.errors import UserDefinedError
 from skyvern.webeye.actions.action_types import ActionType

 LOG = structlog.get_logger()
@@ -19,15 +20,6 @@ class ActionStatus(StrEnum):
    completed = "completed"


-class UserDefinedError(BaseModel):
-    error_code: str
-    reasoning: str
-    confidence_float: float = Field(..., ge=0, le=1)
-
-    def __repr__(self) -> str:
-        return f"{self.reasoning}(error_code={self.error_code}, confidence_float={self.confidence_float})"
-
-
 class SelectOption(BaseModel):
    label: str | None = None
    value: str | None = None
--- a/skyvern/webeye/actions/models.py
+++ b/skyvern/webeye/actions/models.py
@@ -6,8 +6,9 @@ from openai.types.responses.response import Response as OpenAIResponse
 from pydantic import BaseModel, ConfigDict

 from skyvern.config import settings
+from skyvern.errors.errors import UserDefinedError
 from skyvern.schemas.steps import AgentStepOutput
-from skyvern.webeye.actions.actions import Action, DecisiveAction, UserDefinedError
+from skyvern.webeye.actions.actions import Action, DecisiveAction
 from skyvern.webeye.actions.responses import ActionResult
 from skyvern.webeye.scraper.scraper import ScrapedPage