Generate error failure when summarizing the failure reason (#3395)

This commit is contained in:
LawyZheng
2025-09-10 02:03:00 +08:00
committed by GitHub
parent 4411ef00fe
commit ceec64d201
7 changed files with 60 additions and 26 deletions

View File

@@ -1,4 +1,13 @@
from pydantic import BaseModel
from pydantic import BaseModel, Field
# One error matched against the user's error-code list, together with the
# reasoning behind the match and a confidence score.
class UserDefinedError(BaseModel):
    error_code: str
    reasoning: str
    # ge=0 / le=1 keep the confidence inside the inclusive range 0..1.
    confidence_float: float = Field(..., ge=0, le=1)

    def __repr__(self) -> str:
        """Render as the reasoning text followed by code and confidence in parentheses."""
        details = f"error_code={self.error_code}, confidence_float={self.confidence_float}"
        return f"{self.reasoning}({details})"
class SkyvernDefinedError(BaseModel):

View File

@@ -26,7 +26,7 @@ from skyvern.constants import (
SPECIAL_FIELD_VERIFICATION_CODE,
ScrapeType,
)
from skyvern.errors.errors import ReachMaxRetriesError, ReachMaxStepsError
from skyvern.errors.errors import ReachMaxRetriesError, ReachMaxStepsError, UserDefinedError
from skyvern.exceptions import (
BrowserSessionNotFound,
BrowserStateMissingPage,
@@ -82,7 +82,7 @@ from skyvern.schemas.steps import AgentStepOutput
from skyvern.services import run_service
from skyvern.services.task_v1_service import is_cua_task
from skyvern.utils.image_resizer import Resolution
from skyvern.utils.prompt_engine import load_prompt_with_elements
from skyvern.utils.prompt_engine import MaxStepsReasonResponse, load_prompt_with_elements
from skyvern.webeye.actions.action_types import ActionType
from skyvern.webeye.actions.actions import (
Action,
@@ -93,7 +93,6 @@ from skyvern.webeye.actions.actions import (
ExtractAction,
ReloadPageAction,
TerminateAction,
UserDefinedError,
WebAction,
)
from skyvern.webeye.actions.caching import retrieve_action_plan
@@ -2706,7 +2705,7 @@ class ForgeAgent:
task: Task,
step: Step,
page: Page | None,
) -> str:
) -> MaxStepsReasonResponse:
steps_results = []
try:
steps = await app.DATABASE.get_task_steps(
@@ -2717,7 +2716,12 @@ class ForgeAgent:
continue
if len(step.output.errors) > 0:
return ";".join([repr(err) for err in step.output.errors])
failure_reason = ";".join([repr(err) for err in step.output.errors])
return MaxStepsReasonResponse(
page_info="",
reasoning=failure_reason,
errors=step.output.errors,
)
if step.output.actions_and_results is None:
continue
@@ -2749,18 +2753,27 @@ class ForgeAgent:
navigation_goal=task.navigation_goal,
navigation_payload=task.navigation_payload,
steps=steps_results,
error_code_mapping_str=(json.dumps(task.error_code_mapping) if task.error_code_mapping else None),
local_datetime=datetime.now(skyvern_context.ensure_context().tz_info).isoformat(),
)
json_response = await app.LLM_API_HANDLER(
prompt=prompt, screenshots=screenshots, step=step, prompt_name="summarize-max-steps-reason"
)
return json_response.get("reasoning", "")
return MaxStepsReasonResponse.model_validate(json_response)
except Exception:
LOG.warning("Failed to summary the failure reason", task_id=task.task_id, step_id=step.step_id)
if steps_results:
last_step_result = steps_results[-1]
return f"Step {last_step_result['order']}: {last_step_result['actions_result']}"
return ""
return MaxStepsReasonResponse(
page_info="",
reasoning=f"Step {last_step_result['order']}: {last_step_result['actions_result']}",
errors=[],
)
return MaxStepsReasonResponse(
page_info="",
reasoning="",
errors=[],
)
async def summary_failure_reason_for_max_retries(
self,
@@ -2904,21 +2917,22 @@ class ForgeAgent:
)
last_step = await self.update_step(step, is_last=True)
failure_reason = await self.summary_failure_reason_for_max_steps(
generated_failure_reason = await self.summary_failure_reason_for_max_steps(
organization=organization,
task=task,
step=step,
page=page,
)
failure_reason = (
f"Reached the maximum steps ({max_steps_per_run}). Possible failure reasons: {failure_reason}"
)
failure_reason = f"Reached the maximum steps ({max_steps_per_run}). Possible failure reasons: {generated_failure_reason.reasoning}"
errors = [ReachMaxStepsError().model_dump()] + [
error.model_dump() for error in generated_failure_reason.errors
]
await self.update_task(
task,
status=TaskStatus.failed,
failure_reason=failure_reason,
errors=[ReachMaxStepsError().model_dump()],
errors=errors,
)
return False, last_step, None
else:

View File

@@ -4,11 +4,21 @@ Make sure to ONLY return the JSON object in this format with no additional text
```json
{
"page_info": str, // Think step by step. Describe all the useful information in the page related to the user goal.
"reasoning": str, // Think step by step. Describe the reason you summarized based on 'page_info', screenshots, user goal, user detail and result of actions. Keep the reasoning short and to the point.
"reasoning": str, // Think step by step. Describe the reason you summarized based on 'page_info', screenshots, user goal, user detail and result of actions. Keep the reasoning short and to the point.{% if error_code_mapping_str %}
"errors": array // A list of errors. This is used to surface any errors that matches the current situation. If no error description suits the current situation on the screenshots or the action history, return an empty list. You are allowed to return multiple errors if there are multiple errors on the page.
[{
"error_code": str, // The error code from the user's error code list
"reasoning": str, // The reasoning behind the error. Be specific, referencing any user information and their fields in your reasoning. Keep the reasoning short and to the point.
"confidence_float": float // The confidence of the error. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence
}]{% endif %}
}
User Goal:
{{ navigation_goal }}
{% if error_code_mapping_str %}
Use the error codes and their descriptions to surface user-defined errors. Do not return any error that's not defined by the user. User defined errors:
{{ error_code_mapping_str }}
{% endif %}
User Details:
{{ navigation_payload }}

View File

@@ -2,7 +2,8 @@ from __future__ import annotations
from pydantic import BaseModel
from skyvern.webeye.actions.actions import Action, UserDefinedError
from skyvern.errors.errors import UserDefinedError
from skyvern.webeye.actions.actions import Action
from skyvern.webeye.actions.responses import ActionResult

View File

@@ -4,6 +4,7 @@ import structlog
from pydantic import BaseModel
from skyvern.constants import DEFAULT_MAX_TOKENS
from skyvern.errors.errors import UserDefinedError
from skyvern.forge.sdk.prompting import PromptEngine
from skyvern.utils.token_counter import count_tokens
from skyvern.webeye.scraper.scraper import ElementTreeBuilder
@@ -30,6 +31,12 @@ class CheckDateFormatResponse(BaseModel):
HTMLTreeStr = str
# Parsed response of the "summarize-max-steps-reason" prompt.
class MaxStepsReasonResponse(BaseModel):
    # The model's description of the page content relevant to the user goal.
    page_info: str
    # Short explanation of why the run failed.
    reasoning: str
    # User-defined errors the model matched. The prompt only asks for the
    # "errors" key when an error-code mapping is supplied, so the field must be
    # optional: if it were required, validating a response for a task without a
    # mapping would raise and the generated reasoning would be thrown away.
    # Pydantic copies field defaults per instance, so this list is not shared.
    errors: list[UserDefinedError] = []
def load_prompt_with_elements(
element_tree_builder: ElementTreeBuilder,
prompt_engine: PromptEngine,

View File

@@ -6,6 +6,7 @@ import structlog
from litellm import ConfigDict
from pydantic import BaseModel, Field
from skyvern.errors.errors import UserDefinedError
from skyvern.webeye.actions.action_types import ActionType
LOG = structlog.get_logger()
@@ -19,15 +20,6 @@ class ActionStatus(StrEnum):
completed = "completed"
# Pydantic model for a single user-defined error: code, reasoning, confidence.
# NOTE(review): the hunk header (-19,15 +20,6) and the new import from
# skyvern.errors.errors suggest this is the copy being removed here — confirm.
class UserDefinedError(BaseModel):
error_code: str
reasoning: str
# ge=0 / le=1 restrict the confidence to the inclusive range 0..1.
confidence_float: float = Field(..., ge=0, le=1)
# Custom repr: the reasoning text followed by the error code and confidence
# in parentheses.
def __repr__(self) -> str:
return f"{self.reasoning}(error_code={self.error_code}, confidence_float={self.confidence_float})"
class SelectOption(BaseModel):
label: str | None = None
value: str | None = None

View File

@@ -6,8 +6,9 @@ from openai.types.responses.response import Response as OpenAIResponse
from pydantic import BaseModel, ConfigDict
from skyvern.config import settings
from skyvern.errors.errors import UserDefinedError
from skyvern.schemas.steps import AgentStepOutput
from skyvern.webeye.actions.actions import Action, DecisiveAction, UserDefinedError
from skyvern.webeye.actions.actions import Action, DecisiveAction
from skyvern.webeye.actions.responses import ActionResult
from skyvern.webeye.scraper.scraper import ScrapedPage