Generate user-defined errors when summarizing the failure reason (#3395)
This commit is contained in:
@@ -1,4 +1,13 @@
|
||||
from pydantic import BaseModel
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class UserDefinedError(BaseModel):
    """A single error entry carrying an error code, a reasoning and a confidence.

    Attributes are validated by pydantic; ``confidence_float`` is required and
    must lie within the closed interval [0, 1].
    """

    # Code identifying the error.
    error_code: str
    # Free-text explanation attached to the error.
    reasoning: str
    # Required value, constrained to 0 <= confidence_float <= 1.
    confidence_float: float = Field(..., ge=0, le=1)

    def __repr__(self) -> str:
        """Return the reasoning followed by the code and confidence in parentheses."""
        details = f"error_code={self.error_code}, confidence_float={self.confidence_float}"
        return f"{self.reasoning}({details})"
|
||||
|
||||
|
||||
class SkyvernDefinedError(BaseModel):
|
||||
|
||||
@@ -26,7 +26,7 @@ from skyvern.constants import (
|
||||
SPECIAL_FIELD_VERIFICATION_CODE,
|
||||
ScrapeType,
|
||||
)
|
||||
from skyvern.errors.errors import ReachMaxRetriesError, ReachMaxStepsError
|
||||
from skyvern.errors.errors import ReachMaxRetriesError, ReachMaxStepsError, UserDefinedError
|
||||
from skyvern.exceptions import (
|
||||
BrowserSessionNotFound,
|
||||
BrowserStateMissingPage,
|
||||
@@ -82,7 +82,7 @@ from skyvern.schemas.steps import AgentStepOutput
|
||||
from skyvern.services import run_service
|
||||
from skyvern.services.task_v1_service import is_cua_task
|
||||
from skyvern.utils.image_resizer import Resolution
|
||||
from skyvern.utils.prompt_engine import load_prompt_with_elements
|
||||
from skyvern.utils.prompt_engine import MaxStepsReasonResponse, load_prompt_with_elements
|
||||
from skyvern.webeye.actions.action_types import ActionType
|
||||
from skyvern.webeye.actions.actions import (
|
||||
Action,
|
||||
@@ -93,7 +93,6 @@ from skyvern.webeye.actions.actions import (
|
||||
ExtractAction,
|
||||
ReloadPageAction,
|
||||
TerminateAction,
|
||||
UserDefinedError,
|
||||
WebAction,
|
||||
)
|
||||
from skyvern.webeye.actions.caching import retrieve_action_plan
|
||||
@@ -2706,7 +2705,7 @@ class ForgeAgent:
|
||||
task: Task,
|
||||
step: Step,
|
||||
page: Page | None,
|
||||
) -> str:
|
||||
) -> MaxStepsReasonResponse:
|
||||
steps_results = []
|
||||
try:
|
||||
steps = await app.DATABASE.get_task_steps(
|
||||
@@ -2717,7 +2716,12 @@ class ForgeAgent:
|
||||
continue
|
||||
|
||||
if len(step.output.errors) > 0:
|
||||
return ";".join([repr(err) for err in step.output.errors])
|
||||
failure_reason = ";".join([repr(err) for err in step.output.errors])
|
||||
return MaxStepsReasonResponse(
|
||||
page_info="",
|
||||
reasoning=failure_reason,
|
||||
errors=step.output.errors,
|
||||
)
|
||||
|
||||
if step.output.actions_and_results is None:
|
||||
continue
|
||||
@@ -2749,18 +2753,27 @@ class ForgeAgent:
|
||||
navigation_goal=task.navigation_goal,
|
||||
navigation_payload=task.navigation_payload,
|
||||
steps=steps_results,
|
||||
error_code_mapping_str=(json.dumps(task.error_code_mapping) if task.error_code_mapping else None),
|
||||
local_datetime=datetime.now(skyvern_context.ensure_context().tz_info).isoformat(),
|
||||
)
|
||||
json_response = await app.LLM_API_HANDLER(
|
||||
prompt=prompt, screenshots=screenshots, step=step, prompt_name="summarize-max-steps-reason"
|
||||
)
|
||||
return json_response.get("reasoning", "")
|
||||
return MaxStepsReasonResponse.model_validate(json_response)
|
||||
except Exception:
|
||||
LOG.warning("Failed to summary the failure reason", task_id=task.task_id, step_id=step.step_id)
|
||||
if steps_results:
|
||||
last_step_result = steps_results[-1]
|
||||
return f"Step {last_step_result['order']}: {last_step_result['actions_result']}"
|
||||
return ""
|
||||
return MaxStepsReasonResponse(
|
||||
page_info="",
|
||||
reasoning=f"Step {last_step_result['order']}: {last_step_result['actions_result']}",
|
||||
errors=[],
|
||||
)
|
||||
return MaxStepsReasonResponse(
|
||||
page_info="",
|
||||
reasoning="",
|
||||
errors=[],
|
||||
)
|
||||
|
||||
async def summary_failure_reason_for_max_retries(
|
||||
self,
|
||||
@@ -2904,21 +2917,22 @@ class ForgeAgent:
|
||||
)
|
||||
last_step = await self.update_step(step, is_last=True)
|
||||
|
||||
failure_reason = await self.summary_failure_reason_for_max_steps(
|
||||
generated_failure_reason = await self.summary_failure_reason_for_max_steps(
|
||||
organization=organization,
|
||||
task=task,
|
||||
step=step,
|
||||
page=page,
|
||||
)
|
||||
failure_reason = (
|
||||
f"Reached the maximum steps ({max_steps_per_run}). Possible failure reasons: {failure_reason}"
|
||||
)
|
||||
failure_reason = f"Reached the maximum steps ({max_steps_per_run}). Possible failure reasons: {generated_failure_reason.reasoning}"
|
||||
errors = [ReachMaxStepsError().model_dump()] + [
|
||||
error.model_dump() for error in generated_failure_reason.errors
|
||||
]
|
||||
|
||||
await self.update_task(
|
||||
task,
|
||||
status=TaskStatus.failed,
|
||||
failure_reason=failure_reason,
|
||||
errors=[ReachMaxStepsError().model_dump()],
|
||||
errors=errors,
|
||||
)
|
||||
return False, last_step, None
|
||||
else:
|
||||
|
||||
@@ -4,11 +4,21 @@ Make sure to ONLY return the JSON object in this format with no additional text
|
||||
```json
|
||||
{
|
||||
"page_info": str, // Think step by step. Describe all the useful information in the page related to the user goal.
|
||||
"reasoning": str, // Think step by step. Describe the reason you summarized based on 'page_info', screenshots, user goal, user detail and result of actions. Keep the reasoning short and to the point.
|
||||
"reasoning": str, // Think step by step. Describe the reason you summarized based on 'page_info', screenshots, user goal, user detail and result of actions. Keep the reasoning short and to the point.{% if error_code_mapping_str %}
|
||||
"errors": array // A list of errors. This is used to surface any errors that matches the current situation. If no error description suits the current situation on the screenshots or the action history, return an empty list. You are allowed to return multiple errors if there are multiple errors on the page.
|
||||
[{
|
||||
"error_code": str, // The error code from the user's error code list
|
||||
"reasoning": str, // The reasoning behind the error. Be specific, referencing any user information and their fields in your reasoning. Keep the reasoning short and to the point.
|
||||
"confidence_float": float // The confidence of the error. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence
|
||||
}]{% endif %}
|
||||
}
|
||||
|
||||
User Goal:
|
||||
{{ navigation_goal }}
|
||||
{% if error_code_mapping_str %}
|
||||
Use the error codes and their descriptions to surface user-defined errors. Do not return any error that's not defined by the user. User defined errors:
|
||||
{{ error_code_mapping_str }}
|
||||
{% endif %}
|
||||
|
||||
User Details:
|
||||
{{ navigation_payload }}
|
||||
|
||||
@@ -2,7 +2,8 @@ from __future__ import annotations
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from skyvern.webeye.actions.actions import Action, UserDefinedError
|
||||
from skyvern.errors.errors import UserDefinedError
|
||||
from skyvern.webeye.actions.actions import Action
|
||||
from skyvern.webeye.actions.responses import ActionResult
|
||||
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ import structlog
|
||||
from pydantic import BaseModel
|
||||
|
||||
from skyvern.constants import DEFAULT_MAX_TOKENS
|
||||
from skyvern.errors.errors import UserDefinedError
|
||||
from skyvern.forge.sdk.prompting import PromptEngine
|
||||
from skyvern.utils.token_counter import count_tokens
|
||||
from skyvern.webeye.scraper.scraper import ElementTreeBuilder
|
||||
@@ -30,6 +31,12 @@ class CheckDateFormatResponse(BaseModel):
|
||||
HTMLTreeStr = str
|
||||
|
||||
|
||||
class MaxStepsReasonResponse(BaseModel):
    """Structured result of summarizing why a task failed.

    Attributes:
        page_info: Description of the page content relevant to the failure.
        reasoning: Short explanation of the failure.
        errors: Errors surfaced alongside the reasoning. Defaults to an empty
            list so that a payload without an ``errors`` key still validates
            instead of raising a ``ValidationError`` and losing ``reasoning``.
    """

    page_info: str
    reasoning: str
    # Optional on input: pydantic copies this mutable default per instance,
    # so the empty list is never shared between model objects.
    errors: list[UserDefinedError] = []
|
||||
|
||||
|
||||
def load_prompt_with_elements(
|
||||
element_tree_builder: ElementTreeBuilder,
|
||||
prompt_engine: PromptEngine,
|
||||
|
||||
@@ -6,6 +6,7 @@ import structlog
|
||||
from litellm import ConfigDict
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from skyvern.errors.errors import UserDefinedError
|
||||
from skyvern.webeye.actions.action_types import ActionType
|
||||
|
||||
LOG = structlog.get_logger()
|
||||
@@ -19,15 +20,6 @@ class ActionStatus(StrEnum):
|
||||
completed = "completed"
|
||||
|
||||
|
||||
class UserDefinedError(BaseModel):
    """Error record made of a code, a reasoning string and a bounded confidence."""

    error_code: str  # code identifying the error
    reasoning: str  # explanation attached to the error
    confidence_float: float = Field(..., ge=0, le=1)  # required, within [0, 1]

    def __repr__(self) -> str:
        """Render as ``<reasoning>(error_code=<code>, confidence_float=<value>)``."""
        suffix = f"(error_code={self.error_code}, confidence_float={self.confidence_float})"
        return f"{self.reasoning}{suffix}"
|
||||
|
||||
|
||||
class SelectOption(BaseModel):
|
||||
label: str | None = None
|
||||
value: str | None = None
|
||||
|
||||
@@ -6,8 +6,9 @@ from openai.types.responses.response import Response as OpenAIResponse
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
|
||||
from skyvern.config import settings
|
||||
from skyvern.errors.errors import UserDefinedError
|
||||
from skyvern.schemas.steps import AgentStepOutput
|
||||
from skyvern.webeye.actions.actions import Action, DecisiveAction, UserDefinedError
|
||||
from skyvern.webeye.actions.actions import Action, DecisiveAction
|
||||
from skyvern.webeye.actions.responses import ActionResult
|
||||
from skyvern.webeye.scraper.scraper import ScrapedPage
|
||||
|
||||
|
||||
Reference in New Issue
Block a user