diff --git a/skyvern/errors/errors.py b/skyvern/errors/errors.py
index 7a995235..e1ac174c 100644
--- a/skyvern/errors/errors.py
+++ b/skyvern/errors/errors.py
@@ -1,4 +1,15 @@
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
+
+
+class UserDefinedError(BaseModel):
+    """An error reported against a user-defined error code, with reasoning and a 0-1 confidence."""
+
+    error_code: str
+    reasoning: str
+    confidence_float: float = Field(..., ge=0, le=1)
+
+    def __repr__(self) -> str:
+        return f"{self.reasoning}(error_code={self.error_code}, confidence_float={self.confidence_float})"
 
 
 class SkyvernDefinedError(BaseModel):
diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py
index 861d82d8..9e7732c8 100644
--- a/skyvern/forge/agent.py
+++ b/skyvern/forge/agent.py
@@ -26,7 +26,7 @@ from skyvern.constants import (
     SPECIAL_FIELD_VERIFICATION_CODE,
     ScrapeType,
 )
-from skyvern.errors.errors import ReachMaxRetriesError, ReachMaxStepsError
+from skyvern.errors.errors import ReachMaxRetriesError, ReachMaxStepsError, UserDefinedError
 from skyvern.exceptions import (
     BrowserSessionNotFound,
     BrowserStateMissingPage,
@@ -82,7 +82,7 @@ from skyvern.schemas.steps import AgentStepOutput
 from skyvern.services import run_service
 from skyvern.services.task_v1_service import is_cua_task
 from skyvern.utils.image_resizer import Resolution
-from skyvern.utils.prompt_engine import load_prompt_with_elements
+from skyvern.utils.prompt_engine import MaxStepsReasonResponse, load_prompt_with_elements
 from skyvern.webeye.actions.action_types import ActionType
 from skyvern.webeye.actions.actions import (
     Action,
@@ -93,7 +93,6 @@ from skyvern.webeye.actions.actions import (
     ExtractAction,
     ReloadPageAction,
     TerminateAction,
-    UserDefinedError,
     WebAction,
 )
 from skyvern.webeye.actions.caching import retrieve_action_plan
@@ -2706,7 +2705,7 @@ class ForgeAgent:
         task: Task,
         step: Step,
         page: Page | None,
-    ) -> str:
+    ) -> MaxStepsReasonResponse:
         steps_results = []
         try:
             steps = await app.DATABASE.get_task_steps(
@@ -2717,7 +2716,12 @@ class ForgeAgent:
                     continue
 
                 if len(step.output.errors) > 0:
-                    return ";".join([repr(err) for err in step.output.errors])
+                    failure_reason = ";".join([repr(err) for err in step.output.errors])
+                    return MaxStepsReasonResponse(
+                        page_info="",
+                        reasoning=failure_reason,
+                        errors=step.output.errors,
+                    )
 
                 if step.output.actions_and_results is None:
                     continue
@@ -2749,18 +2753,27 @@ class ForgeAgent:
                 navigation_goal=task.navigation_goal,
                 navigation_payload=task.navigation_payload,
                 steps=steps_results,
+                error_code_mapping_str=(json.dumps(task.error_code_mapping) if task.error_code_mapping else None),
                 local_datetime=datetime.now(skyvern_context.ensure_context().tz_info).isoformat(),
             )
             json_response = await app.LLM_API_HANDLER(
                 prompt=prompt, screenshots=screenshots, step=step, prompt_name="summarize-max-steps-reason"
             )
-            return json_response.get("reasoning", "")
+            return MaxStepsReasonResponse.model_validate(json_response)
         except Exception:
             LOG.warning("Failed to summary the failure reason", task_id=task.task_id, step_id=step.step_id)
             if steps_results:
                 last_step_result = steps_results[-1]
-                return f"Step {last_step_result['order']}: {last_step_result['actions_result']}"
-            return ""
+                return MaxStepsReasonResponse(
+                    page_info="",
+                    reasoning=f"Step {last_step_result['order']}: {last_step_result['actions_result']}",
+                    errors=[],
+                )
+            return MaxStepsReasonResponse(
+                page_info="",
+                reasoning="",
+                errors=[],
+            )
 
     async def summary_failure_reason_for_max_retries(
         self,
@@ -2904,21 +2917,22 @@ class ForgeAgent:
             )
             last_step = await self.update_step(step, is_last=True)
 
-            failure_reason = await self.summary_failure_reason_for_max_steps(
+            generated_failure_reason = await self.summary_failure_reason_for_max_steps(
                 organization=organization,
                 task=task,
                 step=step,
                 page=page,
             )
-            failure_reason = (
-                f"Reached the maximum steps ({max_steps_per_run}). Possible failure reasons: {failure_reason}"
-            )
+            failure_reason = f"Reached the maximum steps ({max_steps_per_run}). Possible failure reasons: {generated_failure_reason.reasoning}"
+            errors = [ReachMaxStepsError().model_dump()] + [
+                error.model_dump() for error in generated_failure_reason.errors
+            ]
 
             await self.update_task(
                 task,
                 status=TaskStatus.failed,
                 failure_reason=failure_reason,
-                errors=[ReachMaxStepsError().model_dump()],
+                errors=errors,
             )
             return False, last_step, None
         else:
diff --git a/skyvern/forge/prompts/skyvern/summarize-max-steps-reason.j2 b/skyvern/forge/prompts/skyvern/summarize-max-steps-reason.j2
index fda693d5..b4ac7e7c 100644
--- a/skyvern/forge/prompts/skyvern/summarize-max-steps-reason.j2
+++ b/skyvern/forge/prompts/skyvern/summarize-max-steps-reason.j2
@@ -4,11 +4,21 @@ Make sure to ONLY return the JSON object in this format with no additional text 
 ```json
 {
     "page_info": str, // Think step by step. Describe all the useful information in the page related to the user goal.
-    "reasoning": str, // Think step by step. Describe the reason you summarized based on 'page_info', screenshots, user goal, user detail and result of actions. Keep the reasoning short and to the point.
+    "reasoning": str, // Think step by step. Describe the reason you summarized based on 'page_info', screenshots, user goal, user detail and result of actions. Keep the reasoning short and to the point.{% if error_code_mapping_str %}
+    "errors": array // A list of errors. This is used to surface any errors that matches the current situation. If no error description suits the current situation on the screenshots or the action history, return an empty list. You are allowed to return multiple errors if there are multiple errors on the page.
+    [{
+        "error_code": str, // The error code from the user's error code list
+        "reasoning": str, // The reasoning behind the error. Be specific, referencing any user information and their fields in your reasoning. Keep the reasoning short and to the point.
+        "confidence_float": float // The confidence of the error. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence
+    }]{% endif %}
 }
 
 User Goal:
 {{ navigation_goal }}
+{% if error_code_mapping_str %}
+Use the error codes and their descriptions to surface user-defined errors. Do not return any error that's not defined by the user. User defined errors:
+{{ error_code_mapping_str }}
+{% endif %}
 
 User Details:
 {{ navigation_payload }}
diff --git a/skyvern/schemas/steps.py b/skyvern/schemas/steps.py
index 3cf49850..6b8a72b9 100644
--- a/skyvern/schemas/steps.py
+++ b/skyvern/schemas/steps.py
@@ -2,7 +2,8 @@ from __future__ import annotations
 
 from pydantic import BaseModel
 
-from skyvern.webeye.actions.actions import Action, UserDefinedError
+from skyvern.errors.errors import UserDefinedError
+from skyvern.webeye.actions.actions import Action
 from skyvern.webeye.actions.responses import ActionResult
 
 
diff --git a/skyvern/utils/prompt_engine.py b/skyvern/utils/prompt_engine.py
index 1af0e1f5..46538d1f 100644
--- a/skyvern/utils/prompt_engine.py
+++ b/skyvern/utils/prompt_engine.py
@@ -4,6 +4,7 @@ import structlog
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 
 from skyvern.constants import DEFAULT_MAX_TOKENS
+from skyvern.errors.errors import UserDefinedError
 from skyvern.forge.sdk.prompting import PromptEngine
 from skyvern.utils.token_counter import count_tokens
 from skyvern.webeye.scraper.scraper import ElementTreeBuilder
@@ -30,6 +31,16 @@ class CheckDateFormatResponse(BaseModel):
 HTMLTreeStr = str
 
 
+class MaxStepsReasonResponse(BaseModel):
+    """Parsed LLM response for the `summarize-max-steps-reason` prompt."""
+
+    page_info: str = ""
+    reasoning: str = ""
+    # The prompt only requests "errors" when an error code mapping is supplied,
+    # so the key may be absent from the response; default to an empty list.
+    errors: list[UserDefinedError] = Field(default_factory=list)
+
+
 def load_prompt_with_elements(
     element_tree_builder: ElementTreeBuilder,
     prompt_engine: PromptEngine,
diff --git a/skyvern/webeye/actions/actions.py b/skyvern/webeye/actions/actions.py
index 3aaa16ff..d51a2b1b 100644
--- a/skyvern/webeye/actions/actions.py
+++ b/skyvern/webeye/actions/actions.py
@@ -6,6 +6,7 @@ import structlog
 from litellm import ConfigDict
 from pydantic import BaseModel, Field
 
+from skyvern.errors.errors import UserDefinedError
 from skyvern.webeye.actions.action_types import ActionType
 
 LOG = structlog.get_logger()
@@ -19,15 +20,6 @@ class ActionStatus(StrEnum):
     completed = "completed"
 
 
-class UserDefinedError(BaseModel):
-    error_code: str
-    reasoning: str
-    confidence_float: float = Field(..., ge=0, le=1)
-
-    def __repr__(self) -> str:
-        return f"{self.reasoning}(error_code={self.error_code}, confidence_float={self.confidence_float})"
-
-
 class SelectOption(BaseModel):
     label: str | None = None
     value: str | None = None
diff --git a/skyvern/webeye/actions/models.py b/skyvern/webeye/actions/models.py
index 65f810b2..d9c9aa0c 100644
--- a/skyvern/webeye/actions/models.py
+++ b/skyvern/webeye/actions/models.py
@@ -6,8 +6,9 @@ from openai.types.responses.response import Response as OpenAIResponse
 from pydantic import BaseModel, ConfigDict
 
 from skyvern.config import settings
+from skyvern.errors.errors import UserDefinedError
 from skyvern.schemas.steps import AgentStepOutput
-from skyvern.webeye.actions.actions import Action, DecisiveAction, UserDefinedError
+from skyvern.webeye.actions.actions import Action, DecisiveAction
 from skyvern.webeye.actions.responses import ActionResult
 from skyvern.webeye.scraper.scraper import ScrapedPage
 