diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py
index b0675ab0..a864f308 100644
--- a/skyvern/forge/agent.py
+++ b/skyvern/forge/agent.py
@@ -335,7 +335,7 @@ class ForgeAgent:
                     is_task_completed,
                     maybe_last_step,
                     maybe_next_step,
-                ) = await self.handle_completed_step(organization, task, step)
+                ) = await self.handle_completed_step(organization, task, step, await browser_state.get_working_page())
                 if is_task_completed is not None and maybe_last_step:
                     last_step = maybe_last_step
                     await self.send_task_response(
@@ -1649,8 +1649,79 @@ class ForgeAgent:
             )
         return next_step
 
+    async def summary_failure_reason_for_max_steps(
+        self,
+        organization: Organization,
+        task: Task,
+        step: Step,
+        page: Page | None,
+    ) -> str:
+        """Summarize why the task ran out of steps before reaching its goal.
+
+        Collects a per-step digest of action outcomes for the task, optionally
+        screenshots ``page``, and passes both to ``app.LLM_API_HANDLER`` with
+        the "summarize-max-steps-reason" prompt.
+
+        Returns:
+            The ";"-joined errors of the first step whose output has errors,
+            otherwise the "reasoning" field of the LLM response. An empty
+            string is returned if anything raises, so the caller can fall
+            back to its generic max-steps message.
+        """
+        try:
+            steps = await app.DATABASE.get_task_steps(
+                task_id=task.task_id, organization_id=organization.organization_id
+            )
+            steps_results = []
+            # NOTE: the loop variable must not be named `step`; that would rebind
+            # the `step` argument that is handed to the LLM call below.
+            for step_cnt, cur_step in enumerate(steps):
+                if cur_step.output is None:
+                    continue
+
+                if len(cur_step.output.errors) > 0:
+                    return ";".join([repr(err) for err in cur_step.output.errors])
+
+                if cur_step.output.actions_and_results is None:
+                    continue
+
+                action_result_summary: list[str] = []
+                step_result: dict[str, Any] = {
+                    "order": step_cnt,
+                }
+                for action, action_results in cur_step.output.actions_and_results:
+                    if len(action_results) == 0:
+                        continue
+                    action_result_summary.append(
+                        f"{action.reasoning}(action_type={action.action_type}, result={'success' if action_results[-1].success else 'failed'})"
+                    )
+                step_result["actions_result"] = action_result_summary
+                steps_results.append(step_result)
+
+            screenshots: list[bytes] = []
+            if page is not None:
+                screenshots = await SkyvernFrame.take_split_screenshots(page=page, url=page.url)
+
+            prompt = prompt_engine.load_prompt(
+                "summarize-max-steps-reason",
+                step_count=len(steps),
+                navigation_goal=task.navigation_goal,
+                navigation_payload=task.navigation_payload,
+                steps=steps_results,
+            )
+            json_response = await app.LLM_API_HANDLER(prompt=prompt, screenshots=screenshots, step=step)
+            return json_response.get("reasoning", "")
+
+        except Exception:
+            LOG.exception("Failed to summary the failure reason", task=task.task_id)
+            return ""
+
     async def handle_completed_step(
-        self, organization: Organization, task: Task, step: Step
+        self,
+        organization: Organization,
+        task: Task,
+        step: Step,
+        page: Page | None,
     ) -> tuple[bool | None, Step | None, Step | None]:
         if step.is_goal_achieved():
             LOG.info(
@@ -1701,10 +1772,20 @@ class ForgeAgent:
                     max_steps=max_steps_per_run,
                 )
                 last_step = await self.update_step(step, is_last=True)
+
+                failure_reason = await self.summary_failure_reason_for_max_steps(
+                    organization=organization,
+                    task=task,
+                    step=step,
+                    page=page,
+                )
+                if not failure_reason:
+                    failure_reason = f"Max steps per task ({max_steps_per_run}) exceeded"
+
                 await self.update_task(
                     task,
                     status=TaskStatus.failed,
-                    failure_reason=f"Max steps per task ({max_steps_per_run}) exceeded",
+                    failure_reason=failure_reason,
                 )
                 return False, last_step, None
             else:
diff --git a/skyvern/forge/prompts/skyvern/summarize-max-steps-reason.j2 b/skyvern/forge/prompts/skyvern/summarize-max-steps-reason.j2
new file mode 100644
index 00000000..c497f975
--- /dev/null
+++ b/skyvern/forge/prompts/skyvern/summarize-max-steps-reason.j2
@@ -0,0 +1,18 @@
+User is doing the task step by step on a web page. You are here to help the user summarize the main reason why the user goal has not been achieved within the limit of {{ step_count }} steps. This summary should be based on the provided screenshot, navigation goals, user details, and the results of actions taken step by step.
+
+Make sure to ONLY return the JSON object in this format with no additional text before or after it:
+```json
+{
+    "page_info": str, // Think step by step. Describe all the useful information in the page related to the user goal.
+    "reasoning": str, // Think step by step.
Describe the reason you summarized based on 'page_info', screenshots, user goal, user detail and result of actions. Keep the reasoning short and to the point. +} + +User Goal: +{{ navigation_goal }} + +User Details: +{{ navigation_payload }} + +Actions Taken In Each Step: +{% for step in steps %}Step {{ step.order }} -- {{ step.actions_result }} +{% endfor %} \ No newline at end of file diff --git a/skyvern/forge/sdk/routes/agent_protocol.py b/skyvern/forge/sdk/routes/agent_protocol.py index 59358fbd..701c66cf 100644 --- a/skyvern/forge/sdk/routes/agent_protocol.py +++ b/skyvern/forge/sdk/routes/agent_protocol.py @@ -294,17 +294,22 @@ async def get_task( task_status=task_obj.status, ) - failure_reason = None + failure_reason: str | None = None if task_obj.status == TaskStatus.failed and (latest_step.output or task_obj.failure_reason): failure_reason = "" if task_obj.failure_reason: - failure_reason += f"Reasoning: {task_obj.failure_reason or ''}" - failure_reason += "\n" - if latest_step.output and latest_step.output.action_results: - failure_reason += "Exceptions: " - failure_reason += str( - [f"[{ar.exception_type}]: {ar.exception_message}" for ar in latest_step.output.action_results] - ) + failure_reason += task_obj.failure_reason or "" + if latest_step.output is not None and latest_step.output.actions_and_results is not None: + action_results_string: list[str] = [] + for action, results in latest_step.output.actions_and_results: + if len(results) == 0: + continue + if results[-1].success: + continue + action_results_string.append(f"{action.action_type} action failed.") + + if len(action_results_string) > 0: + failure_reason += "(Exceptions: " + str(action_results_string) + ")" return task_obj.to_task_response( action_screenshot_urls=latest_action_screenshot_urls, diff --git a/skyvern/webeye/actions/actions.py b/skyvern/webeye/actions/actions.py index 0b6b7a8a..82f3e39f 100644 --- a/skyvern/webeye/actions/actions.py +++ b/skyvern/webeye/actions/actions.py @@ 
-53,6 +53,9 @@ class UserDefinedError(BaseModel): reasoning: str confidence_float: float = Field(..., ge=0, le=1) + def __repr__(self) -> str: + return f"{self.reasoning}(error_code={self.error_code}, confidence_float={self.confidence_float})" + class SelectOption(BaseModel): label: str | None = None