feat: summarize failure on max retries (#2641)

Co-authored-by: lawyzheng <lawyzheng1106@gmail.com>
This commit is contained in:
Shuchang Zheng
2025-06-09 00:29:14 -07:00
committed by GitHub
parent c531395c39
commit fdf61aa2f9
2 changed files with 111 additions and 1 deletions

View File

@@ -2445,10 +2445,25 @@ class ForgeAgent:
step_retry=step.retry_index,
max_retries=settings.MAX_RETRIES_PER_STEP,
)
browser_state = app.BROWSER_MANAGER.get_for_task(task_id=task.task_id, workflow_run_id=task.workflow_run_id)
page = None
if browser_state is not None:
page = await browser_state.get_working_page()
failure_reason = await self.summary_failure_reason_for_max_retries(
organization=organization,
task=task,
step=step,
page=page,
max_retries=max_retries_per_step,
)
await self.update_task(
task,
TaskStatus.failed,
failure_reason=f"Max retries per step ({max_retries_per_step}) exceeded",
failure_reason=(
f"Max retries per step ({max_retries_per_step}) exceeded. Possible failure reasons: {failure_reason}"
),
)
return None
else:
@@ -2530,6 +2545,72 @@ class ForgeAgent:
return f"Step {last_step_result['order']}: {last_step_result['actions_result']}"
return ""
async def summary_failure_reason_for_max_retries(
    self,
    organization: Organization,
    task: Task,
    step: Step,
    page: Page | None,
    max_retries: int,
) -> str:
    """Summarize, via the LLM, why a step kept failing until retries ran out.

    Gathers the failed actions recorded on the last ``max_retries`` steps of
    the task, optionally captures the HTML and screenshots of ``page``, and
    sends everything to the LLM using the ``summarize-max-retries-reason``
    prompt.

    Args:
        organization: Its ``organization_id`` scopes the step lookup.
        task: Task whose steps are inspected; its navigation goal and payload
            are rendered into the prompt.
        step: Step handed to the LLM handler and used as log context.
        page: Page to capture HTML and screenshots from. ``None`` skips the
            capture and the prompt is built with empty HTML and no screenshots.
        max_retries: How many trailing steps to inspect; also rendered into
            the prompt.

    Returns:
        The ``reasoning`` value of the LLM response (``""`` when it is missing
        or null). This method is best-effort: if any part of it raises, it
        returns a plain description of the last collected step's failed
        actions instead, or ``""`` when nothing was collected.
    """
    html = ""
    screenshots: list[bytes] = []
    steps_results: list[dict[str, Any]] = []
    try:
        steps = await app.DATABASE.get_task_steps(
            task_id=task.task_id, organization_id=organization.organization_id
        )
        # NOTE(review): a max_retries of 0 makes this slice cover every step of
        # the task (steps[-0:] == steps[:]) -- confirm that is acceptable.
        for step_cnt, cur_step in enumerate(steps[-max_retries:]):
            if not (cur_step.output and cur_step.output.actions_and_results):
                continue

            action_result_summary: list[str] = []
            for action, action_results in cur_step.output.actions_and_results:
                if not action_results:
                    continue
                # Only the final result of an action decides whether it failed.
                last_result = action_results[-1]
                if last_result.success:
                    continue
                reason = last_result.exception_message or ""
                action_result_summary.append(
                    f"{action.reasoning}(action_type={action.action_type}, result=failed, reason={reason})"
                )

            steps_results.append(
                {
                    "order": step_cnt,
                    "actions_result": action_result_summary,
                }
            )

        if page is not None:
            skyvern_frame = await SkyvernFrame.create_instance(frame=page)
            html = await skyvern_frame.get_content()
            screenshots = await SkyvernFrame.take_split_screenshots(page=page, url=page.url)

        prompt = prompt_engine.load_prompt(
            "summarize-max-retries-reason",
            navigation_goal=task.navigation_goal,
            navigation_payload=task.navigation_payload,
            steps=steps_results,
            page_html=html,
            max_retries=max_retries,
            local_datetime=datetime.now(skyvern_context.ensure_context().tz_info).isoformat(),
        )
        json_response = await app.LLM_API_HANDLER(
            prompt=prompt,
            screenshots=screenshots,
            step=step,
            prompt_name="summarize-max-retries-reason",
        )
        # The model may emit `"reasoning": null`; never hand None to the caller,
        # which interpolates this value into the task's failure reason.
        return json_response.get("reasoning", "") or ""
    except Exception:
        # Deliberately best-effort: summarizing must not mask the original
        # failure, but record the traceback so the cause can be diagnosed.
        LOG.warning(
            "Failed to summarize the failure reason for max retries",
            task_id=task.task_id,
            step_id=step.step_id,
            exc_info=True,
        )
        if steps_results:
            last_step_result = steps_results[-1]
            return f"Retry Step {last_step_result['order']}: {last_step_result['actions_result']}"
        return ""
async def handle_completed_step(
self,
organization: Organization,

View File

@@ -0,0 +1,29 @@
The user attempted the step multiple times, but all {{ max_retries }} retries failed. Summarize the main reason why the actions failed based on the provided screenshots, page HTML, user goal, and user details.
Make sure to ONLY return the JSON object in this format with no additional text before or after it:
```json
{
"page_info": str, // Think step by step. Describe useful information from the page HTML related to the user goal.
"reasoning": str, // Think step by step. Summarize why the actions failed based on 'page_info', screenshots, user goal and the failed actions. Keep it short and to the point.
}
```
User Goal:
{{ navigation_goal }}
User Details:
{{ navigation_payload }}
Failed Actions In Each Retry Step:
{% for step in steps %}Retry Step {{ step.order }} -- {{ step.actions_result }}
{% endfor %}
Page HTML:
```html
{{ page_html }}
```
Current datetime, ISO format:
```
{{ local_datetime }}
```