better failure reason (#1066)

This commit is contained in:
LawyZheng
2024-10-28 09:42:55 +08:00
committed by GitHub
parent e42aabfc2e
commit d2b79ab5de
4 changed files with 104 additions and 11 deletions

View File

@@ -335,7 +335,7 @@ class ForgeAgent:
is_task_completed,
maybe_last_step,
maybe_next_step,
) = await self.handle_completed_step(organization, task, step)
) = await self.handle_completed_step(organization, task, step, await browser_state.get_working_page())
if is_task_completed is not None and maybe_last_step:
last_step = maybe_last_step
await self.send_task_response(
@@ -1649,8 +1649,65 @@ class ForgeAgent:
)
return next_step
async def summary_failure_reason_for_max_steps(
    self,
    organization: Organization,
    task: Task,
    step: Step,
    page: Page | None,
) -> str:
    """Summarize why the task did not finish within its step limit.

    Loads the task's steps from the database and walks them in the order
    returned. If a step's output carries errors, the ``repr`` of those
    errors joined with ``;`` is returned immediately and no LLM call is
    made. Otherwise a per-step summary of action results is rendered into
    the ``summarize-max-steps-reason`` prompt and sent to the LLM handler,
    together with screenshots of ``page`` when a page is available.

    Args:
        organization: Owner of the task; its id scopes the step lookup.
        task: The task that ran out of steps.
        step: The step the LLM call is made on behalf of.
        page: Page to screenshot for the prompt, or ``None`` to send no
            screenshots.

    Returns:
        The ``reasoning`` field of the LLM response (``""`` if absent), the
        joined step errors, or ``""`` when anything in this method raises.
        The failure is logged rather than propagated, so callers can fall
        back to a generic message.
    """
    try:
        steps = await app.DATABASE.get_task_steps(
            task_id=task.task_id, organization_id=organization.organization_id
        )
        steps_results: list[dict[str, Any]] = []
        # The loop variable must not be named `step`: rebinding the parameter
        # would make the LLM call below use the last fetched step instead of
        # the step the caller passed in.
        for step_cnt, cur_step in enumerate(steps):
            output = cur_step.output
            if output is None:
                continue

            # Explicit errors recorded on a step take priority over an LLM summary.
            if len(output.errors) > 0:
                return ";".join([repr(err) for err in output.errors])

            if output.actions_and_results is None:
                continue

            # Only the last result of each action decides success or failure;
            # actions without any result are left out of the summary.
            action_result_summary: list[str] = [
                f"{action.reasoning}(action_type={action.action_type}, result={'success' if action_results[-1].success else 'failed'})"
                for action, action_results in output.actions_and_results
                if len(action_results) > 0
            ]
            step_result: dict[str, Any] = {
                "order": step_cnt,
                "actions_result": action_result_summary,
            }
            steps_results.append(step_result)

        screenshots: list[bytes] = []
        if page is not None:
            screenshots = await SkyvernFrame.take_split_screenshots(page=page, url=page.url)

        prompt = prompt_engine.load_prompt(
            "summarize-max-steps-reason",
            step_count=len(steps),
            navigation_goal=task.navigation_goal,
            navigation_payload=task.navigation_payload,
            steps=steps_results,
        )
        json_response = await app.LLM_API_HANDLER(prompt=prompt, screenshots=screenshots, step=step)
        return json_response.get("reasoning", "")
    except Exception:
        # Best effort: a failed summary must not mask the original task failure.
        LOG.exception("Failed to summary the failure reason", task=task.task_id)
        return ""
async def handle_completed_step(
self, organization: Organization, task: Task, step: Step
self,
organization: Organization,
task: Task,
step: Step,
page: Page | None,
) -> tuple[bool | None, Step | None, Step | None]:
if step.is_goal_achieved():
LOG.info(
@@ -1701,10 +1758,20 @@ class ForgeAgent:
max_steps=max_steps_per_run,
)
last_step = await self.update_step(step, is_last=True)
failure_reason = await self.summary_failure_reason_for_max_steps(
organization=organization,
task=task,
step=step,
page=page,
)
if not failure_reason:
failure_reason = f"Max steps per task ({max_steps_per_run}) exceeded"
await self.update_task(
task,
status=TaskStatus.failed,
failure_reason=f"Max steps per task ({max_steps_per_run}) exceeded",
failure_reason=failure_reason,
)
return False, last_step, None
else:

View File

@@ -0,0 +1,18 @@
User is doing the task step by step on a web page. You are here to help the user summarize the main reason why the user goal has not been achieved within the limit of {{ step_count }} steps. This summary should be based on the provided screenshot, navigation goals, user details, and the results of actions taken step by step.
Make sure to ONLY return the JSON object in this format with no additional text before or after it:
```json
{
"page_info": str, // Think step by step. Describe all the useful information in the page related to the user goal.
"reasoning": str, // Think step by step. Describe the reason you summarized based on 'page_info', screenshots, user goal, user detail and result of actions. Keep the reasoning short and to the point.
}
```
User Goal:
{{ navigation_goal }}
User Details:
{{ navigation_payload }}
Actions Taken In Each Step:
{% for step in steps %}Step {{ step.order }} -- {{ step.actions_result }}
{% endfor %}

View File

@@ -294,17 +294,22 @@ async def get_task(
task_status=task_obj.status,
)
failure_reason = None
failure_reason: str | None = None
if task_obj.status == TaskStatus.failed and (latest_step.output or task_obj.failure_reason):
failure_reason = ""
if task_obj.failure_reason:
failure_reason += f"Reasoning: {task_obj.failure_reason or ''}"
failure_reason += "\n"
if latest_step.output and latest_step.output.action_results:
failure_reason += "Exceptions: "
failure_reason += str(
[f"[{ar.exception_type}]: {ar.exception_message}" for ar in latest_step.output.action_results]
)
failure_reason += task_obj.failure_reason or ""
if latest_step.output is not None and latest_step.output.actions_and_results is not None:
action_results_string: list[str] = []
for action, results in latest_step.output.actions_and_results:
if len(results) == 0:
continue
if results[-1].success:
continue
action_results_string.append(f"{action.action_type} action failed.")
if len(action_results_string) > 0:
failure_reason += "(Exceptions: " + str(action_results_string) + ")"
return task_obj.to_task_response(
action_screenshot_urls=latest_action_screenshot_urls,

View File

@@ -53,6 +53,9 @@ class UserDefinedError(BaseModel):
reasoning: str
confidence_float: float = Field(..., ge=0, le=1)
def __repr__(self) -> str:
    """Render the error as its reasoning followed by the code and confidence in parentheses."""
    details = f"error_code={self.error_code}, confidence_float={self.confidence_float}"
    return f"{self.reasoning}({details})"
class SelectOption(BaseModel):
label: str | None = None