better failure reason (#1066)
This commit is contained in:
@@ -335,7 +335,7 @@ class ForgeAgent:
|
|||||||
is_task_completed,
|
is_task_completed,
|
||||||
maybe_last_step,
|
maybe_last_step,
|
||||||
maybe_next_step,
|
maybe_next_step,
|
||||||
) = await self.handle_completed_step(organization, task, step)
|
) = await self.handle_completed_step(organization, task, step, await browser_state.get_working_page())
|
||||||
if is_task_completed is not None and maybe_last_step:
|
if is_task_completed is not None and maybe_last_step:
|
||||||
last_step = maybe_last_step
|
last_step = maybe_last_step
|
||||||
await self.send_task_response(
|
await self.send_task_response(
|
||||||
@@ -1649,8 +1649,65 @@ class ForgeAgent:
|
|||||||
)
|
)
|
||||||
return next_step
|
return next_step
|
||||||
|
|
||||||
|
async def summary_failure_reason_for_max_steps(
    self,
    organization: Organization,
    task: Task,
    step: Step,
    page: Page | None,
) -> str:
    """Produce a short explanation of why the task hit its step limit.

    The steps recorded for the task are loaded from the database. If any
    step output carries errors, the ``repr`` of those errors (joined with
    ``;``) from the first such step is returned immediately. Otherwise each
    step's actions are condensed into one summary string per action, and the
    "summarize-max-steps-reason" prompt is sent to the LLM together with
    screenshots of ``page`` (when a page is available).

    Args:
        organization: Owner of the task; its ``organization_id`` scopes the
            step lookup.
        task: The task that ran out of steps; supplies the task id, the
            navigation goal and the navigation payload.
        step: The step the LLM call is attributed to.
        page: Page to screenshot for the LLM, or ``None`` to send no
            screenshots.

    Returns:
        The ``"reasoning"`` field of the LLM response, the joined error
        representations, or ``""`` when anything fails. This is best-effort:
        every exception is logged and swallowed so the caller can fall back
        to its own default message.
    """
    try:
        steps = await app.DATABASE.get_task_steps(
            task_id=task.task_id, organization_id=organization.organization_id
        )
        steps_results: list[dict[str, Any]] = []
        # The loop variable is deliberately NOT named `step`: rebinding the
        # parameter here would make the LLM call below use the last step
        # fetched from the database instead of the step passed by the caller.
        for step_cnt, task_step in enumerate(steps):
            output = task_step.output
            if output is None:
                continue

            # Explicit errors are a better reason than anything the LLM
            # could infer, so report them right away.
            if output.errors:
                return ";".join(repr(err) for err in output.errors)

            if output.actions_and_results is None:
                continue

            # One line per action that produced results; only the final
            # result of an action decides success vs. failure.
            action_result_summary: list[str] = [
                f"{action.reasoning}(action_type={action.action_type}, result={'success' if action_results[-1].success else 'failed'})"
                for action, action_results in output.actions_and_results
                if action_results
            ]
            # Keys match what the prompt template reads
            # (`step.order`, `step.actions_result`).
            steps_results.append(
                {
                    "order": step_cnt,
                    "actions_result": action_result_summary,
                }
            )

        screenshots: list[bytes] = []
        if page is not None:
            screenshots = await SkyvernFrame.take_split_screenshots(page=page, url=page.url)

        prompt = prompt_engine.load_prompt(
            "summarize-max-steps-reason",
            step_count=len(steps),
            navigation_goal=task.navigation_goal,
            navigation_payload=task.navigation_payload,
            steps=steps_results,
        )
        json_response = await app.LLM_API_HANDLER(prompt=prompt, screenshots=screenshots, step=step)
        return json_response.get("reasoning", "")

    except Exception:
        LOG.exception("Failed to summary the failure reason", task=task.task_id)
        return ""
|
||||||
|
|
||||||
async def handle_completed_step(
|
async def handle_completed_step(
|
||||||
self, organization: Organization, task: Task, step: Step
|
self,
|
||||||
|
organization: Organization,
|
||||||
|
task: Task,
|
||||||
|
step: Step,
|
||||||
|
page: Page | None,
|
||||||
) -> tuple[bool | None, Step | None, Step | None]:
|
) -> tuple[bool | None, Step | None, Step | None]:
|
||||||
if step.is_goal_achieved():
|
if step.is_goal_achieved():
|
||||||
LOG.info(
|
LOG.info(
|
||||||
@@ -1701,10 +1758,20 @@ class ForgeAgent:
|
|||||||
max_steps=max_steps_per_run,
|
max_steps=max_steps_per_run,
|
||||||
)
|
)
|
||||||
last_step = await self.update_step(step, is_last=True)
|
last_step = await self.update_step(step, is_last=True)
|
||||||
|
|
||||||
|
failure_reason = await self.summary_failure_reason_for_max_steps(
|
||||||
|
organization=organization,
|
||||||
|
task=task,
|
||||||
|
step=step,
|
||||||
|
page=page,
|
||||||
|
)
|
||||||
|
if not failure_reason:
|
||||||
|
failure_reason = f"Max steps per task ({max_steps_per_run}) exceeded"
|
||||||
|
|
||||||
await self.update_task(
|
await self.update_task(
|
||||||
task,
|
task,
|
||||||
status=TaskStatus.failed,
|
status=TaskStatus.failed,
|
||||||
failure_reason=f"Max steps per task ({max_steps_per_run}) exceeded",
|
failure_reason=failure_reason,
|
||||||
)
|
)
|
||||||
return False, last_step, None
|
return False, last_step, None
|
||||||
else:
|
else:
|
||||||
|
|||||||
18
skyvern/forge/prompts/skyvern/summarize-max-steps-reason.j2
Normal file
18
skyvern/forge/prompts/skyvern/summarize-max-steps-reason.j2
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
The user is performing a task step by step on a web page. Your job is to help the user summarize the main reason why the user goal was not achieved within the limit of {{ step_count }} steps. Base this summary on the provided screenshots, the user goal, the user details, and the results of the actions taken in each step.
|
||||||
|
|
||||||
|
Make sure to ONLY return the JSON object in this format with no additional text before or after it:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"page_info": str, // Think step by step. Describe all the useful information in the page related to the user goal.
|
||||||
|
"reasoning": str, // Think step by step. Describe the reason you summarized based on 'page_info', screenshots, user goal, user detail and result of actions. Keep the reasoning short and to the point.
|
||||||
|
}
```
|
||||||
|
|
||||||
|
User Goal:
|
||||||
|
{{ navigation_goal }}
|
||||||
|
|
||||||
|
User Details:
|
||||||
|
{{ navigation_payload }}
|
||||||
|
|
||||||
|
Actions Taken In Each Step:
|
||||||
|
{% for step in steps %}Step {{ step.order }} -- {{ step.actions_result }}
|
||||||
|
{% endfor %}
|
||||||
@@ -294,17 +294,22 @@ async def get_task(
|
|||||||
task_status=task_obj.status,
|
task_status=task_obj.status,
|
||||||
)
|
)
|
||||||
|
|
||||||
failure_reason = None
|
failure_reason: str | None = None
|
||||||
if task_obj.status == TaskStatus.failed and (latest_step.output or task_obj.failure_reason):
|
if task_obj.status == TaskStatus.failed and (latest_step.output or task_obj.failure_reason):
|
||||||
failure_reason = ""
|
failure_reason = ""
|
||||||
if task_obj.failure_reason:
|
if task_obj.failure_reason:
|
||||||
failure_reason += f"Reasoning: {task_obj.failure_reason or ''}"
|
failure_reason += task_obj.failure_reason or ""
|
||||||
failure_reason += "\n"
|
if latest_step.output is not None and latest_step.output.actions_and_results is not None:
|
||||||
if latest_step.output and latest_step.output.action_results:
|
action_results_string: list[str] = []
|
||||||
failure_reason += "Exceptions: "
|
for action, results in latest_step.output.actions_and_results:
|
||||||
failure_reason += str(
|
if len(results) == 0:
|
||||||
[f"[{ar.exception_type}]: {ar.exception_message}" for ar in latest_step.output.action_results]
|
continue
|
||||||
)
|
if results[-1].success:
|
||||||
|
continue
|
||||||
|
action_results_string.append(f"{action.action_type} action failed.")
|
||||||
|
|
||||||
|
if len(action_results_string) > 0:
|
||||||
|
failure_reason += "(Exceptions: " + str(action_results_string) + ")"
|
||||||
|
|
||||||
return task_obj.to_task_response(
|
return task_obj.to_task_response(
|
||||||
action_screenshot_urls=latest_action_screenshot_urls,
|
action_screenshot_urls=latest_action_screenshot_urls,
|
||||||
|
|||||||
@@ -53,6 +53,9 @@ class UserDefinedError(BaseModel):
|
|||||||
reasoning: str
|
reasoning: str
|
||||||
confidence_float: float = Field(..., ge=0, le=1)
|
confidence_float: float = Field(..., ge=0, le=1)
|
||||||
|
|
||||||
|
def __repr__(self) -> str:
    """Render the error as ``<reasoning>(error_code=..., confidence_float=...)``."""
    reasoning = self.reasoning
    error_code = self.error_code
    confidence_float = self.confidence_float
    return f"{reasoning}(error_code={error_code}, confidence_float={confidence_float})"
|
||||||
|
|
||||||
|
|
||||||
class SelectOption(BaseModel):
|
class SelectOption(BaseModel):
|
||||||
label: str | None = None
|
label: str | None = None
|
||||||
|
|||||||
Reference in New Issue
Block a user