use single prompt to generate error code (#3396)

This commit is contained in:
LawyZheng
2025-09-10 02:45:07 +08:00
committed by GitHub
parent ceec64d201
commit 3b8789d045
5 changed files with 68 additions and 27 deletions

View File

@@ -2062,7 +2062,6 @@ class ForgeAgent:
current_url=current_url,
data_extraction_goal=task.data_extraction_goal,
action_history=actions_and_results_str,
error_code_mapping_str=(json.dumps(task.error_code_mapping) if task.error_code_mapping else None),
local_datetime=datetime.now(context.tz_info).isoformat(),
verification_code_check=verification_code_check,
complete_criterion=task.complete_criterion.strip() if task.complete_criterion else None,

View File

@@ -10,14 +10,6 @@ Reply in JSON format with the following keys:
"reasoning": str, // The reasoning behind the action. This reasoning must be user information agnostic. Mention why you chose the action type, and why you chose the element id. Keep the reasoning short and to the point.
"confidence_float": float, // The confidence of the action. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence
"action_type": str, // It's a string enum: "COMPLETE", "TERMINATE". "COMPLETE" is used when the current page info has met the complete criterion. If there is no complete criterion, use "COMPLETE" as long as the page info hasn't met the terminate criterion. "TERMINATE" is used to terminate with a failure when the current page info has met the terminate criterion. If there is no terminate criterion, use "TERMINATE" as long as the page info hasn't met the complete criterion.
{% if error_code_mapping_str %}
"errors": array // A list of errors. This is used to surface any errors that matches the current situation for COMPLETE and TERMINATE actions. For other actions or if no error description suits the current situation on the screenshots, return an empty list. You are allowed to return multiple errors if there are multiple errors on the page.
[{
"error_code": str, // The error code from the user's error code list
"reasoning": str, // The reasoning behind the error. Be specific, referencing any user information and their fields in your reasoning. Keep the reasoning short and to the point.
"confidence_float": float // The confidence of the error. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence
}]
{% endif %}
}]
}
@@ -38,11 +30,6 @@ Terminate Criterion:
```
{{ terminate_criterion }}
```{% endif %}
{% if error_code_mapping_str %}
Use the error codes and their descriptions to surface user-defined errors. Do not return any error that's not defined by the user. User defined errors:
```
{{ error_code_mapping_str }}
```{% endif %}
User details:
```

View File

@@ -27,15 +27,7 @@ Reply in JSON format with the following keys:
"label": str, // the label of the option if any. MAKE SURE YOU USE THIS LABEL TO SELECT THE OPTION. DO NOT PUT ANYTHING OTHER THAN A VALID OPTION LABEL HERE
"index": int, // the index corresponding to the option index under the select element.
"value": str // the value of the option. MAKE SURE YOU USE THIS VALUE TO SELECT THE OPTION. DO NOT PUT ANYTHING OTHER THAN A VALID OPTION VALUE HERE
},
{% if error_code_mapping_str %}
"errors": array // A list of errors. This is used to surface any errors that matches the current situation for COMPLETE and TERMINATE actions. For other actions or if no error description suits the current situation on the screenshots, return an empty list. You are allowed to return multiple errors if there are multiple errors on the page.
[{
"error_code": str, // The error code from the user's error code list
"reasoning": str, // The reasoning behind the error. Be specific, referencing any user information and their fields in your reasoning. Keep the reasoning short and to the point.
"confidence_float": float // The confidence of the error. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence
}]
{% endif %}
}
}],{% if verification_code_check %}
"verification_code_reasoning": str, // Let's think step by step. Describe what you see and think if there is somewhere on the current page where you must enter the verification code now for login or any verification step. Explain why you believe a verification code needs to be entered somewhere or not. Do not imagine any place to enter the code if the code has not been sent yet.
"place_to_enter_verification_code": bool, // Whether there is a place on the current page to enter the verification code now.
@@ -57,10 +49,6 @@ User goal:
```
{{ navigation_goal }}
```
{% if error_code_mapping_str %}
Use the error codes and their descriptions to surface user-defined errors. Do not return any error that's not defined by the user. User defined errors:
{{ error_code_mapping_str }}
{% endif %}
{% if data_extraction_goal %}
User Data Extraction Goal:
```

View File

@@ -0,0 +1,39 @@
You are here to help the user use the error codes and their descriptions to surface user-defined errors based on the screenshots, user goal, user details and the HTML elements.
Do not return any error that's not defined by the user.
Reply in JSON format with the following keys:
{
"errors": array // A list of errors. If no error description suits the current situation on the screenshots, return an empty list. You are allowed to return multiple errors if there are multiple errors on the page.
[{
"error_code": str, // The error code from the user's error code list
"reasoning": str, // The reasoning behind the error. Be specific, referencing any user information and their fields in your reasoning. Keep the reasoning short and to the point.
"confidence_float": float // The confidence of the error. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence
}]
}
User defined errors:
```
{{ error_code_mapping_str }}
```
User Goal:
```
{{ navigation_goal }}
```
User details:
```
{{ navigation_payload_str }}
```
Clickable elements from `{{ current_url }}`:
```
{{ elements }}
```
The URL of the page you're on right now is `{{ current_url }}`.
Current datetime, ISO format:
```
{{ local_datetime }}
```

View File

@@ -92,12 +92,14 @@ from skyvern.webeye.actions.actions import (
SelectOption,
SelectOptionAction,
UploadFileAction,
UserDefinedError,
WebAction,
)
from skyvern.webeye.actions.responses import ActionAbort, ActionFailure, ActionResult, ActionSuccess
from skyvern.webeye.scraper.scraper import (
CleanupElementTreeFunc,
ElementTreeBuilder,
ElementTreeFormat,
IncrementalScrapePage,
ScrapedPage,
hash_element,
@@ -1795,6 +1797,8 @@ async def handle_terminate_action(
task: Task,
step: Step,
) -> list[ActionResult]:
if task.error_code_mapping:
action.errors = await extract_user_defined_errors(task=task, step=step, scraped_page=scraped_page)
return [ActionSuccess()]
@@ -1834,6 +1838,10 @@ async def handle_complete_action(
workflow_run_id=task.workflow_run_id,
)
action.verified = True
if task.error_code_mapping:
action.errors = await extract_user_defined_errors(task=task, step=step, scraped_page=scraped_page)
if not task.data_extraction_goal and verification_result.thoughts:
await app.DATABASE.update_task(
task.task_id,
@@ -3816,3 +3824,23 @@ async def _get_input_or_select_context(
context=input_or_select_context,
)
return input_or_select_context
async def extract_user_defined_errors(task: Task, step: Step, scraped_page: ScrapedPage) -> list[UserDefinedError]:
    """Ask the extraction LLM which user-defined error codes apply to the page.

    The scraped page is refreshed (without drawing bounding boxes), the
    ``surface-user-defined-errors`` prompt is rendered from the task's goal,
    payload and error code mapping plus the refreshed element tree, and the
    prompt is sent together with the refreshed screenshots to
    ``app.EXTRACTION_LLM_API_HANDLER``.

    Args:
        task: Task whose ``navigation_goal``, ``navigation_payload``, ``url``
            and ``error_code_mapping`` are rendered into the prompt.
        step: Step passed through to the LLM handler.
        scraped_page: Page snapshot that is refreshed before prompting.

    Returns:
        One ``UserDefinedError`` per entry of the ``"errors"`` key in the LLM
        response; an empty list when the key is missing, ``null`` or empty.

    Raises:
        Whatever ``UserDefinedError.model_validate`` raises for a malformed
        entry is propagated to the caller.
    """
    scraped_page_refreshed = await scraped_page.refresh(draw_boxes=False)

    # Always hand the template a JSON *string*; the previous fallback was a
    # dict literal, which made the variable's type depend on the task.
    error_code_mapping_str = json.dumps(task.error_code_mapping) if task.error_code_mapping else "{}"

    prompt = prompt_engine.load_prompt(
        "surface-user-defined-errors",
        navigation_goal=task.navigation_goal,
        navigation_payload_str=json.dumps(task.navigation_payload),
        elements=scraped_page_refreshed.build_element_tree(fmt=ElementTreeFormat.HTML),
        # NOTE(review): the template presents this as "the page you're on right
        # now", but task.url may be the task's starting URL rather than the
        # page that was just scraped - confirm whether the refreshed page
        # exposes its own URL and prefer that if so.
        current_url=task.url,
        error_code_mapping_str=error_code_mapping_str,
        local_datetime=datetime.now(skyvern_context.ensure_context().tz_info).isoformat(),
    )
    json_response = await app.EXTRACTION_LLM_API_HANDLER(
        prompt=prompt,
        screenshots=scraped_page_refreshed.screenshots,
        step=step,
        prompt_name="surface-user-defined-errors",
    )

    # The model may answer `"errors": null`; `.get("errors", [])` would return
    # None in that case and the iteration below would raise TypeError.
    raw_errors = json_response.get("errors") or []
    return [UserDefinedError.model_validate(error) for error in raw_errors]