From 031d9083a61d5c29a86edb62deadb165b6edd041 Mon Sep 17 00:00:00 2001
From: LawyZheng <lawyzheng1106@gmail.com>
Date: Sat, 13 Sep 2025 16:13:35 +0800
Subject: [PATCH] add history to surface errorcode prompt (#3428)

---
 skyvern/forge/agent.py                        | 43 +--------------
 .../skyvern/surface-user-defined-errors.j2    |  8 ++-
 skyvern/services/action_service.py            | 54 +++++++++++++++++++
 skyvern/webeye/actions/handler.py             |  3 ++
 4 files changed, 66 insertions(+), 42 deletions(-)
 create mode 100644 skyvern/services/action_service.py

diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py
index ef3980a5..c78351a8 100644
--- a/skyvern/forge/agent.py
+++ b/skyvern/forge/agent.py
@@ -81,6 +81,7 @@ from skyvern.forge.sdk.workflow.models.workflow import Workflow, WorkflowRun, Wo
 from skyvern.schemas.runs import CUA_ENGINES, RunEngine
 from skyvern.schemas.steps import AgentStepOutput
 from skyvern.services import run_service
+from skyvern.services.action_service import get_action_history
 from skyvern.services.task_v1_service import is_cua_task
 from skyvern.utils.image_resizer import Resolution
 from skyvern.utils.prompt_engine import MaxStepsReasonResponse, load_prompt_with_elements
@@ -2100,47 +2101,7 @@ class ForgeAgent:
         return final_navigation_payload
 
     async def _get_action_results(self, task: Task, current_step: Step | None = None) -> str:
-        """
-        Get the action results from the last app.SETTINGS.PROMPT_ACTION_HISTORY_WINDOW steps.
-        If current_step is provided, the current executing step will be included in the action history.
-        Default is excluding the current executing step from the action history.
-        """
-
-        # Get action results from the last app.SETTINGS.PROMPT_ACTION_HISTORY_WINDOW steps
-        steps = await app.DATABASE.get_task_steps(task_id=task.task_id, organization_id=task.organization_id)
-        # the last step is always the newly created one and it should be excluded from the history window
-        window_steps = steps[-1 - settings.PROMPT_ACTION_HISTORY_WINDOW : -1]
-        if current_step:
-            window_steps.append(current_step)
-
-        actions_and_results: list[tuple[Action, list[ActionResult]]] = []
-        for window_step in window_steps:
-            if window_step.output and window_step.output.actions_and_results:
-                actions_and_results.extend(window_step.output.actions_and_results)
-
-        # exclude successful action from history
-        action_history = [
-            {
-                "action": action.model_dump(
-                    exclude_none=True,
-                    include={"action_type", "element_id", "status", "reasoning", "option", "download"},
-                ),
-                "results": [
-                    result.model_dump(
-                        exclude_none=True,
-                        include={
-                            "success",
-                            "exception_type",
-                            "exception_message",
-                        },
-                    )
-                    for result in results
-                ],
-            }
-            for action, results in actions_and_results
-            if len(results) > 0
-        ]
-        return json.dumps(action_history)
+        return json.dumps(await get_action_history(task=task, current_step=current_step))
 
     async def get_extracted_information_for_task(self, task: Task) -> dict[str, Any] | list | str | None:
         """
diff --git a/skyvern/forge/prompts/skyvern/surface-user-defined-errors.j2 b/skyvern/forge/prompts/skyvern/surface-user-defined-errors.j2
index 1bf8a8eb..a2740052 100644
--- a/skyvern/forge/prompts/skyvern/surface-user-defined-errors.j2
+++ b/skyvern/forge/prompts/skyvern/surface-user-defined-errors.j2
@@ -1,4 +1,4 @@
-You are here to help the user use the error codes and their descriptions to surface user-defined errors based on the screenshots, user goal, user details and the HTML elements.
+You are here to help the user use the error codes and their descriptions to surface user-defined errors based on the screenshots, user goal, user details, action history and the HTML elements.
 Do not return any error that's not defined by the user. 
 
 Reply in JSON format with the following keys:
@@ -27,6 +27,12 @@ User details:
 {{ navigation_payload_str }}
 ```
 
+Consider the action history and the screenshot together.
+Action history from previous steps: (note: even if the action history suggests the goal is achieved, check the screenshot and the DOM elements to make sure the goal is achieved)
+```
+{{action_history}}
+```
+
 Clickable elements from `{{ current_url }}`:
 ```
 {{ elements }}
diff --git a/skyvern/services/action_service.py b/skyvern/services/action_service.py
new file mode 100644
index 00000000..6b6c73eb
--- /dev/null
+++ b/skyvern/services/action_service.py
@@ -0,0 +1,54 @@
+from typing import Any
+
+from skyvern.config import settings
+from skyvern.forge import app
+from skyvern.forge.sdk.models import Step
+from skyvern.forge.sdk.schemas.tasks import Task
+from skyvern.webeye.actions.actions import Action
+from skyvern.webeye.actions.responses import ActionResult
+
+
+async def get_action_history(
+    task: Task, current_step: Step | None = None, history_window: int | None = None
+) -> list[dict[str, Any]]:
+    """
+    Build the action history for the most recent `history_window` steps of `task`.
+
+    :param task: task whose steps are loaded from the database.
+    :param current_step: if given, it is appended after the window; by default the executing step is excluded.
+    :param history_window: number of earlier steps to include; None means settings.PROMPT_ACTION_HISTORY_WINDOW,
+        read at call time rather than frozen at import time.
+    :return: one {"action": ..., "results": [...]} dict per action that has at least one result.
+    """
+    # Resolve the default lazily: a default argument expression is evaluated only once, at definition time.
+    if history_window is None:
+        history_window = settings.PROMPT_ACTION_HISTORY_WINDOW
+
+    # Get action results from the last history_window steps
+    steps = await app.DATABASE.get_task_steps(task_id=task.task_id, organization_id=task.organization_id)
+    # the last step is always the newly created one and it should be excluded from the history window
+    window_steps = steps[-1 - history_window : -1]
+    if current_step:
+        window_steps.append(current_step)
+
+    actions_and_results: list[tuple[Action, list[ActionResult]]] = []
+    for window_step in window_steps:
+        if window_step.output and window_step.output.actions_and_results:
+            actions_and_results.extend(window_step.output.actions_and_results)
+
+    # actions without any result are left out of the history
+    action_history = [
+        {
+            "action": action.model_dump(
+                exclude_none=True,
+                include={"action_type", "element_id", "status", "reasoning", "option", "download"},
+            ),
+            "results": [
+                result.model_dump(exclude_none=True, include={"success", "exception_type", "exception_message"})
+                for result in results
+            ],
+        }
+        for action, results in actions_and_results
+        if len(results) > 0
+    ]
+    return action_history
diff --git a/skyvern/webeye/actions/handler.py b/skyvern/webeye/actions/handler.py
index 3dc131d4..4cdea6fc 100644
--- a/skyvern/webeye/actions/handler.py
+++ b/skyvern/webeye/actions/handler.py
@@ -69,6 +69,7 @@ from skyvern.forge.sdk.schemas.tasks import Task
 from skyvern.forge.sdk.services.bitwarden import BitwardenConstants
 from skyvern.forge.sdk.services.credentials import AzureVaultConstants, OnePasswordConstants
 from skyvern.forge.sdk.trace import TraceManager
+from skyvern.services.action_service import get_action_history
 from skyvern.services.task_v1_service import is_cua_task
 from skyvern.utils.prompt_engine import (
     CheckDateFormatResponse,
@@ -3732,6 +3733,7 @@ async def _get_input_or_select_context(
 
 
 async def extract_user_defined_errors(task: Task, step: Step, scraped_page: ScrapedPage) -> list[UserDefinedError]:
+    action_history = await get_action_history(task=task, current_step=step)
     scraped_page_refreshed = await scraped_page.refresh(draw_boxes=False)
     prompt = prompt_engine.load_prompt(
         "surface-user-defined-errors",
@@ -3739,6 +3741,7 @@ async def extract_user_defined_errors(task: Task, step: Step, scraped_page: Scra
         navigation_payload_str=json.dumps(task.navigation_payload),
         elements=scraped_page_refreshed.build_element_tree(fmt=ElementTreeFormat.HTML),
         current_url=scraped_page_refreshed.url,
+        action_history=json.dumps(action_history),
         error_code_mapping_str=json.dumps(task.error_code_mapping) if task.error_code_mapping else "{}",
         local_datetime=datetime.now(skyvern_context.ensure_context().tz_info).isoformat(),
     )