let observer continue exploring when task failed/terminated (#1520)

2025-01-08 20:06:14 -08:00
parent d3b159da49
commit 52c188b5de
3 changed files with 18 additions and 26 deletions
--- a/skyvern/forge/prompts/skyvern/observer.j2
+++ b/skyvern/forge/prompts/skyvern/observer.j2
@@ -12,10 +12,10 @@ MAKE SURE YOU OUTPUT VALID JSON. No text before or after JSON, no trailing comma
 Reply in JSON format with the following keys:
 {
  "page_info": str, // Think step by step. Describe all the useful information in the page related to the user goal.
-  "thoughts": str, // Think step by step. What has been done so far and what is the next reasonable mini goal a human can do foreseeably move towards the overall goal.
  "extraction_thought": str, // Think step by step. Should any information be extracted given the user goal? If yes, has all the information been extracted? If the user is searching for something, looking for information or specifically trying to extract information along side the goal, consider it an intention to extract information. Phrases like "find something", "show me something", "search something" and so on indicate the intention to extract information.
  "require_extraction": bool, // True if the user goal requires information extraction. False otherwise.
-  "information_extracted": optional[bool], // True if the needed information has been extracted. False if the needed information has not been extracted. Null if the user goal does not require information extraction.
+  "information_extracted": optional[bool], // True if the needed information has been extracted. False if the needed information has not been extracted. If the task history contains no task of type "extract", no data extraction has happened, so return false. Null if the user goal does not require information extraction.
+  "thoughts": str, // Think step by step. What has been done so far, and what is the next reasonable mini goal a human could pursue to foreseeably move towards the overall goal?
  "user_goal_achieved": bool, // True if the user goal has been completed, false otherwise. If the user wants to extract information and it has not been done, the user goal is not achieved.
  "plan": str, // The mini goal to achieve to move towards the user goal. DO NOT come up or hallucinate any data that's not provided in the user goal. Be accurate and precise. Return null if the user goal has been achieved.
  "task_type": str, // One of the available task types: navigate, extract, loop
@@ -40,6 +40,10 @@ Task history (the earliest task is the first in the list and the latest is the l
 {{ task_history }}
 ```

+Task history explanation:
+- completed status means the mini goal has been completed
+- terminated and failed statuses mean the mini goal was not fully achieved or could not be achieved, so you might want to try something else. The reason is given to explain why.
+
 Current datetime, ISO format:
 ```
 {{ local_datetime }}
--- a/skyvern/forge/prompts/skyvern/observer_check_completion.j2
+++ b/skyvern/forge/prompts/skyvern/observer_check_completion.j2
@@ -2,8 +2,12 @@ You're to assist the user to achieve the user goal in the web, given the user go

 Reply in JSON format with the following keys:
 {
+  "page_info": str, // Think step by step. Describe all the useful information in the page related to the user goal.
+  "extraction_thought": str, // Think step by step. Should any information be extracted given the user goal? If yes, has all the information been extracted? If the user is searching for something, looking for information or specifically trying to extract information alongside the goal, consider it an intention to extract information. Phrases like "find something", "show me something", "search something" and so on indicate the intention to extract information.
+  "require_extraction": bool, // True if the user goal requires information extraction. False otherwise.
+  "information_extracted": optional[bool], // True if the needed information has been extracted. False if the needed information has not been extracted. Null if the user goal does not require information extraction.
  "thoughts": str, // Think step by step. Would completing the tasks in the task history be good enough to achieve the user goal? If more tasks need to be completed to achieve the goal, what would be the next task?
-  "user_goal_achieved": bool, // True if the user goal has been completed, false otherwise. If the user wants to extract information along side the goal, make sure the extract task has happened before claiming that the goal is achieved.
+  "user_goal_achieved": bool, // True if the user goal has been completed, false otherwise. If the user wants to extract information and it has not been done, the user goal is not achieved.
 }

 User goal:
--- a/skyvern/forge/sdk/services/observer_service.py
+++ b/skyvern/forge/sdk/services/observer_service.py
@@ -457,6 +457,9 @@ async def run_observer_cruise_helper(

        # generate the extraction task
        block_result = await block.execute_safe(workflow_run_id=workflow_run_id, organization_id=organization_id)
+        task_history_record["status"] = str(block_result.status)
+        if block_result.failure_reason:
+            task_history_record["reason"] = block_result.failure_reason

        extracted_data = _get_extracted_data_from_block_result(
            block_result,
@@ -499,8 +502,8 @@ async def run_observer_cruise_helper(
                status=workflow_run.status,
            )
            break
-        if block_result.success is True:
-            # validate completion
+        if block_result.success is True and i == max_iterations - 1:
+            # validate completion only happens at the last iteration
            observer_completion_prompt = prompt_engine.load_prompt(
                "observer_check_completion",
                user_goal=user_prompt,
@@ -610,20 +613,10 @@ async def handle_block_result(
                block_type_var=block.block_type,
                block_label=block.label,
            )
-        else:
-            failure_reason = f"Block with type {block.block_type} failed. failure reason: {block_result.failure_reason}"
-            await app.WORKFLOW_SERVICE.mark_workflow_run_as_failed(
-                workflow_run_id=workflow_run.workflow_run_id, failure_reason=failure_reason
-            )
-
-            # TODO: add api_key
-            await app.WORKFLOW_SERVICE.clean_up_workflow(
-                workflow=workflow,
-                workflow_run=workflow_run,
-            )
+        # observer will continue running the workflow
    elif block_result.status == BlockStatus.terminated:
        LOG.info(
-            f"Block with type {block.block_type} was terminated for workflow run {workflow_run_id}, marking workflow run as terminated",
+            f"Block with type {block.block_type} was terminated for workflow run {workflow_run_id}",
            block_type=block.block_type,
            workflow_run_id=workflow_run.workflow_run_id,
            block_result=block_result,
@@ -640,15 +633,6 @@ async def handle_block_result(
                block_type_var=block.block_type,
                block_label=block.label,
            )
-        else:
-            failure_reason = f"Block with type {block.block_type} terminated. Reason: {block_result.failure_reason}"
-            await app.WORKFLOW_SERVICE.mark_workflow_run_as_terminated(
-                workflow_run_id=workflow_run.workflow_run_id, failure_reason=failure_reason
-            )
-            await app.WORKFLOW_SERVICE.clean_up_workflow(
-                workflow=workflow,
-                workflow_run=workflow_run,
-            )
    # refresh workflow run model
    return await app.WORKFLOW_SERVICE.get_workflow_run(
        workflow_run_id=workflow_run_id,