suchintan.vibe code user goal check (#2349)

Co-authored-by: lawyzheng <lawyzheng1106@gmail.com>
This commit is contained in:
Shuchang Zheng
2025-05-15 08:18:24 -07:00
committed by GitHub
parent 847ddacebd
commit ed4280153f
30 changed files with 251 additions and 9 deletions

View File

@@ -172,6 +172,7 @@ class ForgeAgent:
retry=task_retry,
max_steps_per_run=task_block.max_steps_per_run,
error_code_mapping=task_block.error_code_mapping,
include_action_history_in_verification=task_block.include_action_history_in_verification,
)
LOG.info(
"Created a new task for workflow run",
@@ -226,6 +227,7 @@ class ForgeAgent:
extracted_information_schema=task_request.extracted_information_schema,
error_code_mapping=task_request.error_code_mapping,
application=task_request.application,
include_action_history_in_verification=task_request.include_action_history_in_verification,
)
LOG.info(
"Created new task",
@@ -1033,6 +1035,7 @@ class ForgeAgent:
for result in results:
result.step_retry_number = step.retry_index
result.step_order = step.order
step.output = detailed_agent_step_output.to_agent_step_output()
action_results.extend(results)
# Check the last result for this action. If that succeeded, assume the entire action is successful
if results and results[-1].success:
@@ -1462,8 +1465,9 @@ class ForgeAgent:
)
return actions
@staticmethod
async def complete_verify(page: Page, scraped_page: ScrapedPage, task: Task, step: Step) -> CompleteVerifyResult:
async def complete_verify(
self, page: Page, scraped_page: ScrapedPage, task: Task, step: Step
) -> CompleteVerifyResult:
LOG.info(
"Checking if user goal is achieved after re-scraping the page",
task_id=task.task_id,
@@ -1477,6 +1481,10 @@ class ForgeAgent:
scraped_page_refreshed = await scraped_page.refresh(draw_boxes=False, scroll=scroll)
actions_and_results_str = ""
if task.include_action_history_in_verification:
actions_and_results_str = await self._get_action_results(task, current_step=step)
verification_prompt = load_prompt_with_elements(
scraped_page=scraped_page_refreshed,
prompt_engine=prompt_engine,
@@ -1484,6 +1492,7 @@ class ForgeAgent:
navigation_goal=task.navigation_goal,
navigation_payload=task.navigation_payload,
complete_criterion=task.complete_criterion,
action_history=actions_and_results_str,
local_datetime=datetime.now(skyvern_context.ensure_context().tz_info).isoformat(),
)
@@ -1496,12 +1505,11 @@ class ForgeAgent:
)
return CompleteVerifyResult.model_validate(verification_result)
@staticmethod
async def check_user_goal_complete(
page: Page, scraped_page: ScrapedPage, task: Task, step: Step
self, page: Page, scraped_page: ScrapedPage, task: Task, step: Step
) -> CompleteAction | None:
try:
verification_result = await app.agent.complete_verify(
verification_result = await self.complete_verify(
page=page,
scraped_page=scraped_page,
task=task,
@@ -1878,11 +1886,20 @@ class ForgeAgent:
current_context.totp_codes.pop(task.task_id)
return final_navigation_payload
async def _get_action_results(self, task: Task) -> str:
async def _get_action_results(self, task: Task, current_step: Step | None = None) -> str:
"""
Get the action results from the last app.SETTINGS.PROMPT_ACTION_HISTORY_WINDOW steps.
If current_step is provided, the currently executing step is appended to the action history.
By default, the currently executing step is excluded from the action history.
"""
# Get action results from the last app.SETTINGS.PROMPT_ACTION_HISTORY_WINDOW steps
steps = await app.DATABASE.get_task_steps(task_id=task.task_id, organization_id=task.organization_id)
# The last step is always the newly created one, so it is excluded from the history window.
window_steps = steps[-1 - settings.PROMPT_ACTION_HISTORY_WINDOW : -1]
if current_step:
window_steps.append(current_step)
actions_and_results: list[tuple[Action, list[ActionResult]]] = []
for window_step in window_steps:
if window_step.output and window_step.output.actions_and_results:

View File

@@ -23,6 +23,12 @@ Complete Criterion:
```
{{ complete_criterion }}
```{% endif %}
{% if action_history %}
Action History:
```
{{ action_history }}
```
{% endif %}
Elements on the page:
```

View File

@@ -139,6 +139,7 @@ class AgentDB:
error_code_mapping: dict[str, str] | None = None,
task_type: str = TaskType.general,
application: str | None = None,
include_action_history_in_verification: bool | None = None,
) -> Task:
try:
async with self.Session() as session:
@@ -164,6 +165,7 @@ class AgentDB:
max_steps_per_run=max_steps_per_run,
error_code_mapping=error_code_mapping,
application=application,
include_action_history_in_verification=include_action_history_in_verification,
)
session.add(new_task)
await session.commit()
@@ -2564,6 +2566,7 @@ class AgentDB:
wait_sec: int | None = None,
description: str | None = None,
block_workflow_run_id: str | None = None,
include_action_history_in_verification: bool | None = None,
) -> WorkflowRunBlock:
async with self.Session() as session:
workflow_run_block = (
@@ -2604,6 +2607,8 @@ class AgentDB:
workflow_run_block.description = description
if block_workflow_run_id:
workflow_run_block.block_workflow_run_id = block_workflow_run_id
if include_action_history_in_verification is not None:
workflow_run_block.include_action_history_in_verification = include_action_history_in_verification
await session.commit()
await session.refresh(workflow_run_block)
else:

View File

@@ -83,6 +83,7 @@ class TaskModel(Base):
errors = Column(JSON, default=[], nullable=False)
max_steps_per_run = Column(Integer, nullable=True)
application = Column(String, nullable=True)
include_action_history_in_verification = Column(Boolean, default=False, nullable=True)
created_at = Column(DateTime, default=datetime.datetime.utcnow, nullable=False, index=True)
modified_at = Column(
DateTime,
@@ -532,6 +533,7 @@ class WorkflowRunBlockModel(Base):
workflow_run_block_id = Column(String, primary_key=True, default=generate_workflow_run_block_id)
workflow_run_id = Column(String, nullable=False)
include_action_history_in_verification = Column(Boolean, default=False, nullable=True)
# this is the inner workflow run id of the taskv2 block
block_workflow_run_id = Column(String, nullable=True)
parent_workflow_run_block_id = Column(String, nullable=True)

View File

@@ -71,6 +71,7 @@ def convert_to_task(task_obj: TaskModel, debug_enabled: bool = False, workflow_p
url=task_obj.url,
complete_criterion=task_obj.complete_criterion,
terminate_criterion=task_obj.terminate_criterion,
include_action_history_in_verification=task_obj.include_action_history_in_verification,
webhook_callback_url=task_obj.webhook_callback_url,
totp_verification_url=task_obj.totp_verification_url,
totp_identifier=task_obj.totp_identifier,
@@ -411,6 +412,7 @@ def convert_to_workflow_run_block(
body=workflow_run_block_model.body,
created_at=workflow_run_block_model.created_at,
modified_at=workflow_run_block_model.modified_at,
include_action_history_in_verification=workflow_run_block_model.include_action_history_in_verification,
)
if task:
block.url = task.url

View File

@@ -79,7 +79,11 @@ class Step(BaseModel):
if output is not None and status is None:
raise ValueError(f"cant_set_output_without_updating_status({self.step_id})")
if self.output is not None and output is not None:
if (
old_status not in [StepStatus.running, StepStatus.created]
and self.output is not None
and output is not None
):
raise ValueError(f"cant_override_output({self.step_id})")
if is_last is False:

View File

@@ -90,6 +90,11 @@ class TaskBase(BaseModel):
description="The application for which the task is running",
examples=["forms"],
)
include_action_history_in_verification: bool | None = Field(
default=False,
description="Whether to include the action history when verifying the task is complete",
examples=[True, False],
)
class TaskRequest(TaskBase):

View File

@@ -35,6 +35,7 @@ class WorkflowRunBlock(BaseModel):
actions: list[Action] = []
created_at: datetime
modified_at: datetime
include_action_history_in_verification: bool | None = False
# for loop block
loop_values: list[Any] | None = None

View File

@@ -367,6 +367,7 @@ class BaseTaskBlock(Block):
totp_identifier: str | None = None
cache_actions: bool = False
complete_verification: bool = True
include_action_history_in_verification: bool = False
def get_all_parameters(
self,
@@ -544,6 +545,7 @@ class BaseTaskBlock(Block):
workflow_run_block_id=workflow_run_block_id,
task_id=task.task_id,
organization_id=organization_id,
include_action_history_in_verification=self.include_action_history_in_verification,
)
current_running_task = task
organization = await app.DATABASE.get_organization(organization_id=workflow_run.organization_id)

View File

@@ -143,6 +143,7 @@ class TaskBlockYAML(BlockYAML):
complete_criterion: str | None = None
terminate_criterion: str | None = None
complete_verification: bool = True
include_action_history_in_verification: bool = False
class ForLoopBlockYAML(BlockYAML):
@@ -288,6 +289,7 @@ class NavigationBlockYAML(BlockYAML):
complete_criterion: str | None = None
terminate_criterion: str | None = None
complete_verification: bool = True
include_action_history_in_verification: bool = False
class ExtractionBlockYAML(BlockYAML):

View File

@@ -1621,6 +1621,7 @@ class WorkflowService:
complete_criterion=block_yaml.complete_criterion,
terminate_criterion=block_yaml.terminate_criterion,
complete_verification=block_yaml.complete_verification,
include_action_history_in_verification=block_yaml.include_action_history_in_verification,
)
elif block_yaml.block_type == BlockType.FOR_LOOP:
loop_blocks = [
@@ -1816,6 +1817,7 @@ class WorkflowService:
complete_criterion=block_yaml.complete_criterion,
terminate_criterion=block_yaml.terminate_criterion,
complete_verification=block_yaml.complete_verification,
include_action_history_in_verification=block_yaml.include_action_history_in_verification,
)
elif block_yaml.block_type == BlockType.EXTRACTION:

View File

@@ -225,6 +225,9 @@ class TaskRunRequest(BaseModel):
description="ID of an existing browser session to reuse, having it continue from the current screen state",
)
publish_workflow: bool = Field(default=False, description="Whether to publish this task as a reusable workflow. ")
include_action_history_in_verification: bool = Field(
default=False, description="Whether to include action history when verifying that the task is complete"
)
@field_validator("url", "webhook_url", "totp_url")
@classmethod