Add termination-aware complete verification experiment (SKY-6884) (#3948)

This commit is contained in:
pedrohsdb
2025-11-07 18:53:51 -08:00
committed by GitHub
parent ea7361c9f2
commit ca958da6be
4 changed files with 252 additions and 28 deletions

View File

@@ -29,13 +29,50 @@ class SelectOption(BaseModel):
return f"SelectOption(label={self.label}, value={self.value}, index={self.index})"
class VerificationStatus(StrEnum):
"""Status of user goal verification."""
complete = "complete" # Goal achieved successfully
terminate = "terminate" # Goal cannot be achieved, stop trying
continue_step = "continue" # Goal not yet achieved, continue with more steps
class CompleteVerifyResult(BaseModel):
    """Result of verifying whether the user's goal has been achieved.

    Two formats are supported:

    * New format: ``status`` holds a ``VerificationStatus`` member and is the
      only field consulted by the ``is_*`` properties.
    * Legacy format: ``status`` is ``None`` and the outcome is derived from
      the ``user_goal_achieved`` and ``should_terminate`` booleans.

    Prefer ``is_complete`` / ``is_terminate`` / ``is_continue`` over reading
    the raw fields, since the properties handle both formats.
    """

    # New field: explicit status with three options (used when experiment is enabled)
    status: VerificationStatus | None = None
    # Legacy fields: for backward compatibility (used when experiment is disabled)
    user_goal_achieved: bool = False
    should_terminate: bool = False
    thoughts: str
    page_info: str | None = None

    def __repr__(self) -> str:
        """Return a debug string showing only the fields of the active format."""
        if self.status is not None:
            return (
                f"CompleteVerifyResult(status={self.status}, "
                f"thoughts={self.thoughts}, page_info={self.page_info})"
            )
        return (
            f"CompleteVerifyResult(thoughts={self.thoughts}, "
            f"user_goal_achieved={self.user_goal_achieved}, "
            f"should_terminate={self.should_terminate}, page_info={self.page_info})"
        )

    @property
    def is_complete(self) -> bool:
        """True if goal was achieved (supports both new and legacy formats)."""
        if self.status is not None:
            return self.status == VerificationStatus.complete
        return self.user_goal_achieved

    @property
    def is_terminate(self) -> bool:
        """True if task should terminate (supports both new and legacy formats)."""
        if self.status is not None:
            return self.status == VerificationStatus.terminate
        return self.should_terminate

    @property
    def is_continue(self) -> bool:
        """True if task should continue (supports both new and legacy formats)."""
        if self.status is not None:
            return self.status == VerificationStatus.continue_step
        # Legacy format: continue only when neither terminal flag is set.
        return not self.user_goal_achieved and not self.should_terminate
class InputOrSelectContext(BaseModel):

View File

@@ -1994,7 +1994,32 @@ async def handle_complete_action(
)
return [ActionFailure(exception=e)]
if not verification_result.user_goal_achieved:
# Check if we should terminate instead of complete
# Note: This requires the USE_TERMINATION_AWARE_COMPLETE_VERIFICATION experiment to be enabled
if verification_result.is_terminate:
LOG.warning(
"CompleteAction verification determined task should terminate instead (termination-aware experiment)",
workflow_run_id=task.workflow_run_id,
thoughts=verification_result.thoughts,
status=verification_result.status if verification_result.status else "legacy",
)
# Create a TerminateAction and execute it
terminate_action = actions.TerminateAction(
reasoning=verification_result.thoughts,
organization_id=action.organization_id,
workflow_run_id=action.workflow_run_id,
task_id=action.task_id,
step_id=action.step_id,
step_order=action.step_order,
action_order=action.action_order,
)
results = await handle_terminate_action(terminate_action, page, scraped_page, task, step)
action.action_type = ActionType.TERMINATE
action.reasoning = terminate_action.reasoning
action.errors = terminate_action.errors
return results
if not verification_result.is_complete:
return [ActionFailure(exception=IllegitComplete(data={"error": verification_result.thoughts}))]
LOG.info(