Add termination-aware complete verification experiment (SKY-6884) (#3948)
This commit is contained in:
@@ -29,13 +29,50 @@ class SelectOption(BaseModel):
|
||||
return f"SelectOption(label={self.label}, value={self.value}, index={self.index})"
|
||||
|
||||
|
||||
class VerificationStatus(StrEnum):
|
||||
"""Status of user goal verification."""
|
||||
|
||||
complete = "complete" # Goal achieved successfully
|
||||
terminate = "terminate" # Goal cannot be achieved, stop trying
|
||||
continue_step = "continue" # Goal not yet achieved, continue with more steps
|
||||
|
||||
|
||||
class CompleteVerifyResult(BaseModel):
    """Result of verifying whether the user goal has been reached.

    Two formats are supported:

    * New format: ``status`` carries one of the three
      ``VerificationStatus`` values (used when the experiment is enabled).
    * Legacy format: ``status`` is ``None`` and the two booleans
      ``user_goal_achieved`` / ``should_terminate`` describe the outcome
      (used when the experiment is disabled).

    Callers should prefer the ``is_complete`` / ``is_terminate`` /
    ``is_continue`` properties, which read whichever format is populated.
    """

    # New field: explicit status with three options (used when experiment is enabled)
    status: VerificationStatus | None = None

    # Legacy fields: for backward compatibility (used when experiment is disabled)
    user_goal_achieved: bool = False
    should_terminate: bool = False

    thoughts: str
    page_info: str | None = None

    def __repr__(self) -> str:
        """Return a debug string showing only the fields of the active format."""
        if self.status is not None:
            return (
                f"CompleteVerifyResult(status={self.status}, "
                f"thoughts={self.thoughts}, page_info={self.page_info})"
            )
        return (
            f"CompleteVerifyResult(thoughts={self.thoughts}, "
            f"user_goal_achieved={self.user_goal_achieved}, "
            f"should_terminate={self.should_terminate}, "
            f"page_info={self.page_info})"
        )

    @property
    def is_complete(self) -> bool:
        """True if goal was achieved (supports both new and legacy formats)."""
        # An explicit status always takes precedence over the legacy booleans.
        if self.status is not None:
            return self.status == VerificationStatus.complete
        return self.user_goal_achieved

    @property
    def is_terminate(self) -> bool:
        """True if task should terminate (supports both new and legacy formats)."""
        if self.status is not None:
            return self.status == VerificationStatus.terminate
        return self.should_terminate

    @property
    def is_continue(self) -> bool:
        """True if task should continue (supports both new and legacy formats)."""
        if self.status is not None:
            return self.status == VerificationStatus.continue_step
        # Legacy format: "continue" is whatever is neither complete nor terminate.
        return not self.user_goal_achieved and not self.should_terminate
|
||||
|
||||
|
||||
class InputOrSelectContext(BaseModel):
|
||||
|
||||
@@ -1994,7 +1994,32 @@ async def handle_complete_action(
|
||||
)
|
||||
return [ActionFailure(exception=e)]
|
||||
|
||||
if not verification_result.user_goal_achieved:
|
||||
# Check if we should terminate instead of complete
|
||||
# Note: This requires the USE_TERMINATION_AWARE_COMPLETE_VERIFICATION experiment to be enabled
|
||||
if verification_result.is_terminate:
|
||||
LOG.warning(
|
||||
"CompleteAction verification determined task should terminate instead (termination-aware experiment)",
|
||||
workflow_run_id=task.workflow_run_id,
|
||||
thoughts=verification_result.thoughts,
|
||||
status=verification_result.status if verification_result.status else "legacy",
|
||||
)
|
||||
# Create a TerminateAction and execute it
|
||||
terminate_action = actions.TerminateAction(
|
||||
reasoning=verification_result.thoughts,
|
||||
organization_id=action.organization_id,
|
||||
workflow_run_id=action.workflow_run_id,
|
||||
task_id=action.task_id,
|
||||
step_id=action.step_id,
|
||||
step_order=action.step_order,
|
||||
action_order=action.action_order,
|
||||
)
|
||||
results = await handle_terminate_action(terminate_action, page, scraped_page, task, step)
|
||||
action.action_type = ActionType.TERMINATE
|
||||
action.reasoning = terminate_action.reasoning
|
||||
action.errors = terminate_action.errors
|
||||
return results
|
||||
|
||||
if not verification_result.is_complete:
|
||||
return [ActionFailure(exception=IllegitComplete(data={"error": verification_result.thoughts}))]
|
||||
|
||||
LOG.info(
|
||||
|
||||
Reference in New Issue
Block a user