Show branch evaluations in conditional block output (#SKY-7495) (#4572)

This commit is contained in:
Celal Zamanoglu
2026-01-28 23:31:03 +03:00
committed by GitHub
parent 57c3c07593
commit 9c2b4c690e
2 changed files with 303 additions and 62 deletions

View File

@@ -12,3 +12,11 @@ Use this context to understand variable values:
{{ context_json }}
{% endif %}
For each condition, provide IN THIS ORDER:
1. "rendered_condition": REPLACE all variable names and references with their actual values.
- REMOVE the variable names entirely and put only the values in their place
- Do NOT include both the variable name and value - only the value
- Example: "base_date is same as date_3" → "01-25-2026 is same as 01-25-2026" (NOT "base_date 01-25-2026 is same as date_3 01-25-2026")
- Example: "date shown in the website is not same with date_3" → "01-27-2026 is not same with 01-07-2025"
2. "reasoning": Explain your reasoning for evaluating the rendered condition. Compare the actual values.
3. "result": The boolean result (true/false) based on the rendered condition.

View File

@@ -4679,27 +4679,50 @@ class BranchEvaluationContext:
def build_llm_safe_context_snapshot(self) -> dict[str, Any]:
"""
Build a non-secret context blob for LLM-facing branch evaluation.
Build a minimal context blob for LLM-facing branch evaluation.
Secrets are stripped/masked; only params/outputs/environment and cached
block metadata are included so the LLM can ground purely natural language
expressions without requiring inline templating.
Only includes essential data the LLM needs to evaluate conditions:
- Parameter values (base_date, date_1, etc.)
- Extracted information from previous blocks
- Loop variables (current_value, current_index, current_item)
"""
if self.workflow_run_context is None:
return {}
ctx = self.workflow_run_context
raw_values: dict[str, Any] = ctx.values.copy()
# Start from the recorded values (params, outputs, env, block outputs)
snapshot: dict[str, Any] = ctx.values.copy()
# Keys to skip - these are not useful for evaluating conditions
keys_to_skip = {
"blocks_metadata",
"params",
"outputs",
"environment",
"env",
"llm",
"workflow_title",
"workflow_id",
"workflow_permanent_id",
"workflow_run_id",
}
# Add block metadata (e.g., loop indices/current_item) without mutating originals
snapshot["blocks_metadata"] = ctx.blocks_metadata.copy()
snapshot: dict[str, Any] = {}
for key, value in raw_values.items():
# Skip noisy keys
if key in keys_to_skip:
continue
# For block outputs (dicts with extracted_information), only include extracted_information
if isinstance(value, dict) and "extracted_information" in value:
extracted = value.get("extracted_information")
if extracted is not None:
snapshot[key] = extracted
else:
# Include parameter values directly
snapshot[key] = value
# Copy loop variables (current_value, current_index, current_item) to top level
# Required for pure NatLang expressions like "current_value['date']" to work
# Without this, current_value is buried in blocks_metadata.{block_label}.current_value
# and the LLM can't find it when evaluating natural language expressions
if self.block_label:
block_metadata = ctx.get_block_metadata(self.block_label)
if "current_value" in block_metadata:
@@ -4709,19 +4732,6 @@ class BranchEvaluationContext:
if "current_item" in block_metadata:
snapshot["current_item"] = block_metadata["current_item"]
# Ensure the common namespaces exist
snapshot.setdefault("params", snapshot.get("params", {}))
snapshot.setdefault("outputs", snapshot.get("outputs", {}))
snapshot.setdefault("environment", snapshot.get("environment", {}))
snapshot.setdefault("env", snapshot.get("environment", {}))
snapshot.setdefault("llm", snapshot.get("llm", {}))
# Standard workflow identifiers for additional context
snapshot.setdefault("workflow_title", ctx.workflow_title)
snapshot.setdefault("workflow_id", ctx.workflow_id)
snapshot.setdefault("workflow_permanent_id", ctx.workflow_permanent_id)
snapshot.setdefault("workflow_run_id", ctx.workflow_run_id)
# Mask any real secret values that may have leaked into values
snapshot = ctx.mask_secrets_in_data(snapshot)
@@ -4917,6 +4927,135 @@ def _is_pure_jinja_expression(expression: str) -> bool:
return True
def _render_jinja_expression_for_display(
    expression: str,
    context_values: dict[str, Any],
    block_label: str | None = None,
) -> str:
    """
    Render a pure Jinja expression for UI display by substituting variable names with values.

    This is for display purposes only - it shows users what values were compared
    without actually evaluating the expression. For example:
    - Input: "{{ base_date == date_1 }}" with context {"base_date": "01-25-2026", "date_1": "01-25-2026"}
    - Output: '"01-25-2026" == "01-25-2026"'

    Args:
        expression: The raw branch expression, e.g. "{{ base_date == date_1 }}".
        context_values: Mapping of variable name -> value used for substitution.
        block_label: Optional block label, included in debug logs on failure.

    Returns:
        The display-rendered expression, or the original expression unchanged if
        it is not a pure Jinja expression or if rendering fails.
    """
    if not _is_pure_jinja_expression(expression):
        return expression
    try:
        # Extract inner expression (strip {{ and }})
        inner_expr = expression.strip()[2:-2].strip()
        if not context_values:
            # Nothing to substitute; also avoids building an empty alternation
            # pattern below, which would match empty strings everywhere.
            return inner_expr
        # Substitute all variables in a SINGLE regex pass. A per-variable loop
        # of re.sub calls can cascade: if one variable's *value* happens to
        # contain another variable's name, a later iteration would replace text
        # inside the already-substituted value. A single alternation pattern
        # only ever matches the original expression text.
        # Longest names first so the alternation prefers e.g. "date_1" over "date",
        # and word boundaries so "date" won't match inside "validate_date".
        sorted_names = sorted(context_values.keys(), key=len, reverse=True)
        pattern = re.compile(r"\b(?:" + "|".join(re.escape(name) for name in sorted_names) + r")\b")

        def _substitute(match: re.Match[str]) -> str:
            value = context_values[match.group(0)]
            # Quote string values for clarity
            return f'"{value}"' if isinstance(value, str) else str(value)

        return pattern.sub(_substitute, inner_expr)
    except Exception as exc:
        LOG.debug(
            "Failed to render Jinja expression for display",
            block_label=block_label,
            expression=expression,
            error=str(exc),
        )
        return expression
def _find_evaluations_array(output_value: dict[str, Any]) -> list[Any]:
"""
Extract the evaluations array from LLM output.
ExtractionBlock wraps output in 'extracted_information', so we check there first.
Falls back to direct access if not found in the nested structure.
Args:
output_value: The raw output from ExtractionBlock
Returns:
List of evaluation objects from the LLM
Raises:
ValueError: If evaluations array is not found or has wrong type
"""
# Try standard ExtractionBlock format: output_value.extracted_information.evaluations
extracted_info = output_value.get("extracted_information")
if isinstance(extracted_info, dict):
raw_evaluations = extracted_info.get("evaluations")
else:
# Fallback: try direct access at output_value.evaluations
raw_evaluations = output_value.get("evaluations")
if not isinstance(raw_evaluations, list):
raise ValueError(f"Expected array of evaluations, got: {type(raw_evaluations)}")
return raw_evaluations
def _parse_single_evaluation(
evaluation: Any,
idx: int,
fallback_rendered_expressions: list[str],
) -> tuple[bool, str]:
"""
Parse a single evaluation from the LLM response.
Handles two formats:
- New format (dict): {result: bool, rendered_condition: str, reasoning: str}
- Legacy format: just a boolean value
Args:
evaluation: Single evaluation object from LLM (dict or bool)
idx: Index of this evaluation (for fallback lookup)
fallback_rendered_expressions: Pre-rendered expressions to use if LLM didn't provide one
Returns:
Tuple of (boolean_result, rendered_condition_string)
"""
# Determine fallback rendered expression
fallback_rendered = fallback_rendered_expressions[idx] if idx < len(fallback_rendered_expressions) else ""
if isinstance(evaluation, dict):
# New format: {result, rendered_condition, reasoning}
result = evaluation.get("result")
if isinstance(result, bool):
bool_result = result
else:
bool_result = _evaluate_truthy_string(str(result))
LOG.warning(
"Prompt branch evaluation returned non-boolean result",
branch_index=idx,
result=result,
evaluated_result=bool_result,
)
# Get rendered_condition, fallback to pre-rendered expression
rendered_cond = evaluation.get("rendered_condition")
if rendered_cond and isinstance(rendered_cond, str):
rendered_expression = rendered_cond
else:
rendered_expression = fallback_rendered
return (bool_result, rendered_expression)
else:
# Legacy format: just a boolean
if isinstance(evaluation, bool):
bool_result = evaluation
else:
bool_result = _evaluate_truthy_string(str(evaluation))
return (bool_result, fallback_rendered)
class BranchCondition(BaseModel):
"""Represents a single conditional branch edge within a ConditionalBlock."""
@@ -4993,7 +5132,7 @@ class ConditionalBlock(Block):
workflow_run_block_id: str,
organization_id: str | None = None,
browser_session_id: str | None = None,
) -> list[bool]:
) -> tuple[list[bool], list[str], str | None, dict | None]:
"""
Evaluate natural language branch conditions using a single ExtractionBlock.
@@ -5004,12 +5143,19 @@ class ConditionalBlock(Block):
- Browser/page access for expressions like "comment count > 100"
- UI visibility (shows up in workflow timeline with prompt/response)
- Proper LLM integration with data_schema
Returns:
A tuple of (results, rendered_expressions, extraction_goal, llm_response):
- results: List of boolean results for each branch
- rendered_expressions: List of expressions after Jinja pre-rendering
- extraction_goal: The prompt sent to the LLM (for UI display)
- llm_response: The raw LLM response for debugging
"""
if organization_id is None:
raise ValueError("organization_id is required to evaluate natural language branches")
if not branches:
return []
return ([], [], None, None)
workflow_run_context = evaluation_context.workflow_run_context
@@ -5068,22 +5214,39 @@ class ConditionalBlock(Block):
context_json=context_json,
)
# Step 3: Build schema for array of boolean results
# Step 3: Build schema for array of evaluation results
# Order matters: rendered_condition -> reasoning -> result (chain-of-thought)
data_schema = {
"type": "object",
"properties": {
"results": {
"evaluations": {
"type": "array",
"items": {"type": "boolean"},
"description": (
"Array of boolean results for each condition in the same order. "
"TRUE if the condition is satisfied, FALSE otherwise."
),
"items": {
"type": "object",
"properties": {
"rendered_condition": {
"type": "string",
"description": (
"The condition with all variable names and references replaced with actual values."
),
},
"reasoning": {
"type": "string",
"description": "Explanation of the reasoning behind evaluating the rendered condition.",
},
"result": {
"type": "boolean",
"description": "TRUE if the rendered condition is satisfied, FALSE otherwise.",
},
},
"required": ["rendered_condition", "reasoning", "result"],
},
"description": "Array of evaluation results for each condition in the same order.",
"minItems": len(branches),
"maxItems": len(branches),
}
},
"required": ["results"],
"required": ["evaluations"],
}
# Step 4: Create and execute single ExtractionBlock
@@ -5145,41 +5308,32 @@ class ConditionalBlock(Block):
exc_info=True,
)
# Step 5: Extract the boolean results array
# Step 5: Extract the evaluation results (result + rendered_condition)
output_value = extraction_result.output_parameter_value
results_array: list[bool] = []
llm_rendered_expressions: list[str] = []
if isinstance(output_value, dict):
# Check if results is in extracted_information (standard ExtractionBlock output)
extracted_info = output_value.get("extracted_information")
if isinstance(extracted_info, dict):
raw_results = extracted_info.get("results")
else:
# Fallback: try direct access
raw_results = output_value.get("results")
if isinstance(raw_results, list):
for idx, result in enumerate(raw_results):
if isinstance(result, bool):
results_array.append(result)
else:
evaluated_result = _evaluate_truthy_string(str(result))
LOG.warning(
"Prompt branch evaluation returned non-boolean result",
branch_index=idx,
result=result,
evaluated_result=evaluated_result,
)
results_array.append(evaluated_result)
else:
raise ValueError(f"Expected array of results, got: {type(raw_results)}")
else:
if not isinstance(output_value, dict):
raise ValueError(f"Unexpected output format: {type(output_value)}")
# Find evaluations array from LLM output (handles ExtractionBlock nesting)
raw_evaluations = _find_evaluations_array(output_value)
# Parse each evaluation to extract result and rendered_condition
for idx, evaluation in enumerate(raw_evaluations):
bool_result, rendered_expr = _parse_single_evaluation(
evaluation=evaluation,
idx=idx,
fallback_rendered_expressions=rendered_expressions,
)
results_array.append(bool_result)
llm_rendered_expressions.append(rendered_expr)
LOG.info(
"Conditional branch evaluation results",
block_label=self.label,
results=results_array,
llm_rendered_expressions=llm_rendered_expressions,
raw_output=output_value,
)
@@ -5188,7 +5342,7 @@ class ConditionalBlock(Block):
f"Prompt branch evaluation returned {len(results_array)} results for {len(branches)} branches"
)
return results_array
return (results_array, llm_rendered_expressions, extraction_goal, output_value)
except Exception as exc:
LOG.error(
@@ -5229,13 +5383,24 @@ class ConditionalBlock(Block):
matched_branch = None
failure_reason: str | None = None
# Track all branch evaluations for UI display
branch_evaluations_list: list[dict] = []
prompt_rendered_by_id: dict[str, str] = {}
natural_language_branches = [
branch for branch in self.ordered_branches if isinstance(branch.criteria, PromptBranchCriteria)
]
prompt_results_by_id: dict[str, bool] = {}
prompt_llm_response: dict | None = None
prompt_extraction_goal: str | None = None
if natural_language_branches:
try:
prompt_results = await self._evaluate_prompt_branches(
(
prompt_results,
prompt_rendered_expressions,
prompt_extraction_goal,
prompt_llm_response,
) = await self._evaluate_prompt_branches(
branches=natural_language_branches,
evaluation_context=evaluation_context,
workflow_run_id=workflow_run_id,
@@ -5246,6 +5411,10 @@ class ConditionalBlock(Block):
prompt_results_by_id = {
branch.id: result for branch, result in zip(natural_language_branches, prompt_results, strict=False)
}
prompt_rendered_by_id = {
branch.id: rendered
for branch, rendered in zip(natural_language_branches, prompt_rendered_expressions, strict=False)
}
except Exception as exc:
failure_reason = f"Failed to evaluate natural language branches: {str(exc)}"
LOG.error(
@@ -5256,24 +5425,49 @@ class ConditionalBlock(Block):
)
for idx, branch in enumerate(self.ordered_branches):
branch_eval: dict = {
"branch_id": branch.id,
"branch_index": idx,
"criteria_type": branch.criteria.criteria_type if branch.criteria else None,
"original_expression": branch.criteria.expression if branch.criteria else None,
"rendered_expression": None,
"result": None,
"is_matched": False,
"is_default": branch.is_default,
"next_block_label": branch.next_block_label,
"error": None,
}
# Handle default branch (no criteria to evaluate)
if branch.criteria is None:
# Default branch - only matched if no other branch matches
branch_evaluations_list.append(branch_eval)
continue
if branch.criteria.criteria_type == "prompt":
if failure_reason:
branch_eval["error"] = failure_reason
branch_evaluations_list.append(branch_eval)
break
prompt_result = prompt_results_by_id.get(branch.id)
rendered_expr = prompt_rendered_by_id.get(branch.id)
branch_eval["rendered_expression"] = rendered_expr
if prompt_result is None:
failure_reason = "Missing result for natural language branch evaluation"
branch_eval["error"] = failure_reason
LOG.error(
"Missing prompt evaluation result",
block_label=self.label,
branch_index=idx,
branch_id=branch.id,
)
branch_evaluations_list.append(branch_eval)
break
branch_eval["result"] = prompt_result
branch_evaluations_list.append(branch_eval)
if prompt_result:
matched_branch = branch
branch_eval["is_matched"] = True
LOG.info(
"Conditional natural language branch matched",
block_label=self.label,
@@ -5283,9 +5477,25 @@ class ConditionalBlock(Block):
break
continue
# Jinja template branch
try:
if await branch.criteria.evaluate(evaluation_context):
# Render the expression for UI display - substitute variables without evaluating
rendered_expression = _render_jinja_expression_for_display(
expression=branch.criteria.expression,
context_values=evaluation_context.workflow_run_context.values
if evaluation_context.workflow_run_context
else {},
block_label=self.label,
)
branch_eval["rendered_expression"] = rendered_expression
result = await branch.criteria.evaluate(evaluation_context)
branch_eval["result"] = result
branch_evaluations_list.append(branch_eval)
if result:
matched_branch = branch
branch_eval["is_matched"] = True
LOG.info(
"Conditional branch matched",
block_label=self.label,
@@ -5295,6 +5505,9 @@ class ConditionalBlock(Block):
break
except Exception as exc:
failure_reason = f"Failed to evaluate branch {idx} for {self.label}: {str(exc)}"
branch_eval["error"] = str(exc)
branch_eval["result"] = None
branch_evaluations_list.append(branch_eval)
LOG.error(
"Failed to evaluate conditional branch",
block_label=self.label,
@@ -5306,6 +5519,12 @@ class ConditionalBlock(Block):
if matched_branch is None and failure_reason is None:
matched_branch = self.get_default_branch()
# Update is_matched for default branch in evaluations
if matched_branch:
for eval_entry in branch_evaluations_list:
if eval_entry["branch_id"] == matched_branch.id:
eval_entry["is_matched"] = True
break
matched_index = self.ordered_branches.index(matched_branch) if matched_branch in self.ordered_branches else None
next_block_label = matched_branch.next_block_label if matched_branch else None
@@ -5339,6 +5558,20 @@ class ConditionalBlock(Block):
if matched_branch and matched_branch.criteria
else None,
"next_block_label": next_block_label,
# Detailed evaluation info for all branches
"evaluations": branch_evaluations_list if branch_evaluations_list else None,
# Raw LLM response for debugging prompt-based evaluations (masked for secrets)
"llm_response": (
workflow_run_context.mask_secrets_in_data(prompt_llm_response)
if workflow_run_context and prompt_llm_response
else prompt_llm_response
),
# The exact prompt sent to LLM for debugging (masked for secrets)
"llm_prompt": (
workflow_run_context.mask_secrets_in_data(prompt_extraction_goal)
if workflow_run_context and prompt_extraction_goal
else prompt_extraction_goal
),
}
status = BlockStatus.completed