# NOTE: scraped file-listing chrome removed so this module parses; original path:
# Dorod-Sky/tests/unit/test_script_generation_race_condition.py

"""
Tests for script generation race condition (SKY-7653).
The race condition occurs when script generation runs during workflow execution
before all actions have been saved to the database. This results in:
1. `generate_workflow_parameters_schema` not finding INPUT_TEXT actions
2. No field_name mappings being generated
3. Generated script having hardcoded values instead of context.parameters[field_name]
"""
from typing import Any
import pytest
from skyvern.core.script_generations import generate_workflow_parameters as gwp
from skyvern.core.script_generations.generate_workflow_parameters import (
CUSTOM_FIELD_ACTIONS,
GeneratedFieldMapping,
generate_workflow_parameters_schema,
hydrate_input_text_actions_with_field_names,
)
from skyvern.forge.sdk.core import skyvern_context
from skyvern.forge.sdk.core.skyvern_context import SkyvernContext
from skyvern.forge.sdk.workflow.service import BLOCK_TYPES_THAT_SHOULD_BE_CACHED
from skyvern.webeye.actions.actions import ActionType
def make_input_text_action(
    task_id: str,
    action_id: str,
    text: str,
    intention: str = "",
    field_name: str | None = None,
) -> dict[str, Any]:
    """Create a mock INPUT_TEXT action dictionary.

    Args:
        task_id: Task the action belongs to.
        action_id: Unique identifier of the action.
        text: Text typed by the action (this value gets hardcoded into the
            generated script when no field_name mapping exists).
        intention: Human-readable description of the action's goal.
        field_name: Parameter name mapped to this action; ``None`` means no
            mapping was generated (the race-condition case).

    Returns:
        A dict shaped like a serialized INPUT_TEXT action.
    """
    action: dict[str, Any] = {
        "action_type": ActionType.INPUT_TEXT,
        "action_id": action_id,
        "task_id": task_id,
        "text": text,
        "intention": intention,
        "element_id": "element_1",
        "xpath": "//input[@id='test']",
    }
    # Explicit None check: the previous `if field_name:` truthiness test would
    # silently drop an empty-string field name, which `str | None` permits.
    if field_name is not None:
        action["field_name"] = field_name
    return action
def make_click_action(task_id: str, action_id: str) -> dict[str, Any]:
    """Build a minimal serialized CLICK action for the given task/action ids."""
    clicked: dict[str, Any] = dict(
        action_type=ActionType.CLICK,
        action_id=action_id,
        task_id=task_id,
        element_id="element_2",
        xpath="//button[@id='submit']",
    )
    return clicked
class TestRaceConditionScenarios:
    """Test scenarios that demonstrate the race condition."""

    def test_hydrate_adds_field_name_to_actions(self) -> None:
        """hydrate_input_text_actions_with_field_names attaches field_name."""
        task_id, action_id = "task-123", "action-456"
        actions_by_task = {
            task_id: [make_input_text_action(task_id, action_id, "Urdaneta", "Enter facility name")],
        }
        mappings = {f"{task_id}:{action_id}": "facility_name"}

        hydrated = hydrate_input_text_actions_with_field_names(actions_by_task, mappings)

        # After hydration the action carries the mapped field name.
        assert hydrated[task_id][0].get("field_name") == "facility_name"

    def test_hydrate_without_mappings_no_field_name(self) -> None:
        """
        Without field mappings, actions do not get field_name added.

        This simulates script generation running before actions are saved.
        """
        task_id, action_id = "task-123", "action-456"
        actions_by_task = {
            task_id: [make_input_text_action(task_id, action_id, "Urdaneta", "Enter facility name")],
        }
        # Empty mapping: the LLM was never invoked because no INPUT_TEXT
        # actions were visible at generation time (the race condition).
        empty_mappings: dict[str, str] = {}

        hydrated = hydrate_input_text_actions_with_field_names(actions_by_task, empty_mappings)

        assert "field_name" not in hydrated[task_id][0]

    def test_race_condition_empty_actions_produces_empty_schema(self) -> None:
        """
        With no actions at all, generate_workflow_parameters_schema would
        return an empty schema — this happens when script generation runs
        before any actions are executed.
        """
        # Empty map simulates generation running before INPUT_TEXT actions
        # reach the database; the async LLM call is never made.
        actions_by_task: dict[str, list[dict[str, Any]]] = {}

        # Replicate the action-scanning step of generate_workflow_parameters_schema.
        matches = [
            action
            for actions in actions_by_task.values()
            for action in actions
            if action.get("action_type", "") in CUSTOM_FIELD_ACTIONS
        ]

        # No actions -> the schema generator returns an empty schema.
        assert not matches

    def test_race_condition_only_click_actions_no_schema(self) -> None:
        """
        With only CLICK actions present (before INPUT_TEXT is saved),
        no field mappings are generated.
        """
        task_id = "task-123"
        # Only CLICK actions: generation ran after CLICK but before the
        # INPUT_TEXT action was saved.
        actions_by_task = {
            task_id: [
                make_click_action(task_id, "action-1"),
                make_click_action(task_id, "action-2"),
            ],
        }

        matches = [
            action
            for actions in actions_by_task.values()
            for action in actions
            if action.get("action_type", "") in CUSTOM_FIELD_ACTIONS
        ]

        # No INPUT_TEXT actions found -> no schema will be generated.
        assert not matches
class TestCodeGenerationWithoutFieldName:
    """
    Test that code generation produces hardcoded values when field_name is missing.
    This demonstrates the impact of the race condition on generated code.
    """

    def test_action_without_field_name_produces_hardcoded_value(self) -> None:
        """
        An INPUT_TEXT action missing field_name (race condition) makes the
        generator emit a hardcoded value instead of context.parameters.
        """
        racy_action = make_input_text_action(
            task_id="task-123",
            action_id="action-456",
            text="Urdaneta",  # This becomes hardcoded
            intention="Enter facility name",
            field_name=None,  # No field_name due to race condition
        )
        # action_handler_body branches on act.get("field_name") to pick
        # context.parameters[field_name] vs the literal text.
        assert racy_action.get("field_name") is None
        assert racy_action.get("text") == "Urdaneta"  # Will be hardcoded

    def test_action_with_field_name_produces_parameter_reference(self) -> None:
        """
        An INPUT_TEXT action carrying field_name makes the generated code use
        context.parameters[field_name].
        """
        mapped_action = make_input_text_action(
            task_id="task-123",
            action_id="action-456",
            text="Urdaneta",  # Original value (not used in generated code)
            intention="Enter facility name",
            field_name="facility_name",  # Field name present
        )
        # field_name present -> generated code references context.parameters.
        assert mapped_action.get("field_name") == "facility_name"
class TestFieldMappingGeneration:
"""Test the field mapping generation logic."""
def test_field_mapping_structure(self) -> None:
"""Test that GeneratedFieldMapping has the expected structure."""
mapping = GeneratedFieldMapping(
field_mappings={"action_index_1": "facility_name"},
schema_fields={"facility_name": {"type": "str", "description": "The facility name"}},
)
assert mapping.field_mappings["action_index_1"] == "facility_name"
assert mapping.schema_fields["facility_name"]["type"] == "str"
def test_action_index_to_field_mapping_key_format(self) -> None:
"""Test that field mapping keys use the correct format: task_id:action_id."""
task_id = "task-123"
action_id = "action-456"
# This is the format used in generate_workflow_parameters_schema
expected_key = f"{task_id}:{action_id}"
assert expected_key == "task-123:action-456"
@pytest.mark.asyncio
async def test_generate_workflow_parameters_schema_empty_actions() -> None:
    """
    Integration test: empty actions result in an empty schema and no mappings.

    This confirms the race-condition behavior — when script generation runs
    before INPUT_TEXT actions are saved, no field mappings are generated.
    No mocking is needed: the function returns early (before reaching the
    prompt engine or LLM handler) when no custom-field actions are found,
    so the previously requested, unused `monkeypatch` fixture was dropped.
    """
    actions_by_task: dict[str, list[dict[str, Any]]] = {}

    schema_code, action_field_mappings = await generate_workflow_parameters_schema(actions_by_task)

    # An empty generated schema class contains only `pass`.
    assert "pass" in schema_code
    assert action_field_mappings == {}
@pytest.mark.asyncio
async def test_generate_workflow_parameters_schema_with_actions(monkeypatch: pytest.MonkeyPatch) -> None:
    """
    Integration test: when actions are present, the LLM path is exercised.

    Confirms that when script generation runs AFTER actions are saved, it
    properly generates field mappings.
    """

    async def fake_llm(custom_field_actions):
        # Stand-in for the real LLM call; returns a fixed mapping.
        return GeneratedFieldMapping(
            field_mappings={"action_index_1": "facility_name"},
            schema_fields={"facility_name": {"type": "str", "description": "The facility name"}},
        )

    monkeypatch.setattr(gwp, "_generate_field_names_with_llm", fake_llm)

    task_id, action_id = "task-123", "action-456"
    actions_by_task = {
        task_id: [make_input_text_action(task_id, action_id, "Urdaneta", "Enter facility name")],
    }

    schema_code, action_field_mappings = await generate_workflow_parameters_schema(actions_by_task)

    # The generated schema exposes the mapped field on the parameters class.
    assert "facility_name" in schema_code
    assert "GeneratedWorkflowParameters" in schema_code
    # The mapping is keyed by "task_id:action_id".
    mapping_key = f"{task_id}:{action_id}"
    assert mapping_key in action_field_mappings
    assert action_field_mappings[mapping_key] == "facility_name"
class TestRaceConditionTimingScenario:
    """
    Document the timing scenario that causes the race condition.
    Timeline:
    1. T+0s: CLICK action executes, post_action_execution triggered
    2. T+0.1s: Script generation starts (asyncio.create_task)
    3. T+0.2s: Script generation queries database for actions - finds only CLICK
    4. T+0.3s: Script generation completes with no field mappings
    5. T+6s: INPUT_TEXT action executes, saved to database
    6. T+6.1s: Another script generation triggered, but first (wrong) script already saved
    The result is a script with hardcoded values like `value = 'Urdaneta'`
    instead of `value = context.parameters['facility_name']`
    """

    def test_timing_scenario_documentation(self) -> None:
        """This test documents the race condition scenario."""

        def find_input_text(actions_by_task: dict[str, list[dict[str, Any]]]) -> list[dict[str, Any]]:
            # Mirror the INPUT_TEXT scan performed during script generation.
            return [
                act
                for acts in actions_by_task.values()
                for act in acts
                if act["action_type"] == ActionType.INPUT_TEXT
            ]

        # Phase 1: only the CLICK action has been saved, so script
        # generation finds no INPUT_TEXT actions.
        actions_at_time_0 = {"task-123": [make_click_action("task-123", "action-1")]}
        assert find_input_text(actions_at_time_0) == []

        # Phase 2: INPUT_TEXT has been saved — but too late, the first
        # (wrong) script was already persisted.
        actions_at_time_6 = {
            "task-123": [
                make_click_action("task-123", "action-1"),
                make_input_text_action("task-123", "action-2", "Urdaneta", "Enter facility name"),
            ],
        }
        found = find_input_text(actions_at_time_6)
        assert len(found) == 1
        assert found[0]["text"] == "Urdaneta"
class TestFinalizeParameter:
"""
Tests for the `finalize` parameter in generate_script_if_needed.
The fix (SKY-7653) uses a smart finalize approach:
- Only regenerates if script_gen_had_incomplete_actions flag is set
- This avoids unnecessary regeneration costs when script is already complete
"""
def test_finalize_with_incomplete_actions_triggers_regeneration(self) -> None:
"""
Test that finalize=True with incomplete actions flag triggers regeneration.
This simulates the logic in generate_script_if_needed when finalize=True
and the context has script_gen_had_incomplete_actions=True.
"""
# Simulate workflow definition blocks
class MockBlock:
def __init__(self, label: str, block_type: str):
self.label = label
self.block_type = block_type
workflow_blocks = [
MockBlock("login_step", "task"), # Should be in BLOCK_TYPES_THAT_SHOULD_BE_CACHED
MockBlock("search_step", "task"),
MockBlock("wait_block", "wait"), # Should NOT be cached
]
# Simulate the finalize logic with incomplete actions flag
blocks_to_update: set[str] = set()
finalize = True
context = SkyvernContext(script_gen_had_incomplete_actions=True)
if finalize and context.script_gen_had_incomplete_actions:
task_block_labels = {
block.label
for block in workflow_blocks
if block.label and block.block_type in BLOCK_TYPES_THAT_SHOULD_BE_CACHED
}
blocks_to_update.update(task_block_labels)
# Should include task blocks but not wait block
assert "login_step" in blocks_to_update
assert "search_step" in blocks_to_update
assert "wait_block" not in blocks_to_update
def test_finalize_without_incomplete_actions_skips_regeneration(self) -> None:
"""
Test that finalize=True without incomplete actions flag skips regeneration.
This is the optimization - when script generation had complete data,
we don't waste resources regenerating.
"""
blocks_to_update: set[str] = set()
finalize = True
context = SkyvernContext(script_gen_had_incomplete_actions=False)
if finalize and context.script_gen_had_incomplete_actions:
# This branch won't execute - no incomplete actions
blocks_to_update.add("some_block")
# No blocks should be added - script is already complete
assert len(blocks_to_update) == 0
def test_without_finalize_no_forced_regeneration(self) -> None:
"""
Test that without finalize=True, blocks are not force-added.
"""
blocks_to_update: set[str] = set()
finalize = False
# Without finalize, no blocks are force-added
if finalize:
# This branch won't execute
blocks_to_update.add("some_block")
assert len(blocks_to_update) == 0
class TestCodeGenerationLogic:
    """
    Test the exact code generation logic from generate_script.py.
    The code at generate_script.py:401-429 determines whether to use
    context.parameters[field_name] or hardcoded text based on act.get("field_name").
    """

    @staticmethod
    def _code_path_for(action: dict[str, Any]) -> str:
        """Mirror the generator's branch: parameter reference vs hardcoded text."""
        return "context.parameters" if action.get("field_name") else "hardcoded"

    def test_code_generation_path_without_field_name(self) -> None:
        """
        Verify the code generation path when field_name is missing.
        From generate_script.py:401-429:
        - If act.get("field_name") is truthy, use context.parameters[field_name]
        - Else, use _value(act["text"]) which produces a hardcoded string
        """
        action = make_input_text_action(
            task_id="task-123",
            action_id="action-456",
            text="Urdaneta",
            intention="Enter facility name",
            field_name=None,
        )
        # Without field_name the generator emits: "Urdaneta" (hardcoded).
        assert self._code_path_for(action) == "hardcoded"
        assert action.get("text") == "Urdaneta"

    def test_code_generation_path_with_field_name(self) -> None:
        """
        Verify the code generation path when field_name is present.
        From generate_script.py:401-429:
        - If act.get("field_name") is truthy, use context.parameters[field_name]
        """
        action = make_input_text_action(
            task_id="task-123",
            action_id="action-456",
            text="Urdaneta",
            intention="Enter facility name",
            field_name="facility_name",
        )
        # With field_name the generator emits: context.parameters["facility_name"].
        assert self._code_path_for(action) == "context.parameters"
        assert action.get("field_name") == "facility_name"

    def test_demonstrates_race_condition_consequence(self) -> None:
        """
        Demonstrate the consequence of the race condition.
        When script generation runs before the INPUT_TEXT action is saved:
        1. generate_workflow_parameters_schema finds no INPUT_TEXT actions
        2. No field mappings are generated
        3. Actions don't get field_name hydrated
        4. Generated script uses hardcoded values
        This means the cached script CANNOT be reused with different parameters.
        """
        # First run with "Urdaneta": script generation ran early, so
        # field_name is missing and the code becomes: value = "Urdaneta".
        early_action = make_input_text_action(
            task_id="task-123",
            action_id="action-456",
            text="Urdaneta",  # This gets hardcoded
            field_name=None,  # Missing due to race condition
        )
        assert early_action.get("field_name") is None
        # A rerun with "Pok Pok" would still type "Urdaneta" — wrong value.

        # Correct scenario: generation ran after all actions were saved.
        proper_action = make_input_text_action(
            task_id="task-123",
            action_id="action-456",
            text="Urdaneta",
            field_name="facility_name",  # Present because script gen ran after action saved
        )
        # Generated code: value = context.parameters["facility_name"], so a
        # rerun with "Pok Pok" picks up the correct value.
        assert proper_action.get("field_name") is not None
class TestSkipActionsWithoutData:
    """
    Tests for the smart finalize approach that skips actions without data.
    This addresses the race condition (SKY-7653) while avoiding unnecessary costs:
    1. Skip actions without data during mid-run generation (avoids bad field mappings)
    2. Set context flag when actions are skipped (script_gen_had_incomplete_actions)
    3. At finalize, only regenerate if the flag is set (avoids unnecessary regeneration)
    The benefit is:
    - First run with race condition: flag set -> regenerate at end -> script complete
    - Subsequent runs: script already complete -> no regeneration needed
    """

    @staticmethod
    def _keep_actions_with_data(candidates: list[dict[str, Any]]) -> list[dict[str, Any]]:
        """
        Replicate the filtering in generate_workflow_parameters_schema: keep
        only custom-field actions whose value field is already populated.
        """
        # Which dict key holds the "value" for each custom-field action type.
        value_key_by_type = {
            ActionType.INPUT_TEXT: "text",
            ActionType.SELECT_OPTION: "option",
            ActionType.UPLOAD_FILE: "file_url",
        }
        kept: list[dict[str, Any]] = []
        for candidate in candidates:
            candidate_type = candidate.get("action_type", "")
            if candidate_type not in CUSTOM_FIELD_ACTIONS:
                continue
            # Skip actions whose value has not been saved yet (race condition).
            if not candidate.get(value_key_by_type.get(candidate_type, ""), ""):
                continue
            kept.append(candidate)
        return kept

    def test_input_text_without_text_is_skipped(self) -> None:
        """INPUT_TEXT actions without text are skipped during field mapping."""
        action_without_text = {
            "action_type": ActionType.INPUT_TEXT,
            "action_id": "action-456",
            "task_id": "task-123",
            "text": "",  # Empty - not yet saved
            "intention": "Enter facility name",
        }
        # Skipped because text is empty.
        assert self._keep_actions_with_data([action_without_text]) == []

    def test_input_text_with_text_is_included(self) -> None:
        """INPUT_TEXT actions with text are included in field mapping."""
        action_with_text = {
            "action_type": ActionType.INPUT_TEXT,
            "action_id": "action-456",
            "task_id": "task-123",
            "text": "Urdaneta",  # Has value
            "intention": "Enter facility name",
        }
        # Included because text has a value.
        assert len(self._keep_actions_with_data([action_with_text])) == 1

    def test_select_option_without_option_is_skipped(self) -> None:
        """SELECT_OPTION actions without option are skipped."""
        action_without_option = {
            "action_type": ActionType.SELECT_OPTION,
            "action_id": "action-789",
            "task_id": "task-123",
            "option": "",  # Empty - not yet saved
        }
        assert self._keep_actions_with_data([action_without_option]) == []

    def test_upload_file_without_file_url_is_skipped(self) -> None:
        """UPLOAD_FILE actions without file_url are skipped."""
        action_without_file = {
            "action_type": ActionType.UPLOAD_FILE,
            "action_id": "action-101",
            "task_id": "task-123",
            "file_url": "",  # Empty - not yet saved
        }
        assert self._keep_actions_with_data([action_without_file]) == []
@pytest.mark.asyncio
async def test_generate_workflow_parameters_schema_skips_empty_actions_and_sets_flag(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """
    Integration test: actions without data are skipped and the context flag is set.

    Confirms the smart finalize approach:
    1. Incomplete actions are skipped mid-run (prevents bad field mappings)
    2. Context flag is set (triggers finalize regeneration only when needed)
    """
    # Install a context so the function can record the incomplete-actions flag.
    ctx = SkyvernContext()
    skyvern_context.set(ctx)

    # Record LLM invocations; the list must stay empty if actions are skipped.
    llm_calls: list[Any] = []

    async def fake_llm(custom_field_actions):
        llm_calls.append(custom_field_actions)
        return GeneratedFieldMapping(
            field_mappings={"action_index_1": "facility_name"},
            schema_fields={"facility_name": {"type": "str", "description": "The facility name"}},
        )

    monkeypatch.setattr(gwp, "_generate_field_names_with_llm", fake_llm)

    task_id = "task-123"
    # An action with no value yet — simulates the race condition.
    incomplete_action = {
        "action_type": ActionType.INPUT_TEXT,
        "action_id": "action-456",
        "task_id": task_id,
        "text": "",  # Empty - not yet saved
        "intention": "Enter facility name",
    }
    try:
        schema_code, action_field_mappings = await generate_workflow_parameters_schema(
            {task_id: [incomplete_action]}
        )
        # The incomplete action was skipped, so the LLM was never invoked.
        assert not llm_calls
        # An empty schema only contains `pass` and no mappings.
        assert "pass" in schema_code
        assert action_field_mappings == {}
        # The flag tells finalize to regenerate once complete data exists.
        assert ctx.script_gen_had_incomplete_actions is True
    finally:
        skyvern_context.reset()
@pytest.mark.asyncio
async def test_generate_workflow_parameters_schema_with_complete_actions_no_flag(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """
    Integration test: complete actions do not set the context flag.

    When script generation has complete data the flag stays unset, so
    finalize skips the costly regeneration step.
    """
    # Install a context so the flag can be inspected afterwards.
    ctx = SkyvernContext()
    skyvern_context.set(ctx)

    async def fake_llm(custom_field_actions):
        # Stand-in for the real LLM call; returns a fixed mapping.
        return GeneratedFieldMapping(
            field_mappings={"action_index_1": "facility_name"},
            schema_fields={"facility_name": {"type": "str", "description": "The facility name"}},
        )

    monkeypatch.setattr(gwp, "_generate_field_names_with_llm", fake_llm)

    task_id = "task-123"
    # An action with a saved value — no race condition here.
    complete_action = {
        "action_type": ActionType.INPUT_TEXT,
        "action_id": "action-456",
        "task_id": task_id,
        "text": "Urdaneta",  # Has value - complete
        "intention": "Enter facility name",
    }
    try:
        schema_code, _ = await generate_workflow_parameters_schema({task_id: [complete_action]})
        # The schema was generated from the complete action.
        assert "facility_name" in schema_code
        # No incomplete actions were seen, so finalize won't regenerate.
        assert ctx.script_gen_had_incomplete_actions is False
    finally:
        skyvern_context.reset()