""" Tests for script generation race condition (SKY-7653). The race condition occurs when script generation runs during workflow execution before all actions have been saved to the database. This results in: 1. `generate_workflow_parameters_schema` not finding INPUT_TEXT actions 2. No field_name mappings being generated 3. Generated script having hardcoded values instead of context.parameters[field_name] """ from typing import Any import pytest from skyvern.core.script_generations import generate_workflow_parameters as gwp from skyvern.core.script_generations.generate_workflow_parameters import ( CUSTOM_FIELD_ACTIONS, GeneratedFieldMapping, generate_workflow_parameters_schema, hydrate_input_text_actions_with_field_names, ) from skyvern.forge.sdk.core import skyvern_context from skyvern.forge.sdk.core.skyvern_context import SkyvernContext from skyvern.forge.sdk.workflow.service import BLOCK_TYPES_THAT_SHOULD_BE_CACHED from skyvern.webeye.actions.actions import ActionType def make_input_text_action( task_id: str, action_id: str, text: str, intention: str = "", field_name: str | None = None, ) -> dict[str, Any]: """Create a mock INPUT_TEXT action dictionary.""" action = { "action_type": ActionType.INPUT_TEXT, "action_id": action_id, "task_id": task_id, "text": text, "intention": intention, "element_id": "element_1", "xpath": "//input[@id='test']", } if field_name: action["field_name"] = field_name return action def make_click_action(task_id: str, action_id: str) -> dict[str, Any]: """Create a mock CLICK action dictionary.""" return { "action_type": ActionType.CLICK, "action_id": action_id, "task_id": task_id, "element_id": "element_2", "xpath": "//button[@id='submit']", } class TestRaceConditionScenarios: """Test scenarios that demonstrate the race condition.""" def test_hydrate_adds_field_name_to_actions(self) -> None: """Test that hydrate_input_text_actions_with_field_names properly adds field_name.""" task_id = "task-123" action_id = "action-456" actions_by_task = { task_id: [ make_input_text_action(task_id, action_id, "Urdaneta", "Enter facility name"), ] } field_mappings = { f"{task_id}:{action_id}": "facility_name", } result = hydrate_input_text_actions_with_field_names(actions_by_task, field_mappings) # The action should now have field_name assert result[task_id][0].get("field_name") == "facility_name" def test_hydrate_without_mappings_no_field_name(self) -> None: """ Test that without field mappings, actions don't get field_name added. This simulates what happens when script generation runs before actions are saved. """ task_id = "task-123" action_id = "action-456" actions_by_task = { task_id: [ make_input_text_action(task_id, action_id, "Urdaneta", "Enter facility name"), ] } # Empty field mappings - simulates race condition where LLM wasn't called # because no INPUT_TEXT actions were found field_mappings: dict[str, str] = {} result = hydrate_input_text_actions_with_field_names(actions_by_task, field_mappings) # The action should NOT have field_name assert "field_name" not in result[task_id][0] def test_race_condition_empty_actions_produces_empty_schema(self) -> None: """ Test that when no actions are passed, generate_workflow_parameters_schema returns an empty schema. This happens when script generation runs before actions are executed. """ # Empty actions - simulates script generation running before any INPUT_TEXT # actions have been saved to the database actions_by_task: dict[str, list[dict[str, Any]]] = {} # Call the synchronous part that checks for actions # (The async LLM call won't be made because no actions are found) # Extract just the action-finding logic custom_field_actions = [] for task_id, actions in actions_by_task.items(): for action in actions: action_type = action.get("action_type", "") if action_type in CUSTOM_FIELD_ACTIONS: custom_field_actions.append(action) # With no actions, the schema generator should return empty schema assert len(custom_field_actions) == 0 def test_race_condition_only_click_actions_no_schema(self) -> None: """ Test that when only CLICK actions are present (before INPUT_TEXT is saved), no field mappings are generated. """ task_id = "task-123" # Only CLICK actions - simulates script generation running after CLICK # but before INPUT_TEXT action is saved actions_by_task = { task_id: [ make_click_action(task_id, "action-1"), make_click_action(task_id, "action-2"), ] } custom_field_actions = [] for task_id, actions in actions_by_task.items(): for action in actions: action_type = action.get("action_type", "") if action_type in CUSTOM_FIELD_ACTIONS: custom_field_actions.append(action) # No INPUT_TEXT actions found - no schema will be generated assert len(custom_field_actions) == 0 class TestCodeGenerationWithoutFieldName: """ Test that code generation produces hardcoded values when field_name is missing. This demonstrates the impact of the race condition on generated code. """ def test_action_without_field_name_produces_hardcoded_value(self) -> None: """ When an INPUT_TEXT action doesn't have field_name (due to race condition), the generated code should have a hardcoded value instead of context.parameters. """ action = make_input_text_action( task_id="task-123", action_id="action-456", text="Urdaneta", # This becomes hardcoded intention="Enter facility name", field_name=None, # No field_name due to race condition ) # The action_handler_body function uses act.get("field_name") to decide # whether to use context.parameters[field_name] or hardcoded value assert action.get("field_name") is None assert action.get("text") == "Urdaneta" # Will be hardcoded def test_action_with_field_name_produces_parameter_reference(self) -> None: """ When an INPUT_TEXT action has field_name, the generated code should use context.parameters[field_name]. """ action = make_input_text_action( task_id="task-123", action_id="action-456", text="Urdaneta", # Original value (not used in generated code) intention="Enter facility name", field_name="facility_name", # Field name present ) # The action has field_name, so generated code will use context.parameters assert action.get("field_name") == "facility_name" class TestFieldMappingGeneration: """Test the field mapping generation logic.""" def test_field_mapping_structure(self) -> None: """Test that GeneratedFieldMapping has the expected structure.""" mapping = GeneratedFieldMapping( field_mappings={"action_index_1": "facility_name"}, schema_fields={"facility_name": {"type": "str", "description": "The facility name"}}, ) assert mapping.field_mappings["action_index_1"] == "facility_name" assert mapping.schema_fields["facility_name"]["type"] == "str" def test_action_index_to_field_mapping_key_format(self) -> None: """Test that field mapping keys use the correct format: task_id:action_id.""" task_id = "task-123" action_id = "action-456" # This is the format used in generate_workflow_parameters_schema expected_key = f"{task_id}:{action_id}" assert expected_key == "task-123:action-456" @pytest.mark.asyncio async def test_generate_workflow_parameters_schema_empty_actions(monkeypatch: pytest.MonkeyPatch) -> None: """ Integration test: Verify that empty actions result in empty schema. This test confirms the race condition behavior - when script generation runs before INPUT_TEXT actions are saved, no field mappings are generated. """ # Mock the prompt engine and LLM handler since we won't reach them # (the function returns early when no custom_field_actions are found) actions_by_task: dict[str, list[dict[str, Any]]] = {} schema_code, action_field_mappings = await generate_workflow_parameters_schema(actions_by_task) # Should return empty schema assert "pass" in schema_code # Empty schema has `pass` assert action_field_mappings == {} @pytest.mark.asyncio async def test_generate_workflow_parameters_schema_with_actions(monkeypatch: pytest.MonkeyPatch) -> None: """ Integration test: Verify that when actions are present, LLM is called. This confirms that when script generation runs AFTER actions are saved, it properly generates field mappings. """ # Mock the LLM call to return a mapping async def mock_generate_field_names_with_llm(custom_field_actions): return GeneratedFieldMapping( field_mappings={"action_index_1": "facility_name"}, schema_fields={"facility_name": {"type": "str", "description": "The facility name"}}, ) monkeypatch.setattr(gwp, "_generate_field_names_with_llm", mock_generate_field_names_with_llm) task_id = "task-123" action_id = "action-456" actions_by_task = { task_id: [ make_input_text_action(task_id, action_id, "Urdaneta", "Enter facility name"), ] } schema_code, action_field_mappings = await generate_workflow_parameters_schema(actions_by_task) # Should have generated schema with field assert "facility_name" in schema_code assert "GeneratedWorkflowParameters" in schema_code # Should have mapping for our action assert f"{task_id}:{action_id}" in action_field_mappings assert action_field_mappings[f"{task_id}:{action_id}"] == "facility_name" class TestRaceConditionTimingScenario: """ Document the timing scenario that causes the race condition. Timeline: 1. T+0s: CLICK action executes, post_action_execution triggered 2. T+0.1s: Script generation starts (asyncio.create_task) 3. T+0.2s: Script generation queries database for actions - finds only CLICK 4. T+0.3s: Script generation completes with no field mappings 5. T+6s: INPUT_TEXT action executes, saved to database 6. T+6.1s: Another script generation triggered, but first (wrong) script already saved The result is a script with hardcoded values like `value = 'Urdaneta'` instead of `value = context.parameters['facility_name']` """ def test_timing_scenario_documentation(self) -> None: """This test documents the race condition scenario.""" # Phase 1: After CLICK, before INPUT_TEXT actions_at_time_0 = { "task-123": [ make_click_action("task-123", "action-1"), ] } # At this point, script generation finds no INPUT_TEXT actions input_text_actions = [ a for actions in actions_at_time_0.values() for a in actions if a["action_type"] == ActionType.INPUT_TEXT ] assert len(input_text_actions) == 0 # Phase 2: After INPUT_TEXT is saved actions_at_time_6 = { "task-123": [ make_click_action("task-123", "action-1"), make_input_text_action("task-123", "action-2", "Urdaneta", "Enter facility name"), ] } # Now INPUT_TEXT is found - but too late, first script already saved input_text_actions = [ a for actions in actions_at_time_6.values() for a in actions if a["action_type"] == ActionType.INPUT_TEXT ] assert len(input_text_actions) == 1 assert input_text_actions[0]["text"] == "Urdaneta" class TestFinalizeParameter: """ Tests for the `finalize` parameter in generate_script_if_needed. The fix (SKY-7653) uses a smart finalize approach: - Only regenerates if script_gen_had_incomplete_actions flag is set - This avoids unnecessary regeneration costs when script is already complete """ def test_finalize_with_incomplete_actions_triggers_regeneration(self) -> None: """ Test that finalize=True with incomplete actions flag triggers regeneration. This simulates the logic in generate_script_if_needed when finalize=True and the context has script_gen_had_incomplete_actions=True. """ # Simulate workflow definition blocks class MockBlock: def __init__(self, label: str, block_type: str): self.label = label self.block_type = block_type workflow_blocks = [ MockBlock("login_step", "task"), # Should be in BLOCK_TYPES_THAT_SHOULD_BE_CACHED MockBlock("search_step", "task"), MockBlock("wait_block", "wait"), # Should NOT be cached ] # Simulate the finalize logic with incomplete actions flag blocks_to_update: set[str] = set() finalize = True context = SkyvernContext(script_gen_had_incomplete_actions=True) if finalize and context.script_gen_had_incomplete_actions: task_block_labels = { block.label for block in workflow_blocks if block.label and block.block_type in BLOCK_TYPES_THAT_SHOULD_BE_CACHED } blocks_to_update.update(task_block_labels) # Should include task blocks but not wait block assert "login_step" in blocks_to_update assert "search_step" in blocks_to_update assert "wait_block" not in blocks_to_update def test_finalize_without_incomplete_actions_skips_regeneration(self) -> None: """ Test that finalize=True without incomplete actions flag skips regeneration. This is the optimization - when script generation had complete data, we don't waste resources regenerating. """ blocks_to_update: set[str] = set() finalize = True context = SkyvernContext(script_gen_had_incomplete_actions=False) if finalize and context.script_gen_had_incomplete_actions: # This branch won't execute - no incomplete actions blocks_to_update.add("some_block") # No blocks should be added - script is already complete assert len(blocks_to_update) == 0 def test_without_finalize_no_forced_regeneration(self) -> None: """ Test that without finalize=True, blocks are not force-added. """ blocks_to_update: set[str] = set() finalize = False # Without finalize, no blocks are force-added if finalize: # This branch won't execute blocks_to_update.add("some_block") assert len(blocks_to_update) == 0 class TestCodeGenerationLogic: """ Test the exact code generation logic from generate_script.py. The code at generate_script.py:401-429 determines whether to use context.parameters[field_name] or hardcoded text based on act.get("field_name"). """ def test_code_generation_path_without_field_name(self) -> None: """ Verify the code generation path when field_name is missing. From generate_script.py:401-429: - If act.get("field_name") is truthy, use context.parameters[field_name] - Else, use _value(act["text"]) which produces hardcoded string """ action = make_input_text_action( task_id="task-123", action_id="action-456", text="Urdaneta", intention="Enter facility name", field_name=None, ) # Simulate the code generation logic if action.get("field_name"): # This branch produces: context.parameters["facility_name"] code_path = "context.parameters" else: # This branch produces: "Urdaneta" (hardcoded) code_path = "hardcoded" assert code_path == "hardcoded" assert action.get("text") == "Urdaneta" def test_code_generation_path_with_field_name(self) -> None: """ Verify the code generation path when field_name is present. From generate_script.py:401-429: - If act.get("field_name") is truthy, use context.parameters[field_name] """ action = make_input_text_action( task_id="task-123", action_id="action-456", text="Urdaneta", intention="Enter facility name", field_name="facility_name", ) # Simulate the code generation logic if action.get("field_name"): # This branch produces: context.parameters["facility_name"] code_path = "context.parameters" else: # This branch produces: "Urdaneta" (hardcoded) code_path = "hardcoded" assert code_path == "context.parameters" assert action.get("field_name") == "facility_name" def test_demonstrates_race_condition_consequence(self) -> None: """ Demonstrate the consequence of the race condition. When script generation runs before INPUT_TEXT action is saved: 1. generate_workflow_parameters_schema finds no INPUT_TEXT actions 2. No field mappings are generated 3. Actions don't get field_name hydrated 4. Generated script uses hardcoded values This means the cached script CANNOT be reused with different parameters. """ # Scenario: First workflow run with "Urdaneta" # Script generation ran early, field_name is missing action_from_early_script_gen = make_input_text_action( task_id="task-123", action_id="action-456", text="Urdaneta", # This gets hardcoded field_name=None, # Missing due to race condition ) # The generated code would be: value = "Urdaneta" generated_code_has_hardcoded = action_from_early_script_gen.get("field_name") is None assert generated_code_has_hardcoded # Scenario: User runs workflow again with "Pok Pok" parameter # But the cached script has: value = "Urdaneta" (hardcoded!) # So the wrong value is used. # Correct scenario: Script generation runs after all actions saved action_from_proper_script_gen = make_input_text_action( task_id="task-123", action_id="action-456", text="Urdaneta", field_name="facility_name", # Present because script gen ran after action saved ) # The generated code would be: value = context.parameters["facility_name"] generated_code_uses_parameters = action_from_proper_script_gen.get("field_name") is not None assert generated_code_uses_parameters # Now when user runs with "Pok Pok", context.parameters["facility_name"] = "Pok Pok" # And the correct value is used! class TestSkipActionsWithoutData: """ Tests for the smart finalize approach that skips actions without data. This addresses the race condition (SKY-7653) while avoiding unnecessary costs: 1. Skip actions without data during mid-run generation (avoids bad field mappings) 2. Set context flag when actions are skipped (script_gen_had_incomplete_actions) 3. At finalize, only regenerate if the flag is set (avoids unnecessary regeneration) The benefit is: - First run with race condition: flag set → regenerate at end → script complete - Subsequent runs: script already complete → no regeneration needed """ def test_input_text_without_text_is_skipped(self) -> None: """Test that INPUT_TEXT actions without text are skipped during field mapping.""" task_id = "task-123" # INPUT_TEXT action without text - simulates race condition action_without_text = { "action_type": ActionType.INPUT_TEXT, "action_id": "action-456", "task_id": task_id, "text": "", # Empty - not yet saved "intention": "Enter facility name", } # Simulate the filtering logic from generate_workflow_parameters_schema custom_field_actions = [] for action in [action_without_text]: action_type = action.get("action_type", "") if action_type not in CUSTOM_FIELD_ACTIONS: continue value = "" if action_type == ActionType.INPUT_TEXT: value = action.get("text", "") # Skip actions without data if not value: continue custom_field_actions.append(action) # Action should be skipped because text is empty assert len(custom_field_actions) == 0 def test_input_text_with_text_is_included(self) -> None: """Test that INPUT_TEXT actions with text are included in field mapping.""" task_id = "task-123" # INPUT_TEXT action with text - properly saved action_with_text = { "action_type": ActionType.INPUT_TEXT, "action_id": "action-456", "task_id": task_id, "text": "Urdaneta", # Has value "intention": "Enter facility name", } # Simulate the filtering logic custom_field_actions = [] for action in [action_with_text]: action_type = action.get("action_type", "") if action_type not in CUSTOM_FIELD_ACTIONS: continue value = "" if action_type == ActionType.INPUT_TEXT: value = action.get("text", "") # Skip actions without data if not value: continue custom_field_actions.append(action) # Action should be included because text has value assert len(custom_field_actions) == 1 def test_select_option_without_option_is_skipped(self) -> None: """Test that SELECT_OPTION actions without option are skipped.""" task_id = "task-123" action_without_option = { "action_type": ActionType.SELECT_OPTION, "action_id": "action-789", "task_id": task_id, "option": "", # Empty - not yet saved } custom_field_actions = [] for action in [action_without_option]: action_type = action.get("action_type", "") if action_type not in CUSTOM_FIELD_ACTIONS: continue value = "" if action_type == ActionType.SELECT_OPTION: value = action.get("option", "") if not value: continue custom_field_actions.append(action) assert len(custom_field_actions) == 0 def test_upload_file_without_file_url_is_skipped(self) -> None: """Test that UPLOAD_FILE actions without file_url are skipped.""" task_id = "task-123" action_without_file = { "action_type": ActionType.UPLOAD_FILE, "action_id": "action-101", "task_id": task_id, "file_url": "", # Empty - not yet saved } custom_field_actions = [] for action in [action_without_file]: action_type = action.get("action_type", "") if action_type not in CUSTOM_FIELD_ACTIONS: continue value = "" if action_type == ActionType.UPLOAD_FILE: value = action.get("file_url", "") if not value: continue custom_field_actions.append(action) assert len(custom_field_actions) == 0 @pytest.mark.asyncio async def test_generate_workflow_parameters_schema_skips_empty_actions_and_sets_flag( monkeypatch: pytest.MonkeyPatch, ) -> None: """ Integration test: Verify that actions without data are skipped and the context flag is set. This test confirms the smart finalize approach: 1. Incomplete actions are skipped mid-run (prevents bad field mappings) 2. Context flag is set (triggers finalize regeneration only when needed) """ # Set up context to track the flag context = SkyvernContext() skyvern_context.set(context) # Mock the LLM call - should only be called if there are valid actions llm_called = False async def mock_generate_field_names_with_llm(custom_field_actions): nonlocal llm_called llm_called = True return GeneratedFieldMapping( field_mappings={"action_index_1": "facility_name"}, schema_fields={"facility_name": {"type": "str", "description": "The facility name"}}, ) monkeypatch.setattr(gwp, "_generate_field_names_with_llm", mock_generate_field_names_with_llm) task_id = "task-123" # Actions with empty values - simulates race condition actions_by_task = { task_id: [ { "action_type": ActionType.INPUT_TEXT, "action_id": "action-456", "task_id": task_id, "text": "", # Empty - not yet saved "intention": "Enter facility name", }, ] } try: schema_code, action_field_mappings = await generate_workflow_parameters_schema(actions_by_task) # LLM should NOT be called because action was skipped assert not llm_called # Should return empty schema assert "pass" in schema_code assert action_field_mappings == {} # Context flag should be set - triggers finalize regeneration assert context.script_gen_had_incomplete_actions is True finally: skyvern_context.reset() @pytest.mark.asyncio async def test_generate_workflow_parameters_schema_with_complete_actions_no_flag( monkeypatch: pytest.MonkeyPatch, ) -> None: """ Integration test: Verify that complete actions don't set the context flag. When script generation has complete data, the flag should NOT be set, which means finalize won't regenerate (saving costs). """ # Set up context to track the flag context = SkyvernContext() skyvern_context.set(context) # Mock the LLM call async def mock_generate_field_names_with_llm(custom_field_actions): return GeneratedFieldMapping( field_mappings={"action_index_1": "facility_name"}, schema_fields={"facility_name": {"type": "str", "description": "The facility name"}}, ) monkeypatch.setattr(gwp, "_generate_field_names_with_llm", mock_generate_field_names_with_llm) task_id = "task-123" # Actions with complete values - no race condition actions_by_task = { task_id: [ { "action_type": ActionType.INPUT_TEXT, "action_id": "action-456", "task_id": task_id, "text": "Urdaneta", # Has value - complete "intention": "Enter facility name", }, ] } try: schema_code, action_field_mappings = await generate_workflow_parameters_schema(actions_by_task) # Should have generated schema assert "facility_name" in schema_code # Context flag should NOT be set - no regeneration needed assert context.script_gen_had_incomplete_actions is False finally: skyvern_context.reset()