Remove setup.sh in favor of skyvern CLI (#4737)

This commit is contained in:
Shuchang Zheng
2026-02-12 20:43:27 -08:00
committed by GitHub
parent 08d3b04d14
commit 155c07f8be
77 changed files with 12358 additions and 10 deletions

View File

@@ -0,0 +1,152 @@
"""
Tests for workflow cache invalidation logic (SKY-7016).
Verifies that changes to the model field (both at workflow settings level and block level)
do not trigger cache invalidation.
"""
from datetime import datetime, timezone
from skyvern.forge.sdk.workflow.models.block import BlockType, TaskBlock
from skyvern.forge.sdk.workflow.models.parameter import OutputParameter, ParameterType
from skyvern.forge.sdk.workflow.models.workflow import WorkflowDefinition
from skyvern.forge.sdk.workflow.service import _get_workflow_definition_core_data
def make_output_parameter(key: str) -> OutputParameter:
    """Build an output-parameter fixture for workflow definition tests."""
    return OutputParameter(
        key=key,
        parameter_type=ParameterType.OUTPUT,
        description="Test output parameter",
        output_parameter_id="test-output-id",
        workflow_id="test-workflow-id",
        created_at=datetime.now(timezone.utc),
        modified_at=datetime.now(timezone.utc),
    )
def make_task_block(label: str, model: dict | None = None) -> TaskBlock:
    """Build a task-block fixture; *model* optionally sets the block-level model config."""
    return TaskBlock(
        block_type=BlockType.TASK,
        label=label,
        title="Test Task",
        url="https://example.com",
        navigation_goal="Complete the task",
        model=model,
        output_parameter=make_output_parameter(f"{label}_output"),
    )
class TestCacheInvalidation:
    """Tests for the _get_workflow_definition_core_data function."""

    def test_model_field_excluded_from_block_comparison(self) -> None:
        """
        SKY-7016: Verify that block-level model changes don't trigger cache invalidation.
        The model field should be excluded from the comparison data.
        """
        # Two definitions identical except for the block-level model field.
        plain_core = _get_workflow_definition_core_data(
            WorkflowDefinition(parameters=[], blocks=[make_task_block("task1", model=None)])
        )
        model_core = _get_workflow_definition_core_data(
            WorkflowDefinition(parameters=[], blocks=[make_task_block("task1", model={"model_name": "gpt-4o"})])
        )
        assert plain_core == model_core, (
            "Model field should be excluded from comparison. "
            "Changing block-level model should not trigger cache invalidation."
        )

    def test_model_field_not_in_core_data(self) -> None:
        """Verify that the model field is completely removed from the core data."""
        definition = WorkflowDefinition(
            parameters=[],
            blocks=[make_task_block("task1", model={"model_name": "claude-3-sonnet"})],
        )
        core_data = _get_workflow_definition_core_data(definition)
        # No block in the comparison payload may carry the model key.
        assert all(
            "model" not in block_data for block_data in core_data.get("blocks", [])
        ), "Model field should be removed from block data"

    def test_other_block_changes_still_detected(self) -> None:
        """Verify that non-model block changes are still detected."""
        goal_a_block = make_task_block("task1")
        goal_a_block.navigation_goal = "Goal A"
        goal_b_block = make_task_block("task1")
        goal_b_block.navigation_goal = "Goal B"
        core_a = _get_workflow_definition_core_data(
            WorkflowDefinition(parameters=[], blocks=[goal_a_block])
        )
        core_b = _get_workflow_definition_core_data(
            WorkflowDefinition(parameters=[], blocks=[goal_b_block])
        )
        # navigation_goal is part of the comparison data, so these must differ.
        assert core_a != core_b, "Non-model changes should still be detected for cache invalidation"

    def test_different_models_same_core_data(self) -> None:
        """Verify that switching between different models produces same core data."""
        model_variants = [
            None,
            {"model_name": "gpt-4o"},
            {"model_name": "claude-3-opus"},
            {"model_name": "gemini-pro", "extra_param": "value"},
        ]
        core_datas = [
            _get_workflow_definition_core_data(
                WorkflowDefinition(parameters=[], blocks=[make_task_block("task1", model=variant)])
            )
            for variant in model_variants
        ]
        for idx, core in enumerate(core_datas[1:], start=1):
            assert core_datas[0] == core, (
                f"Core data should be identical regardless of model. Definition 0 vs {idx} differ."
            )

    def test_timestamps_excluded_from_comparison(self) -> None:
        """Verify that timestamps are properly excluded from comparison."""
        baseline_block = make_task_block("task1")
        shifted_block = make_task_block("task1")
        # Recreate the output parameter with different IDs and timestamps.
        shifted_block.output_parameter = OutputParameter(
            parameter_type=ParameterType.OUTPUT,
            key="task1_output",
            description="Test output parameter",
            output_parameter_id="different-output-id",  # Different ID
            workflow_id="different-workflow-id",  # Different workflow ID
            created_at=datetime(2024, 1, 1, tzinfo=timezone.utc),  # Different timestamp
            modified_at=datetime(2024, 6, 1, tzinfo=timezone.utc),  # Different timestamp
        )
        core_a = _get_workflow_definition_core_data(
            WorkflowDefinition(parameters=[], blocks=[baseline_block])
        )
        core_b = _get_workflow_definition_core_data(
            WorkflowDefinition(parameters=[], blocks=[shifted_block])
        )
        assert core_a == core_b, "Timestamps and IDs should be excluded from comparison"

View File

@@ -0,0 +1,232 @@
"""
Tests for continue_on_failure behavior with caching.
Verifies that:
1. When a block with continue_on_failure=True fails, it's not cached (existing behavior)
2. When a cached block with continue_on_failure=True fails during cached execution,
it's marked for regeneration so the next run uses AI execution
"""
from datetime import UTC, datetime
from unittest.mock import MagicMock
import pytest
from skyvern.forge.sdk.workflow.models.block import (
BlockResult,
BlockType,
NavigationBlock,
)
from skyvern.forge.sdk.workflow.models.parameter import OutputParameter
from skyvern.forge.sdk.workflow.service import BLOCK_TYPES_THAT_SHOULD_BE_CACHED
from skyvern.schemas.workflows import BlockStatus
def _output_parameter(key: str) -> OutputParameter:
    """Minimal OutputParameter fixture whose id derives from *key*."""
    stamp = datetime.now(UTC)
    return OutputParameter(
        output_parameter_id=f"{key}_id",
        key=key,
        workflow_id="wf",
        created_at=stamp,
        modified_at=stamp,
    )
def _navigation_block(
    label: str,
    continue_on_failure: bool = False,
    next_block_label: str | None = None,
) -> NavigationBlock:
    """NavigationBlock fixture; the title mirrors the label."""
    return NavigationBlock(
        label=label,
        title=label,
        url="https://example.com",
        navigation_goal="goal",
        continue_on_failure=continue_on_failure,
        next_block_label=next_block_label,
        output_parameter=_output_parameter(f"{label}_output"),
    )
class TestContinueOnFailureWithCache:
    """Tests for cache invalidation when continue_on_failure blocks fail.

    The invalidation condition from service.py was previously copy-pasted into
    five tests; it now lives in one helper (_should_invalidate) alongside a
    single BlockResult factory (_block_result), so a change to the condition
    is made in exactly one place.
    """

    @staticmethod
    def _block_result(block: NavigationBlock, status: BlockStatus) -> BlockResult:
        """Build a BlockResult for *block* in the given terminal *status*."""
        completed = status == BlockStatus.completed
        return BlockResult(
            success=completed,
            failure_reason=None if completed else f"Block {status.value}",
            output_parameter=block.output_parameter,
            output_parameter_value={"result": "success"} if completed else None,
            status=status,
            workflow_run_block_id="wrb-1",
        )

    @staticmethod
    def _should_invalidate(
        block: NavigationBlock,
        result: BlockResult,
        script_blocks_by_label: dict,
    ) -> bool:
        """Replica of the continue_on_failure cache-invalidation condition from service.py."""
        return bool(
            block.label
            and block.continue_on_failure
            and result.status != BlockStatus.completed
            and block.block_type in BLOCK_TYPES_THAT_SHOULD_BE_CACHED
            and block.label in script_blocks_by_label
        )

    def test_navigation_block_is_cacheable(self) -> None:
        """Verify NavigationBlock is in the cacheable block types."""
        assert BlockType.NAVIGATION in BLOCK_TYPES_THAT_SHOULD_BE_CACHED

    def test_failed_block_without_continue_on_failure_not_added_to_update(self) -> None:
        """
        Test that a failed block without continue_on_failure=True doesn't trigger
        special cache invalidation logic (it would stop the workflow instead).
        """
        block = _navigation_block("nav1", continue_on_failure=False)
        cached_blocks = {"nav1": MagicMock()}  # Block is cached
        result = self._block_result(block, BlockStatus.failed)
        # continue_on_failure=False means no regeneration is queued.
        assert not self._should_invalidate(block, result, cached_blocks)

    def test_failed_block_with_continue_on_failure_and_cached_added_to_update(self) -> None:
        """
        Test that a cached block with continue_on_failure=True that fails
        is added to blocks_to_update for regeneration.
        """
        block = _navigation_block("nav1", continue_on_failure=True)
        cached_blocks = {"nav1": MagicMock()}  # Block is cached
        result = self._block_result(block, BlockStatus.failed)
        # SHOULD be invalidated so the next run uses AI execution.
        assert self._should_invalidate(block, result, cached_blocks)

    def test_failed_uncached_block_with_continue_on_failure_not_added_to_update(self) -> None:
        """
        Test that an uncached block with continue_on_failure=True that fails
        is NOT added to blocks_to_update (there's nothing to invalidate).
        """
        block = _navigation_block("nav1", continue_on_failure=True)
        result = self._block_result(block, BlockStatus.failed)
        # Empty cache map: nothing to invalidate.
        assert not self._should_invalidate(block, result, {})

    def test_successful_block_with_continue_on_failure_not_added_to_update_for_invalidation(self) -> None:
        """
        Test that a successful cached block with continue_on_failure=True
        is NOT added to blocks_to_update for invalidation.
        """
        block = _navigation_block("nav1", continue_on_failure=True)
        cached_blocks = {"nav1": MagicMock()}  # Block is cached
        result = self._block_result(block, BlockStatus.completed)
        # Block succeeded: the cached script stays valid.
        assert not self._should_invalidate(block, result, cached_blocks)

    @pytest.mark.parametrize(
        "status",
        [BlockStatus.failed, BlockStatus.terminated, BlockStatus.timed_out],
    )
    def test_all_failure_statuses_trigger_cache_invalidation(self, status: BlockStatus) -> None:
        """
        Test that all non-completed statuses (failed, terminated, timed_out)
        trigger cache invalidation when continue_on_failure=True.
        """
        block = _navigation_block("nav1", continue_on_failure=True)
        cached_blocks = {"nav1": MagicMock()}  # Block is cached
        result = self._block_result(block, status)
        assert self._should_invalidate(block, result, cached_blocks), (
            f"Status {status} should trigger cache invalidation"
        )

View File

@@ -0,0 +1,249 @@
from datetime import UTC, datetime
from unittest.mock import AsyncMock
import pytest
from skyvern.forge import app
from skyvern.forge.sdk.workflow.exceptions import InvalidWorkflowDefinition
from skyvern.forge.sdk.workflow.models.block import (
BranchCondition,
ConditionalBlock,
ExtractionBlock,
JinjaBranchCriteria,
NavigationBlock,
PromptBranchCriteria,
)
from skyvern.forge.sdk.workflow.models.parameter import OutputParameter
from skyvern.forge.sdk.workflow.service import WorkflowService
from skyvern.schemas.workflows import BlockStatus
def _output_parameter(key: str) -> OutputParameter:
    """OutputParameter fixture for workflow-graph tests, keyed by *key*."""
    timestamp = datetime.now(UTC)
    return OutputParameter(
        output_parameter_id=f"{key}_id",
        key=key,
        workflow_id="wf",
        created_at=timestamp,
        modified_at=timestamp,
    )
def _navigation_block(label: str, next_block_label: str | None = None) -> NavigationBlock:
    """NavigationBlock fixture; title mirrors the label."""
    return NavigationBlock(
        label=label,
        title=label,
        url="https://example.com",
        navigation_goal="goal",
        next_block_label=next_block_label,
        output_parameter=_output_parameter(f"{label}_output"),
    )
def _extraction_block(label: str, next_block_label: str | None = None) -> ExtractionBlock:
    """ExtractionBlock fixture; title mirrors the label."""
    return ExtractionBlock(
        label=label,
        title=label,
        url="https://example.com",
        data_extraction_goal="extract data",
        next_block_label=next_block_label,
        output_parameter=_output_parameter(f"{label}_output"),
    )
def _conditional_block(
    label: str, branch_conditions: list[BranchCondition], next_block_label: str | None = None
) -> ConditionalBlock:
    """ConditionalBlock fixture wired with the given branch conditions."""
    return ConditionalBlock(
        branch_conditions=branch_conditions,
        label=label,
        next_block_label=next_block_label,
        output_parameter=_output_parameter(f"{label}_output"),
    )
class DummyContext:
    """Minimal stand-in for a workflow run context, sufficient for block tests."""

    def __init__(self, workflow_run_id: str) -> None:
        # Per-block metadata written back by executed blocks.
        self.blocks_metadata: dict[str, dict] = {}
        # Template-rendering state the real context would hold.
        self.values: dict[str, object] = {}
        self.secrets: dict[str, object] = {}
        self.parameters: dict[str, object] = {}
        self.workflow_run_outputs: dict[str, object] = {}
        self.include_secrets_in_templates = False
        self.workflow_title = "test"
        self.workflow_id = "wf"
        self.workflow_permanent_id = "wf-perm"
        self.workflow_run_id = workflow_run_id

    def update_block_metadata(self, label: str, metadata: dict) -> None:
        self.blocks_metadata[label] = metadata

    def get_block_metadata(self, label: str | None) -> dict:
        return {} if label is None else self.blocks_metadata.get(label, {})

    def mask_secrets_in_data(self, data: object) -> object:
        """Mock method - returns data as-is since no secrets in tests."""
        return data

    async def register_output_parameter_value_post_execution(self, parameter: OutputParameter, value: object) -> None:  # noqa: ARG002
        return None

    def build_workflow_run_summary(self) -> dict:
        return {}
def test_build_workflow_graph_infers_default_edges() -> None:
    """Blocks without explicit next labels fall through in array order."""
    blocks = [_navigation_block("first"), _navigation_block("second")]
    start_label, label_to_block, default_next_map = WorkflowService()._build_workflow_graph(blocks)
    assert start_label == "first"
    assert set(label_to_block) == {"first", "second"}
    assert default_next_map["first"] == "second"
    assert default_next_map["second"] is None
def test_build_workflow_graph_rejects_cycles() -> None:
    """A two-node cycle must be rejected as an invalid workflow definition."""
    looping_blocks = [
        _navigation_block("first", next_block_label="second"),
        _navigation_block("second", next_block_label="first"),
    ]
    with pytest.raises(InvalidWorkflowDefinition):
        WorkflowService()._build_workflow_graph(looping_blocks)
def test_build_workflow_graph_requires_single_root() -> None:
    """More than one entry point (block with no inbound edge) is invalid."""
    blocks = [
        _navigation_block("first"),
        _navigation_block("second"),
        _navigation_block("third", next_block_label="second"),
    ]
    with pytest.raises(InvalidWorkflowDefinition):
        WorkflowService()._build_workflow_graph(blocks)
def test_build_workflow_graph_conditional_blocks_no_sequential_defaulting() -> None:
    """
    Test that workflows with conditional blocks do not apply sequential defaulting.
    This prevents cycles when blocks are ordered differently than execution order.
    For example, if a terminal block appears before branch targets in the blocks array,
    sequential defaulting would incorrectly create a cycle.
    """
    # Execution: start -> extract -> conditional -> (branch_a OR branch_b) -> terminal
    # Array order intentionally places terminal before branch_a/branch_b.
    conditional = _conditional_block(
        "conditional",
        branch_conditions=[
            BranchCondition(
                criteria=JinjaBranchCriteria(expression="{{ true }}"), next_block_label="branch_a", is_default=False
            ),
            BranchCondition(criteria=None, next_block_label="branch_b", is_default=True),
        ],
        next_block_label="terminal",  # This should be ignored for conditional blocks
    )
    blocks = [
        _navigation_block("start", next_block_label="extract"),
        _extraction_block("extract", next_block_label="conditional"),
        conditional,
        _extraction_block("terminal", next_block_label=None),  # Terminal block with explicit None
        _navigation_block("branch_a", next_block_label="terminal"),
        _navigation_block("branch_b", next_block_label="terminal"),
    ]
    # Building the graph must succeed without creating a cycle.
    start_label, label_to_block, default_next_map = WorkflowService()._build_workflow_graph(blocks)
    assert start_label == "start"
    assert set(label_to_block) == {"start", "extract", "conditional", "terminal", "branch_a", "branch_b"}
    # Sequential defaulting must NOT run: terminal keeps its explicit None
    # rather than being defaulted to branch_a.
    assert default_next_map["terminal"] is None
    assert default_next_map["branch_a"] == "terminal"
    assert default_next_map["branch_b"] == "terminal"
@pytest.mark.asyncio
async def test_evaluate_conditional_block_records_branch_metadata(monkeypatch: pytest.MonkeyPatch) -> None:
    """A truthy Jinja branch is taken and its metadata is recorded.

    Verifies the returned BlockResult, the in-memory context metadata, and the
    kwargs of the DB update call, including the executed_branch_* tracking fields.
    """
    output_param = _output_parameter("conditional_output")
    block = ConditionalBlock(
        label="cond",
        output_parameter=output_param,
        branch_conditions=[
            BranchCondition(criteria=JinjaBranchCriteria(expression="{{ flag }}"), next_block_label="next"),
            BranchCondition(is_default=True, next_block_label=None),
        ],
    )
    ctx = DummyContext(workflow_run_id="run-1")
    ctx.values["flag"] = True  # makes "{{ flag }}" render truthy, so the first branch wins
    monkeypatch.setattr(app.WORKFLOW_CONTEXT_MANAGER, "get_workflow_run_context", lambda workflow_run_id: ctx)
    # Reset shared mocks so the call assertions below see only this test's calls.
    app.DATABASE.update_workflow_run_block.reset_mock()
    app.DATABASE.create_or_update_workflow_run_output_parameter.reset_mock()
    result = await block.execute(
        workflow_run_id="run-1",
        workflow_run_block_id="wrb-1",
        organization_id="org-1",
    )
    metadata = result.output_parameter_value
    assert metadata["branch_taken"] == "next"
    assert metadata["next_block_label"] == "next"
    assert result.status == BlockStatus.completed
    assert ctx.blocks_metadata["cond"]["branch_taken"] == "next"
    # Get the actual call arguments
    call_args = app.DATABASE.update_workflow_run_block.call_args
    assert call_args.kwargs["workflow_run_block_id"] == "wrb-1"
    assert call_args.kwargs["output"] == metadata
    assert call_args.kwargs["status"] == BlockStatus.completed
    assert call_args.kwargs["failure_reason"] is None
    assert call_args.kwargs["organization_id"] == "org-1"
    # Verify the new execution tracking fields are present
    assert call_args.kwargs["executed_branch_expression"] == "{{ flag }}"
    assert call_args.kwargs["executed_branch_result"] is True
    assert call_args.kwargs["executed_branch_next_block"] == "next"
    # executed_branch_id should be a UUID string
    assert isinstance(call_args.kwargs["executed_branch_id"], str)
@pytest.mark.asyncio
async def test_prompt_branch_uses_batched_evaluation(monkeypatch: pytest.MonkeyPatch) -> None:
    """Prompt-criteria branches are evaluated via the batched helper exactly once."""
    output_param = _output_parameter("conditional_output_prompt")
    prompt_branch = BranchCondition(
        criteria=PromptBranchCriteria(expression="Check if urgent"), next_block_label="next"
    )
    default_branch = BranchCondition(is_default=True, next_block_label=None)
    block = ConditionalBlock(
        label="cond_prompt",
        output_parameter=output_param,
        branch_conditions=[prompt_branch, default_branch],
    )
    ctx = DummyContext(workflow_run_id="run-2")
    monkeypatch.setattr(app.WORKFLOW_CONTEXT_MANAGER, "get_workflow_run_context", lambda workflow_run_id: ctx)
    # Return tuple: (results, rendered_expressions, extraction_goal, llm_response)
    # [True] means the single prompt branch evaluates truthy and is taken.
    prompt_eval_mock = AsyncMock(return_value=([True], ["Check if urgent"], "test prompt", None))
    monkeypatch.setattr(ConditionalBlock, "_evaluate_prompt_branches", prompt_eval_mock)
    result = await block.execute(
        workflow_run_id="run-2",
        workflow_run_block_id="wrb-2",
        organization_id="org-2",
    )
    assert result.status == BlockStatus.completed
    metadata = result.output_parameter_value
    assert metadata["branch_taken"] == "next"
    assert metadata["criteria_type"] == "prompt"
    # Batched evaluation must be awaited exactly once for all prompt branches.
    prompt_eval_mock.assert_awaited_once()

View File

@@ -0,0 +1,232 @@
"""
Tests for FileParserBlock DOCX support.
Covers file type detection, validation, text extraction (paragraphs + tables),
token truncation, and error handling for DOCX files.
"""
from __future__ import annotations
from datetime import datetime, timezone
from pathlib import Path
import docx
import pytest
from skyvern.forge.sdk.workflow.exceptions import InvalidFileType
from skyvern.forge.sdk.workflow.models.block import BlockType, FileParserBlock
from skyvern.forge.sdk.workflow.models.parameter import OutputParameter, ParameterType
from skyvern.schemas.workflows import FileType
def _make_output_parameter(key: str) -> OutputParameter:
    """OutputParameter fixture for file-parser block tests."""
    return OutputParameter(
        key=key,
        parameter_type=ParameterType.OUTPUT,
        description="test",
        output_parameter_id="test-output-id",
        workflow_id="test-workflow-id",
        created_at=datetime.now(timezone.utc),
        modified_at=datetime.now(timezone.utc),
    )
def _make_file_parser_block(file_url: str, file_type: FileType) -> FileParserBlock:
    """FileParserBlock fixture pointing at *file_url* with the declared *file_type*."""
    return FileParserBlock(
        block_type=BlockType.FILE_URL_PARSER,
        label="test_file_parser",
        file_url=file_url,
        file_type=file_type,
        output_parameter=_make_output_parameter("test_output"),
    )
def _create_docx(
    path: Path,
    paragraphs: list[str] | None = None,
    table_rows: list[list[str]] | None = None,
) -> Path:
    """Write a DOCX file at *path* with optional paragraphs and a single table."""
    document = docx.Document()
    for text in paragraphs or []:
        document.add_paragraph(text)
    if table_rows:
        # Table width is taken from the first row; all rows must match it.
        table = document.add_table(rows=len(table_rows), cols=len(table_rows[0]))
        for table_row, row_data in zip(table.rows, table_rows):
            for cell, cell_text in zip(table_row.cells, row_data):
                cell.text = cell_text
    document.save(str(path))
    return path
class TestDetectFileTypeFromUrl:
    """Tests for _detect_file_type_from_url with DOCX extensions."""

    def _detect(self, url: str) -> FileType:
        # The declared file_type is irrelevant: detection inspects only the URL.
        block = _make_file_parser_block(url, FileType.CSV)
        return block._detect_file_type_from_url(url)

    def test_docx_extension(self) -> None:
        assert self._detect("https://example.com/file.docx") == FileType.DOCX

    def test_doc_extension_raises_error(self) -> None:
        # Legacy .doc (Word 97-2003) is not supported by python-docx
        with pytest.raises(InvalidFileType, match="Legacy .doc format"):
            self._detect("https://example.com/file.doc")

    def test_docx_with_query_params(self) -> None:
        assert self._detect("https://example.com/file.docx?token=abc&v=1") == FileType.DOCX

    def test_docx_case_insensitive(self) -> None:
        assert self._detect("https://example.com/file.DOCX") == FileType.DOCX

    def test_other_extensions_unchanged(self) -> None:
        expected_by_url = {
            "https://example.com/file.pdf": FileType.PDF,
            "https://example.com/file.xlsx": FileType.EXCEL,
            "https://example.com/file.csv": FileType.CSV,
            "https://example.com/file.png": FileType.IMAGE,
        }
        for url, expected in expected_by_url.items():
            assert self._detect(url) == expected
class TestValidateFileType:
    """Tests for validate_file_type with DOCX files."""

    def test_valid_docx(self, tmp_path: Path) -> None:
        docx_path = _create_docx(tmp_path / "valid.docx", paragraphs=["Hello"])
        block = _make_file_parser_block("https://example.com/valid.docx", FileType.DOCX)
        # A genuine DOCX must pass validation without raising.
        block.validate_file_type("https://example.com/valid.docx", str(docx_path))

    def test_plain_text_with_docx_extension(self, tmp_path: Path) -> None:
        fake_path = tmp_path / "fake.docx"
        fake_path.write_text("This is plain text, not a DOCX file.")
        block = _make_file_parser_block("https://example.com/fake.docx", FileType.DOCX)
        with pytest.raises(InvalidFileType):
            block.validate_file_type("https://example.com/fake.docx", str(fake_path))

    def test_empty_file(self, tmp_path: Path) -> None:
        empty_path = tmp_path / "empty.docx"
        empty_path.write_bytes(b"")
        block = _make_file_parser_block("https://example.com/empty.docx", FileType.DOCX)
        with pytest.raises(InvalidFileType):
            block.validate_file_type("https://example.com/empty.docx", str(empty_path))
@pytest.mark.asyncio
class TestParseDocxFile:
    """Tests for _parse_docx_file text extraction."""

    @staticmethod
    def _block_for(filename: str) -> FileParserBlock:
        # One place to build a DOCX parser block for a given remote filename.
        return _make_file_parser_block(f"https://example.com/{filename}", FileType.DOCX)

    async def test_paragraphs_joined_by_newline(self, tmp_path: Path) -> None:
        doc_path = _create_docx(tmp_path / "paras.docx", paragraphs=["Hello", "World"])
        parsed = await self._block_for("paras.docx")._parse_docx_file(str(doc_path))
        assert parsed == "Hello\nWorld"

    async def test_empty_paragraphs_skipped(self, tmp_path: Path) -> None:
        doc_path = _create_docx(tmp_path / "blanks.docx", paragraphs=["Hello", "", " ", "World"])
        parsed = await self._block_for("blanks.docx")._parse_docx_file(str(doc_path))
        assert parsed == "Hello\nWorld"

    async def test_table_rows_formatted_with_pipe(self, tmp_path: Path) -> None:
        doc_path = _create_docx(
            tmp_path / "table.docx",
            table_rows=[["Name", "Age"], ["Alice", "30"]],
        )
        parsed = await self._block_for("table.docx")._parse_docx_file(str(doc_path))
        assert parsed == "Name | Age\nAlice | 30"

    async def test_mixed_paragraphs_and_tables(self, tmp_path: Path) -> None:
        doc_path = _create_docx(
            tmp_path / "mixed.docx",
            paragraphs=["Intro"],
            table_rows=[["Col1", "Col2"], ["A", "B"]],
        )
        parsed = await self._block_for("mixed.docx")._parse_docx_file(str(doc_path))
        assert parsed == "Intro\nCol1 | Col2\nA | B"

    async def test_empty_document(self, tmp_path: Path) -> None:
        doc_path = _create_docx(tmp_path / "empty.docx")
        parsed = await self._block_for("empty.docx")._parse_docx_file(str(doc_path))
        assert parsed == ""

    async def test_empty_table_cells_skipped(self, tmp_path: Path) -> None:
        doc_path = _create_docx(
            tmp_path / "sparse.docx",
            table_rows=[["Name", "", "Age"], ["", "", ""]],
        )
        parsed = await self._block_for("sparse.docx")._parse_docx_file(str(doc_path))
        # First row: "Name" and "Age" (empty cell skipped), second row: all empty -> skipped
        assert parsed == "Name | Age"

    async def test_multiple_tables(self, tmp_path: Path) -> None:
        document = docx.Document()
        for prefix in ("T1", "T2"):
            table = document.add_table(rows=1, cols=2)
            table.rows[0].cells[0].text = f"{prefix}C1"
            table.rows[0].cells[1].text = f"{prefix}C2"
        doc_path = tmp_path / "multi_table.docx"
        document.save(str(doc_path))
        parsed = await self._block_for("multi_table.docx")._parse_docx_file(str(doc_path))
        assert parsed == "T1C1 | T1C2\nT2C1 | T2C2"
@pytest.mark.asyncio
class TestParseDocxFileTokenTruncation:
    """Tests for _parse_docx_file token limit enforcement."""

    async def test_paragraphs_truncated(self, tmp_path: Path) -> None:
        # 100 paragraphs comfortably overflow a 20-token budget.
        source_paragraphs = [f"This is paragraph number {i} with some text content." for i in range(100)]
        doc_path = _create_docx(tmp_path / "long.docx", paragraphs=source_paragraphs)
        block = _make_file_parser_block("https://example.com/long.docx", FileType.DOCX)
        truncated = await block._parse_docx_file(str(doc_path), max_tokens=20)
        kept_lines = truncated.split("\n")
        assert len(kept_lines) < len(source_paragraphs)
        # Truncation is line-granular: every kept line is an intact paragraph.
        assert all(line.startswith("This is paragraph number") for line in kept_lines)

    async def test_tables_truncated(self, tmp_path: Path) -> None:
        source_rows = [[f"R{i}C1", f"R{i}C2", f"R{i}C3"] for i in range(100)]
        doc_path = _create_docx(tmp_path / "big_table.docx", table_rows=source_rows)
        block = _make_file_parser_block("https://example.com/big_table.docx", FileType.DOCX)
        truncated = await block._parse_docx_file(str(doc_path), max_tokens=20)
        assert len(truncated.split("\n")) < len(source_rows)

    async def test_tables_skipped_when_paragraphs_exhaust_budget(self, tmp_path: Path) -> None:
        heavy_paragraphs = [f"Long paragraph {i} with lots of content to fill tokens." for i in range(100)]
        doc_path = _create_docx(
            tmp_path / "para_heavy.docx",
            paragraphs=heavy_paragraphs,
            table_rows=[["Should", "Not", "Appear"]],
        )
        block = _make_file_parser_block("https://example.com/para_heavy.docx", FileType.DOCX)
        truncated = await block._parse_docx_file(str(doc_path), max_tokens=20)
        # Paragraphs consume the whole budget, so no table cell may leak through.
        for cell_text in ("Should", "Not", "Appear"):
            assert cell_text not in truncated
@pytest.mark.asyncio
class TestParseDocxFileErrorHandling:
    """Tests for _parse_docx_file error handling."""

    async def test_corrupt_file(self, tmp_path: Path) -> None:
        corrupt_path = tmp_path / "corrupt.docx"
        corrupt_path.write_bytes(b"\x00\x01\x02\x03random bytes")
        block = _make_file_parser_block("https://example.com/corrupt.docx", FileType.DOCX)
        with pytest.raises(InvalidFileType):
            await block._parse_docx_file(str(corrupt_path))

    async def test_nonexistent_file(self, tmp_path: Path) -> None:
        block = _make_file_parser_block("https://example.com/missing.docx", FileType.DOCX)
        with pytest.raises(InvalidFileType):
            await block._parse_docx_file(str(tmp_path / "nonexistent.docx"))

View File

@@ -0,0 +1,46 @@
import json
class TestJsonTextParsingEquivalence:
"""Prove JSON/text parsing behavior matches aiohttp semantics.
The HttpRequestBlock parses responses using:
try:
response_body = json.loads(response_bytes.decode("utf-8"))
except (json.JSONDecodeError, UnicodeDecodeError):
response_body = response_bytes.decode("utf-8", errors="replace")
This should behave equivalently to aiohttp's:
try:
response_body = await response.json()
except (aiohttp.ContentTypeError, Exception):
response_body = await response.text()
"""
def _parse_response(self, response_bytes: bytes) -> str | dict | list:
try:
return json.loads(response_bytes.decode("utf-8"))
except (json.JSONDecodeError, UnicodeDecodeError):
return response_bytes.decode("utf-8", errors="replace")
def test_valid_json_utf8(self) -> None:
data = {"key": "value", "number": 42, "unicode": "日本語"}
response_bytes = json.dumps(data).encode("utf-8")
result = self._parse_response(response_bytes)
assert result == data
def test_invalid_json_returns_text(self) -> None:
response_bytes = b"not json, just text"
result = self._parse_response(response_bytes)
assert result == "not json, just text"
def test_non_utf8_bytes_handled_gracefully(self) -> None:
response_bytes = "café".encode("latin-1") # b'caf\xe9'
result = self._parse_response(response_bytes)
assert "caf" in result
assert isinstance(result, str)
def test_empty_response(self) -> None:
response_bytes = b""
result = self._parse_response(response_bytes)
assert result == ""