always capture llm artifacts (#4284)

Author: pedrohsdb
Date: 2025-12-12 10:30:36 -08:00
Committed by: GitHub
Parent: 1c22a79a71
Commit: 99a477e0dd
4 changed files with 69 additions and 80 deletions

View File

@@ -74,6 +74,24 @@ class LLMCallStats(BaseModel):
     llm_cost: float | None = None
 
 
+def _get_artifact_targets_and_persist_flag(
+    step: Step | None,
+    is_speculative_step: bool,
+    task_v2: TaskV2 | None,
+    thought: Thought | None,
+    ai_suggestion: AISuggestion | None,
+) -> tuple[bool, dict[str, Any]]:
+    artifact_targets = {
+        "step": step if not is_speculative_step else None,
+        "task_v2": task_v2,
+        "thought": thought,
+        "ai_suggestion": ai_suggestion,
+    }
+    has_artifact_target = any(value is not None for value in artifact_targets.values())
+    should_persist_llm_artifacts = not is_speculative_step and has_artifact_target
+    return should_persist_llm_artifacts, artifact_targets
+
+
 async def _log_hashed_href_map_artifacts_if_needed(
     context: SkyvernContext | None,
     step: Step | None,
@@ -83,14 +101,14 @@ async def _log_hashed_href_map_artifacts_if_needed(
     *,
     is_speculative_step: bool,
 ) -> None:
-    if context and context.hashed_href_map and step and not is_speculative_step:
+    should_persist_llm_artifacts, artifact_targets = _get_artifact_targets_and_persist_flag(
+        step, is_speculative_step, task_v2, thought, ai_suggestion
+    )
+    if context and context.hashed_href_map and should_persist_llm_artifacts:
         await app.ARTIFACT_MANAGER.create_llm_artifact(
             data=json.dumps(context.hashed_href_map, indent=2).encode("utf-8"),
             artifact_type=ArtifactType.HASHED_HREF_MAP,
-            step=step,
-            task_v2=task_v2,
-            thought=thought,
-            ai_suggestion=ai_suggestion,
+            **artifact_targets,
         )
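
A minimal standalone sketch of how the new helper behaves (the body is condensed from the hunk above; plain strings stand in for Skyvern's Step, TaskV2, Thought, and AISuggestion models, which are not redefined here):

from typing import Any

def _get_artifact_targets_and_persist_flag(
    step: Any, is_speculative_step: bool, task_v2: Any, thought: Any, ai_suggestion: Any
) -> tuple[bool, dict[str, Any]]:
    artifact_targets = {
        "step": step if not is_speculative_step else None,
        "task_v2": task_v2,
        "thought": thought,
        "ai_suggestion": ai_suggestion,
    }
    has_artifact_target = any(value is not None for value in artifact_targets.values())
    return not is_speculative_step and has_artifact_target, artifact_targets

# A speculative step never persists artifacts, and the step is dropped from the targets:
persist, targets = _get_artifact_targets_and_persist_flag("step_1", True, None, None, None)
assert persist is False and targets["step"] is None

# Any non-step target (here a thought) is enough to persist:
persist, targets = _get_artifact_targets_and_persist_flag(None, False, None, "thought_1", None)
assert persist is True and targets["thought"] == "thought_1"
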
@@ -391,6 +409,9 @@ class LLMAPIHandlerFactory:
             context = skyvern_context.current()
             is_speculative_step = step.is_speculative if step else False
+            should_persist_llm_artifacts, artifact_targets = _get_artifact_targets_and_persist_flag(
+                step, is_speculative_step, task_v2, thought, ai_suggestion
+            )
 
             await _log_hashed_href_map_artifacts_if_needed(
                 context,
                 step,
@@ -401,15 +422,12 @@ class LLMAPIHandlerFactory:
             )
 
             llm_prompt_value = prompt
-            if step and not is_speculative_step:
+            if should_persist_llm_artifacts:
                 await app.ARTIFACT_MANAGER.create_llm_artifact(
                     data=llm_prompt_value.encode("utf-8"),
                     artifact_type=ArtifactType.LLM_PROMPT,
                     screenshots=screenshots,
-                    step=step,
-                    task_v2=task_v2,
-                    thought=thought,
-                    ai_suggestion=ai_suggestion,
+                    **artifact_targets,
                 )
             # Build messages and apply caching in one step
             messages = await llm_messages_builder(prompt, screenshots, llm_config.add_assistant_prefix)
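
Throughout the hunks below, the **artifact_targets unpacking is ordinary dict-to-keyword expansion, so each call is equivalent to the explicit keyword arguments it replaces, with absent targets still passed as None. A short sketch with a hypothetical stub in place of app.ARTIFACT_MANAGER.create_llm_artifact:

def create_llm_artifact_stub(**kwargs: object) -> dict[str, object]:
    # Hypothetical stand-in; it just echoes the keyword arguments it receives.
    return kwargs

artifact_targets = {"step": None, "task_v2": None, "thought": "thought_1", "ai_suggestion": None}
assert create_llm_artifact_stub(**artifact_targets) == create_llm_artifact_stub(
    step=None, task_v2=None, thought="thought_1", ai_suggestion=None
)
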
@@ -422,14 +440,11 @@ class LLMAPIHandlerFactory:
                 "vertex_cache_attached": vertex_cache_attached_flag,
             }
             llm_request_json = json.dumps(llm_request_payload)
-            if step and not is_speculative_step:
+            if should_persist_llm_artifacts:
                 await app.ARTIFACT_MANAGER.create_llm_artifact(
                     data=llm_request_json.encode("utf-8"),
                     artifact_type=ArtifactType.LLM_REQUEST,
-                    step=step,
-                    task_v2=task_v2,
-                    thought=thought,
-                    ai_suggestion=ai_suggestion,
+                    **artifact_targets,
                 )
 
             return llm_request_json
@@ -595,14 +610,11 @@ class LLMAPIHandlerFactory:
                 raise LLMProviderError(llm_key) from e
 
             llm_response_json = response.model_dump_json(indent=2)
-            if step and not is_speculative_step:
+            if should_persist_llm_artifacts:
                 await app.ARTIFACT_MANAGER.create_llm_artifact(
                     data=llm_response_json.encode("utf-8"),
                     artifact_type=ArtifactType.LLM_RESPONSE,
-                    step=step,
-                    task_v2=task_v2,
-                    thought=thought,
-                    ai_suggestion=ai_suggestion,
+                    **artifact_targets,
                 )
             prompt_tokens = 0
             completion_tokens = 0
@@ -661,14 +673,11 @@ class LLMAPIHandlerFactory:
             )
 
             parsed_response = parse_api_response(response, llm_config.add_assistant_prefix, force_dict)
             parsed_response_json = json.dumps(parsed_response, indent=2)
-            if step and not is_speculative_step:
+            if should_persist_llm_artifacts:
                 await app.ARTIFACT_MANAGER.create_llm_artifact(
                     data=parsed_response_json.encode("utf-8"),
                     artifact_type=ArtifactType.LLM_RESPONSE_PARSED,
-                    step=step,
-                    task_v2=task_v2,
-                    thought=thought,
-                    ai_suggestion=ai_suggestion,
+                    **artifact_targets,
                 )
             rendered_response_json = None
@@ -677,14 +686,11 @@ class LLMAPIHandlerFactory:
                 rendered_content = Template(llm_content).render(context.hashed_href_map)
                 parsed_response = json.loads(rendered_content)
                 rendered_response_json = json.dumps(parsed_response, indent=2)
-                if step and not is_speculative_step:
+                if should_persist_llm_artifacts:
                     await app.ARTIFACT_MANAGER.create_llm_artifact(
                         data=rendered_response_json.encode("utf-8"),
                         artifact_type=ArtifactType.LLM_RESPONSE_RENDERED,
-                        step=step,
-                        task_v2=task_v2,
-                        thought=thought,
-                        ai_suggestion=ai_suggestion,
+                        **artifact_targets,
                     )
 
             # Track LLM API handler duration, token counts, and cost
@@ -796,6 +802,9 @@ class LLMAPIHandlerFactory:
             context = skyvern_context.current()
             is_speculative_step = step.is_speculative if step else False
+            should_persist_llm_artifacts, artifact_targets = _get_artifact_targets_and_persist_flag(
+                step, is_speculative_step, task_v2, thought, ai_suggestion
+            )
 
             await _log_hashed_href_map_artifacts_if_needed(
                 context,
                 step,
@@ -806,15 +815,12 @@ class LLMAPIHandlerFactory:
             )
 
             llm_prompt_value = prompt
-            if step and not is_speculative_step:
+            if should_persist_llm_artifacts:
                 await app.ARTIFACT_MANAGER.create_llm_artifact(
                     data=llm_prompt_value.encode("utf-8"),
                     artifact_type=ArtifactType.LLM_PROMPT,
                     screenshots=screenshots,
-                    step=step,
-                    task_v2=task_v2,
-                    thought=thought,
-                    ai_suggestion=ai_suggestion,
+                    **artifact_targets,
                 )
 
             if not llm_config.supports_vision:
@@ -900,14 +906,11 @@ class LLMAPIHandlerFactory:
                 "vertex_cache_attached": vertex_cache_attached,
             }
             llm_request_json = json.dumps(llm_request_payload)
-            if step and not is_speculative_step:
+            if should_persist_llm_artifacts:
                 await app.ARTIFACT_MANAGER.create_llm_artifact(
                     data=llm_request_json.encode("utf-8"),
                     artifact_type=ArtifactType.LLM_REQUEST,
-                    step=step,
-                    task_v2=task_v2,
-                    thought=thought,
-                    ai_suggestion=ai_suggestion,
+                    **artifact_targets,
                 )
 
             t_llm_request = time.perf_counter()
@@ -966,14 +969,11 @@ class LLMAPIHandlerFactory:
                 raise LLMProviderError(llm_key) from e
 
             llm_response_json = response.model_dump_json(indent=2)
-            if step and not is_speculative_step:
+            if should_persist_llm_artifacts:
                 await app.ARTIFACT_MANAGER.create_llm_artifact(
                     data=llm_response_json.encode("utf-8"),
                     artifact_type=ArtifactType.LLM_RESPONSE,
-                    step=step,
-                    task_v2=task_v2,
-                    thought=thought,
-                    ai_suggestion=ai_suggestion,
+                    **artifact_targets,
                 )
 
             prompt_tokens = 0
@@ -1036,14 +1036,11 @@ class LLMAPIHandlerFactory:
             )
 
             parsed_response = parse_api_response(response, llm_config.add_assistant_prefix, force_dict)
             parsed_response_json = json.dumps(parsed_response, indent=2)
-            if step and not is_speculative_step:
+            if should_persist_llm_artifacts:
                 await app.ARTIFACT_MANAGER.create_llm_artifact(
                     data=parsed_response_json.encode("utf-8"),
                     artifact_type=ArtifactType.LLM_RESPONSE_PARSED,
-                    step=step,
-                    task_v2=task_v2,
-                    thought=thought,
-                    ai_suggestion=ai_suggestion,
+                    **artifact_targets,
                 )
             rendered_response_json = None
@@ -1052,14 +1049,11 @@ class LLMAPIHandlerFactory:
                 rendered_content = Template(llm_content).render(context.hashed_href_map)
                 parsed_response = json.loads(rendered_content)
                 rendered_response_json = json.dumps(parsed_response, indent=2)
-                if step and not is_speculative_step:
+                if should_persist_llm_artifacts:
                     await app.ARTIFACT_MANAGER.create_llm_artifact(
                         data=rendered_response_json.encode("utf-8"),
                         artifact_type=ArtifactType.LLM_RESPONSE_RENDERED,
-                        step=step,
-                        task_v2=task_v2,
-                        thought=thought,
-                        ai_suggestion=ai_suggestion,
+                        **artifact_targets,
                     )
 
             # Track LLM API handler duration, token counts, and cost
@@ -1217,6 +1211,9 @@ class LLMCaller:
         context = skyvern_context.current()
         is_speculative_step = step.is_speculative if step else False
+        should_persist_llm_artifacts, artifact_targets = _get_artifact_targets_and_persist_flag(
+            step, is_speculative_step, task_v2, thought, ai_suggestion
+        )
 
         await _log_hashed_href_map_artifacts_if_needed(
             context,
             step,
@@ -1244,15 +1241,12 @@ class LLMCaller:
            screenshots = resize_screenshots(screenshots, target_dimension)
 
         llm_prompt_value = prompt or ""
-        if prompt and step and not is_speculative_step:
+        if prompt and should_persist_llm_artifacts:
             await app.ARTIFACT_MANAGER.create_llm_artifact(
                 data=prompt.encode("utf-8"),
                 artifact_type=ArtifactType.LLM_PROMPT,
                 screenshots=screenshots,
-                step=step,
-                task_v2=task_v2,
-                thought=thought,
-                ai_suggestion=ai_suggestion,
+                **artifact_targets,
             )
 
         if not self.llm_config.supports_vision:
@@ -1283,14 +1277,11 @@ class LLMCaller:
             **parameters,
         }
         llm_request_json = json.dumps(llm_request_payload)
-        if step and not is_speculative_step:
+        if should_persist_llm_artifacts:
             await app.ARTIFACT_MANAGER.create_llm_artifact(
                 data=llm_request_json.encode("utf-8"),
                 artifact_type=ArtifactType.LLM_REQUEST,
-                step=step,
-                task_v2=task_v2,
-                thought=thought,
-                ai_suggestion=ai_suggestion,
+                **artifact_targets,
             )
         t_llm_request = time.perf_counter()
         try:
@@ -1337,14 +1328,11 @@ class LLMCaller:
            raise LLMProviderError(self.llm_key) from e
 
         llm_response_json = response.model_dump_json(indent=2)
-        if step and not is_speculative_step:
+        if should_persist_llm_artifacts:
             await app.ARTIFACT_MANAGER.create_llm_artifact(
                 data=llm_response_json.encode("utf-8"),
                 artifact_type=ArtifactType.LLM_RESPONSE,
-                step=step,
-                task_v2=task_v2,
-                thought=thought,
-                ai_suggestion=ai_suggestion,
+                **artifact_targets,
             )
 
         call_stats = await self.get_call_stats(response)
@@ -1397,14 +1385,11 @@ class LLMCaller:
 
         parsed_response = parse_api_response(response, self.llm_config.add_assistant_prefix, force_dict)
         parsed_response_json = json.dumps(parsed_response, indent=2)
-        if step and not is_speculative_step:
+        if should_persist_llm_artifacts:
             await app.ARTIFACT_MANAGER.create_llm_artifact(
                 data=parsed_response_json.encode("utf-8"),
                 artifact_type=ArtifactType.LLM_RESPONSE_PARSED,
-                step=step,
-                task_v2=task_v2,
-                thought=thought,
-                ai_suggestion=ai_suggestion,
+                **artifact_targets,
             )
 
         rendered_response_json = None
@@ -1413,14 +1398,11 @@ class LLMCaller:
             rendered_content = Template(llm_content).render(context.hashed_href_map)
             parsed_response = json.loads(rendered_content)
             rendered_response_json = json.dumps(parsed_response, indent=2)
-            if step and not is_speculative_step:
+            if should_persist_llm_artifacts:
                 await app.ARTIFACT_MANAGER.create_llm_artifact(
                     data=rendered_response_json.encode("utf-8"),
                     artifact_type=ArtifactType.LLM_RESPONSE_RENDERED,
-                    step=step,
-                    task_v2=task_v2,
-                    thought=thought,
-                    ai_suggestion=ai_suggestion,
+                    **artifact_targets,
                 )
 
         if step and is_speculative_step:

View File

@@ -52,6 +52,8 @@ class ArtifactManager:
             task_id = context.task_id
         if not run_id and context:
             run_id = context.run_id
+        if not workflow_run_block_id and context:
+            workflow_run_block_id = context.parent_workflow_run_block_id
 
         artifact = await app.DATABASE.create_artifact(
             artifact_id,
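
The added fallback follows the same pattern as the task_id and run_id lines above it: an explicitly passed value wins, otherwise the ambient context supplies it. A small sketch under assumed names (ContextStub is a hypothetical stand-in for SkyvernContext):

class ContextStub:
    # Hypothetical stand-in for SkyvernContext.
    parent_workflow_run_block_id: str | None = "wrb_123"

def resolve_block_id(workflow_run_block_id: str | None, context: ContextStub | None) -> str | None:
    if not workflow_run_block_id and context:
        workflow_run_block_id = context.parent_workflow_run_block_id
    return workflow_run_block_id

assert resolve_block_id(None, ContextStub()) == "wrb_123"                 # falls back to context
assert resolve_block_id("wrb_explicit", ContextStub()) == "wrb_explicit"  # explicit value wins
assert resolve_block_id(None, None) is None                               # no context, stays None
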
@@ -157,6 +159,8 @@ class ArtifactManager:
             uri=uri,
             thought_id=thought.observer_thought_id,
             task_v2_id=thought.observer_cruise_id,
+            workflow_run_id=thought.workflow_run_id,
+            workflow_run_block_id=thought.workflow_run_block_id,
             organization_id=thought.organization_id,
             data=data,
             path=path,
@@ -182,6 +186,7 @@ class ArtifactManager:
             artifact_type=artifact_type,
             uri=uri,
             task_v2_id=task_v2.observer_cruise_id,
+            workflow_run_id=task_v2.workflow_run_id,
             organization_id=task_v2.organization_id,
             data=data,
             path=path,

View File

@@ -76,6 +76,7 @@ class Artifact(BaseModel):
     step_id: str | None = None
     workflow_run_id: str | None = None
     workflow_run_block_id: str | None = None
+    run_id: str | None = None
     observer_cruise_id: str | None = None
     observer_thought_id: str | None = None
     ai_suggestion_id: str | None = None
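
Since the new field is optional with a None default, artifacts created before this change keep deserializing unchanged. A minimal pydantic sketch (a stub model, not the real Artifact):

from pydantic import BaseModel

class ArtifactStub(BaseModel):
    workflow_run_id: str | None = None
    run_id: str | None = None

assert ArtifactStub().run_id is None                       # old payloads: field absent
assert ArtifactStub(run_id="run_123").run_id == "run_123"  # new payloads carry it
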

View File

@@ -300,6 +300,7 @@ def convert_to_artifact(artifact_model: ArtifactModel, debug_enabled: bool = Fal
         step_id=artifact_model.step_id,
         workflow_run_id=artifact_model.workflow_run_id,
         workflow_run_block_id=artifact_model.workflow_run_block_id,
+        run_id=artifact_model.run_id,
         observer_cruise_id=artifact_model.observer_cruise_id,
         observer_thought_id=artifact_model.observer_thought_id,
         created_at=artifact_model.created_at,