always capture llm artifacts (#4284)

Author: pedrohsdb
Date: 2025-12-12 10:30:36 -08:00
Committed by: GitHub
Parent: 1c22a79a71
Commit: 99a477e0dd
4 changed files with 69 additions and 80 deletions

View File

@@ -74,6 +74,24 @@ class LLMCallStats(BaseModel):
     llm_cost: float | None = None
 
 
+def _get_artifact_targets_and_persist_flag(
+    step: Step | None,
+    is_speculative_step: bool,
+    task_v2: TaskV2 | None,
+    thought: Thought | None,
+    ai_suggestion: AISuggestion | None,
+) -> tuple[bool, dict[str, Any]]:
+    artifact_targets = {
+        "step": step if not is_speculative_step else None,
+        "task_v2": task_v2,
+        "thought": thought,
+        "ai_suggestion": ai_suggestion,
+    }
+    has_artifact_target = any(value is not None for value in artifact_targets.values())
+    should_persist_llm_artifacts = not is_speculative_step and has_artifact_target
+    return should_persist_llm_artifacts, artifact_targets
+
+
 async def _log_hashed_href_map_artifacts_if_needed(
     context: SkyvernContext | None,
     step: Step | None,
@@ -83,14 +101,14 @@ async def _log_hashed_href_map_artifacts_if_needed(
     *,
     is_speculative_step: bool,
 ) -> None:
-    if context and context.hashed_href_map and step and not is_speculative_step:
+    should_persist_llm_artifacts, artifact_targets = _get_artifact_targets_and_persist_flag(
+        step, is_speculative_step, task_v2, thought, ai_suggestion
+    )
+    if context and context.hashed_href_map and should_persist_llm_artifacts:
         await app.ARTIFACT_MANAGER.create_llm_artifact(
             data=json.dumps(context.hashed_href_map, indent=2).encode("utf-8"),
             artifact_type=ArtifactType.HASHED_HREF_MAP,
-            step=step,
-            task_v2=task_v2,
-            thought=thought,
-            ai_suggestion=ai_suggestion,
+            **artifact_targets,
         )
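
A minimal standalone sketch of how the new helper behaves (the body is condensed from the hunk above; plain strings stand in for Skyvern's Step, TaskV2, Thought, and AISuggestion models, which are not redefined here):

from typing import Any

def _get_artifact_targets_and_persist_flag(
    step: Any, is_speculative_step: bool, task_v2: Any, thought: Any, ai_suggestion: Any
) -> tuple[bool, dict[str, Any]]:
    artifact_targets = {
        "step": step if not is_speculative_step else None,
        "task_v2": task_v2,
        "thought": thought,
        "ai_suggestion": ai_suggestion,
    }
    has_artifact_target = any(value is not None for value in artifact_targets.values())
    return not is_speculative_step and has_artifact_target, artifact_targets

# A speculative step never persists artifacts, and the step is dropped from the targets:
persist, targets = _get_artifact_targets_and_persist_flag("step_1", True, None, None, None)
assert persist is False and targets["step"] is None

# Any non-step target (here a thought) is enough to persist:
persist, targets = _get_artifact_targets_and_persist_flag(None, False, None, "thought_1", None)
assert persist is True and targets["thought"] == "thought_1"
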
@@ -391,6 +409,9 @@ class LLMAPIHandlerFactory:
             context = skyvern_context.current()
             is_speculative_step = step.is_speculative if step else False
+            should_persist_llm_artifacts, artifact_targets = _get_artifact_targets_and_persist_flag(
+                step, is_speculative_step, task_v2, thought, ai_suggestion
+            )
 
             await _log_hashed_href_map_artifacts_if_needed(
                 context,
                 step,
@@ -401,15 +422,12 @@ class LLMAPIHandlerFactory:
             )
 
             llm_prompt_value = prompt
-            if step and not is_speculative_step:
+            if should_persist_llm_artifacts:
                 await app.ARTIFACT_MANAGER.create_llm_artifact(
                     data=llm_prompt_value.encode("utf-8"),
                     artifact_type=ArtifactType.LLM_PROMPT,
                     screenshots=screenshots,
-                    step=step,
-                    task_v2=task_v2,
-                    thought=thought,
-                    ai_suggestion=ai_suggestion,
+                    **artifact_targets,
                 )
             # Build messages and apply caching in one step
             messages = await llm_messages_builder(prompt, screenshots, llm_config.add_assistant_prefix)
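
Throughout the hunks below, the **artifact_targets unpacking is ordinary dict-to-keyword expansion, so each call is equivalent to the explicit keyword arguments it replaces, with absent targets still passed as None. A short sketch with a hypothetical stub in place of app.ARTIFACT_MANAGER.create_llm_artifact:

def create_llm_artifact_stub(**kwargs: object) -> dict[str, object]:
    # Hypothetical stand-in; it just echoes the keyword arguments it receives.
    return kwargs

artifact_targets = {"step": None, "task_v2": None, "thought": "thought_1", "ai_suggestion": None}
assert create_llm_artifact_stub(**artifact_targets) == create_llm_artifact_stub(
    step=None, task_v2=None, thought="thought_1", ai_suggestion=None
)
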
@@ -422,14 +440,11 @@ class LLMAPIHandlerFactory:
                 "vertex_cache_attached": vertex_cache_attached_flag,
             }
             llm_request_json = json.dumps(llm_request_payload)
-            if step and not is_speculative_step:
+            if should_persist_llm_artifacts:
                 await app.ARTIFACT_MANAGER.create_llm_artifact(
                     data=llm_request_json.encode("utf-8"),
                     artifact_type=ArtifactType.LLM_REQUEST,
-                    step=step,
-                    task_v2=task_v2,
-                    thought=thought,
-                    ai_suggestion=ai_suggestion,
+                    **artifact_targets,
                 )
 
             return llm_request_json
@@ -595,14 +610,11 @@ class LLMAPIHandlerFactory:
                 raise LLMProviderError(llm_key) from e
 
             llm_response_json = response.model_dump_json(indent=2)
-            if step and not is_speculative_step:
+            if should_persist_llm_artifacts:
                 await app.ARTIFACT_MANAGER.create_llm_artifact(
                     data=llm_response_json.encode("utf-8"),
                     artifact_type=ArtifactType.LLM_RESPONSE,
-                    step=step,
-                    task_v2=task_v2,
-                    thought=thought,
-                    ai_suggestion=ai_suggestion,
+                    **artifact_targets,
                 )
             prompt_tokens = 0
             completion_tokens = 0
@@ -661,14 +673,11 @@ class LLMAPIHandlerFactory:
             )
 
             parsed_response = parse_api_response(response, llm_config.add_assistant_prefix, force_dict)
             parsed_response_json = json.dumps(parsed_response, indent=2)
-            if step and not is_speculative_step:
+            if should_persist_llm_artifacts:
                 await app.ARTIFACT_MANAGER.create_llm_artifact(
                     data=parsed_response_json.encode("utf-8"),
                     artifact_type=ArtifactType.LLM_RESPONSE_PARSED,
-                    step=step,
-                    task_v2=task_v2,
-                    thought=thought,
-                    ai_suggestion=ai_suggestion,
+                    **artifact_targets,
                 )
             rendered_response_json = None
@@ -677,14 +686,11 @@ class LLMAPIHandlerFactory:
                 rendered_content = Template(llm_content).render(context.hashed_href_map)
                 parsed_response = json.loads(rendered_content)
                 rendered_response_json = json.dumps(parsed_response, indent=2)
-                if step and not is_speculative_step:
+                if should_persist_llm_artifacts:
                     await app.ARTIFACT_MANAGER.create_llm_artifact(
                         data=rendered_response_json.encode("utf-8"),
                         artifact_type=ArtifactType.LLM_RESPONSE_RENDERED,
-                        step=step,
-                        task_v2=task_v2,
-                        thought=thought,
-                        ai_suggestion=ai_suggestion,
+                        **artifact_targets,
                     )
 
             # Track LLM API handler duration, token counts, and cost
@@ -796,6 +802,9 @@ class LLMAPIHandlerFactory:
             context = skyvern_context.current()
             is_speculative_step = step.is_speculative if step else False
+            should_persist_llm_artifacts, artifact_targets = _get_artifact_targets_and_persist_flag(
+                step, is_speculative_step, task_v2, thought, ai_suggestion
+            )
 
             await _log_hashed_href_map_artifacts_if_needed(
                 context,
                 step,
@@ -806,15 +815,12 @@ class LLMAPIHandlerFactory:
             )
 
             llm_prompt_value = prompt
-            if step and not is_speculative_step:
+            if should_persist_llm_artifacts:
                 await app.ARTIFACT_MANAGER.create_llm_artifact(
                     data=llm_prompt_value.encode("utf-8"),
                     artifact_type=ArtifactType.LLM_PROMPT,
                     screenshots=screenshots,
-                    step=step,
-                    task_v2=task_v2,
-                    thought=thought,
-                    ai_suggestion=ai_suggestion,
+                    **artifact_targets,
                 )
 
             if not llm_config.supports_vision:
@@ -900,14 +906,11 @@ class LLMAPIHandlerFactory:
                 "vertex_cache_attached": vertex_cache_attached,
             }
             llm_request_json = json.dumps(llm_request_payload)
-            if step and not is_speculative_step:
+            if should_persist_llm_artifacts:
                 await app.ARTIFACT_MANAGER.create_llm_artifact(
                     data=llm_request_json.encode("utf-8"),
                     artifact_type=ArtifactType.LLM_REQUEST,
-                    step=step,
-                    task_v2=task_v2,
-                    thought=thought,
-                    ai_suggestion=ai_suggestion,
+                    **artifact_targets,
                 )
 
             t_llm_request = time.perf_counter()
@@ -966,14 +969,11 @@ class LLMAPIHandlerFactory:
                 raise LLMProviderError(llm_key) from e
 
             llm_response_json = response.model_dump_json(indent=2)
-            if step and not is_speculative_step:
+            if should_persist_llm_artifacts:
                 await app.ARTIFACT_MANAGER.create_llm_artifact(
                     data=llm_response_json.encode("utf-8"),
                     artifact_type=ArtifactType.LLM_RESPONSE,
-                    step=step,
-                    task_v2=task_v2,
-                    thought=thought,
-                    ai_suggestion=ai_suggestion,
+                    **artifact_targets,
                 )
 
             prompt_tokens = 0
@@ -1036,14 +1036,11 @@ class LLMAPIHandlerFactory:
             )
 
             parsed_response = parse_api_response(response, llm_config.add_assistant_prefix, force_dict)
             parsed_response_json = json.dumps(parsed_response, indent=2)
-            if step and not is_speculative_step:
+            if should_persist_llm_artifacts:
                 await app.ARTIFACT_MANAGER.create_llm_artifact(
                     data=parsed_response_json.encode("utf-8"),
                     artifact_type=ArtifactType.LLM_RESPONSE_PARSED,
-                    step=step,
-                    task_v2=task_v2,
-                    thought=thought,
-                    ai_suggestion=ai_suggestion,
+                    **artifact_targets,
                 )
             rendered_response_json = None
@@ -1052,14 +1049,11 @@ class LLMAPIHandlerFactory:
                 rendered_content = Template(llm_content).render(context.hashed_href_map)
                 parsed_response = json.loads(rendered_content)
                 rendered_response_json = json.dumps(parsed_response, indent=2)
-                if step and not is_speculative_step:
+                if should_persist_llm_artifacts:
                     await app.ARTIFACT_MANAGER.create_llm_artifact(
                         data=rendered_response_json.encode("utf-8"),
                         artifact_type=ArtifactType.LLM_RESPONSE_RENDERED,
-                        step=step,
-                        task_v2=task_v2,
-                        thought=thought,
-                        ai_suggestion=ai_suggestion,
+                        **artifact_targets,
                     )
 
             # Track LLM API handler duration, token counts, and cost
@@ -1217,6 +1211,9 @@ class LLMCaller:
         context = skyvern_context.current()
         is_speculative_step = step.is_speculative if step else False
+        should_persist_llm_artifacts, artifact_targets = _get_artifact_targets_and_persist_flag(
+            step, is_speculative_step, task_v2, thought, ai_suggestion
+        )
 
         await _log_hashed_href_map_artifacts_if_needed(
             context,
             step,
@@ -1244,15 +1241,12 @@ class LLMCaller:
            screenshots = resize_screenshots(screenshots, target_dimension)
 
         llm_prompt_value = prompt or ""
-        if prompt and step and not is_speculative_step:
+        if prompt and should_persist_llm_artifacts:
             await app.ARTIFACT_MANAGER.create_llm_artifact(
                 data=prompt.encode("utf-8"),
                 artifact_type=ArtifactType.LLM_PROMPT,
                 screenshots=screenshots,
-                step=step,
-                task_v2=task_v2,
-                thought=thought,
-                ai_suggestion=ai_suggestion,
+                **artifact_targets,
             )
 
         if not self.llm_config.supports_vision:
@@ -1283,14 +1277,11 @@ class LLMCaller:
             **parameters,
         }
         llm_request_json = json.dumps(llm_request_payload)
-        if step and not is_speculative_step:
+        if should_persist_llm_artifacts:
             await app.ARTIFACT_MANAGER.create_llm_artifact(
                 data=llm_request_json.encode("utf-8"),
                 artifact_type=ArtifactType.LLM_REQUEST,
-                step=step,
-                task_v2=task_v2,
-                thought=thought,
-                ai_suggestion=ai_suggestion,
+                **artifact_targets,
             )
         t_llm_request = time.perf_counter()
         try:
@@ -1337,14 +1328,11 @@ class LLMCaller:
            raise LLMProviderError(self.llm_key) from e
 
         llm_response_json = response.model_dump_json(indent=2)
-        if step and not is_speculative_step:
+        if should_persist_llm_artifacts:
             await app.ARTIFACT_MANAGER.create_llm_artifact(
                 data=llm_response_json.encode("utf-8"),
                 artifact_type=ArtifactType.LLM_RESPONSE,
-                step=step,
-                task_v2=task_v2,
-                thought=thought,
-                ai_suggestion=ai_suggestion,
+                **artifact_targets,
             )
 
         call_stats = await self.get_call_stats(response)
@@ -1397,14 +1385,11 @@ class LLMCaller:
 
         parsed_response = parse_api_response(response, self.llm_config.add_assistant_prefix, force_dict)
         parsed_response_json = json.dumps(parsed_response, indent=2)
-        if step and not is_speculative_step:
+        if should_persist_llm_artifacts:
             await app.ARTIFACT_MANAGER.create_llm_artifact(
                 data=parsed_response_json.encode("utf-8"),
                 artifact_type=ArtifactType.LLM_RESPONSE_PARSED,
-                step=step,
-                task_v2=task_v2,
-                thought=thought,
-                ai_suggestion=ai_suggestion,
+                **artifact_targets,
             )
 
         rendered_response_json = None
@@ -1413,14 +1398,11 @@ class LLMCaller:
             rendered_content = Template(llm_content).render(context.hashed_href_map)
             parsed_response = json.loads(rendered_content)
             rendered_response_json = json.dumps(parsed_response, indent=2)
-            if step and not is_speculative_step:
+            if should_persist_llm_artifacts:
                 await app.ARTIFACT_MANAGER.create_llm_artifact(
                     data=rendered_response_json.encode("utf-8"),
                     artifact_type=ArtifactType.LLM_RESPONSE_RENDERED,
-                    step=step,
-                    task_v2=task_v2,
-                    thought=thought,
-                    ai_suggestion=ai_suggestion,
+                    **artifact_targets,
                 )
 
         if step and is_speculative_step:

View File

@@ -52,6 +52,8 @@ class ArtifactManager:
             task_id = context.task_id
         if not run_id and context:
             run_id = context.run_id
+        if not workflow_run_block_id and context:
+            workflow_run_block_id = context.parent_workflow_run_block_id
 
         artifact = await app.DATABASE.create_artifact(
             artifact_id,
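
The added fallback follows the same pattern as the task_id and run_id lines above it: an explicitly passed value wins, otherwise the ambient context supplies it. A small sketch under assumed names (ContextStub is a hypothetical stand-in for SkyvernContext):

class ContextStub:
    # Hypothetical stand-in for SkyvernContext.
    parent_workflow_run_block_id: str | None = "wrb_123"

def resolve_block_id(workflow_run_block_id: str | None, context: ContextStub | None) -> str | None:
    if not workflow_run_block_id and context:
        workflow_run_block_id = context.parent_workflow_run_block_id
    return workflow_run_block_id

assert resolve_block_id(None, ContextStub()) == "wrb_123"                 # falls back to context
assert resolve_block_id("wrb_explicit", ContextStub()) == "wrb_explicit"  # explicit value wins
assert resolve_block_id(None, None) is None                               # no context, stays None
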
@@ -157,6 +159,8 @@ class ArtifactManager:
             uri=uri,
             thought_id=thought.observer_thought_id,
             task_v2_id=thought.observer_cruise_id,
+            workflow_run_id=thought.workflow_run_id,
+            workflow_run_block_id=thought.workflow_run_block_id,
             organization_id=thought.organization_id,
             data=data,
             path=path,
@@ -182,6 +186,7 @@ class ArtifactManager:
             artifact_type=artifact_type,
             uri=uri,
             task_v2_id=task_v2.observer_cruise_id,
+            workflow_run_id=task_v2.workflow_run_id,
             organization_id=task_v2.organization_id,
             data=data,
             path=path,

View File

@@ -76,6 +76,7 @@ class Artifact(BaseModel):
     step_id: str | None = None
     workflow_run_id: str | None = None
     workflow_run_block_id: str | None = None
+    run_id: str | None = None
     observer_cruise_id: str | None = None
     observer_thought_id: str | None = None
     ai_suggestion_id: str | None = None
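
Since the new field is optional with a None default, artifacts created before this change keep deserializing unchanged. A minimal pydantic sketch (a stub model, not the real Artifact):

from pydantic import BaseModel

class ArtifactStub(BaseModel):
    workflow_run_id: str | None = None
    run_id: str | None = None

assert ArtifactStub().run_id is None                       # old payloads: field absent
assert ArtifactStub(run_id="run_123").run_id == "run_123"  # new payloads carry it
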

View File

@@ -300,6 +300,7 @@ def convert_to_artifact(artifact_model: ArtifactModel, debug_enabled: bool = Fal
         step_id=artifact_model.step_id,
         workflow_run_id=artifact_model.workflow_run_id,
         workflow_run_block_id=artifact_model.workflow_run_block_id,
+        run_id=artifact_model.run_id,
         observer_cruise_id=artifact_model.observer_cruise_id,
         observer_thought_id=artifact_model.observer_thought_id,
         created_at=artifact_model.created_at,