Always capture LLM artifacts (#4284)
This commit is contained in:
@@ -74,6 +74,24 @@ class LLMCallStats(BaseModel):
|
||||
llm_cost: float | None = None
|
||||
|
||||
|
||||
def _get_artifact_targets_and_persist_flag(
|
||||
step: Step | None,
|
||||
is_speculative_step: bool,
|
||||
task_v2: TaskV2 | None,
|
||||
thought: Thought | None,
|
||||
ai_suggestion: AISuggestion | None,
|
||||
) -> tuple[bool, dict[str, Any]]:
|
||||
artifact_targets = {
|
||||
"step": step if not is_speculative_step else None,
|
||||
"task_v2": task_v2,
|
||||
"thought": thought,
|
||||
"ai_suggestion": ai_suggestion,
|
||||
}
|
||||
has_artifact_target = any(value is not None for value in artifact_targets.values())
|
||||
should_persist_llm_artifacts = not is_speculative_step and has_artifact_target
|
||||
return should_persist_llm_artifacts, artifact_targets
|
||||
|
||||
|
||||
async def _log_hashed_href_map_artifacts_if_needed(
|
||||
context: SkyvernContext | None,
|
||||
step: Step | None,
|
||||
@@ -83,14 +101,14 @@ async def _log_hashed_href_map_artifacts_if_needed(
|
||||
*,
|
||||
is_speculative_step: bool,
|
||||
) -> None:
|
||||
if context and context.hashed_href_map and step and not is_speculative_step:
|
||||
should_persist_llm_artifacts, artifact_targets = _get_artifact_targets_and_persist_flag(
|
||||
step, is_speculative_step, task_v2, thought, ai_suggestion
|
||||
)
|
||||
if context and context.hashed_href_map and should_persist_llm_artifacts:
|
||||
await app.ARTIFACT_MANAGER.create_llm_artifact(
|
||||
data=json.dumps(context.hashed_href_map, indent=2).encode("utf-8"),
|
||||
artifact_type=ArtifactType.HASHED_HREF_MAP,
|
||||
step=step,
|
||||
task_v2=task_v2,
|
||||
thought=thought,
|
||||
ai_suggestion=ai_suggestion,
|
||||
**artifact_targets,
|
||||
)
|
||||
|
||||
|
||||
@@ -391,6 +409,9 @@ class LLMAPIHandlerFactory:
|
||||
|
||||
context = skyvern_context.current()
|
||||
is_speculative_step = step.is_speculative if step else False
|
||||
should_persist_llm_artifacts, artifact_targets = _get_artifact_targets_and_persist_flag(
|
||||
step, is_speculative_step, task_v2, thought, ai_suggestion
|
||||
)
|
||||
await _log_hashed_href_map_artifacts_if_needed(
|
||||
context,
|
||||
step,
|
||||
@@ -401,15 +422,12 @@ class LLMAPIHandlerFactory:
|
||||
)
|
||||
|
||||
llm_prompt_value = prompt
|
||||
|
||||
if step and not is_speculative_step:
|
||||
if should_persist_llm_artifacts:
|
||||
await app.ARTIFACT_MANAGER.create_llm_artifact(
|
||||
data=llm_prompt_value.encode("utf-8"),
|
||||
artifact_type=ArtifactType.LLM_PROMPT,
|
||||
screenshots=screenshots,
|
||||
step=step,
|
||||
task_v2=task_v2,
|
||||
thought=thought,
|
||||
**artifact_targets,
|
||||
)
|
||||
# Build messages and apply caching in one step
|
||||
messages = await llm_messages_builder(prompt, screenshots, llm_config.add_assistant_prefix)
|
||||
@@ -422,14 +440,11 @@ class LLMAPIHandlerFactory:
|
||||
"vertex_cache_attached": vertex_cache_attached_flag,
|
||||
}
|
||||
llm_request_json = json.dumps(llm_request_payload)
|
||||
if step and not is_speculative_step:
|
||||
if should_persist_llm_artifacts:
|
||||
await app.ARTIFACT_MANAGER.create_llm_artifact(
|
||||
data=llm_request_json.encode("utf-8"),
|
||||
artifact_type=ArtifactType.LLM_REQUEST,
|
||||
step=step,
|
||||
task_v2=task_v2,
|
||||
thought=thought,
|
||||
ai_suggestion=ai_suggestion,
|
||||
**artifact_targets,
|
||||
)
|
||||
return llm_request_json
|
||||
|
||||
@@ -595,14 +610,11 @@ class LLMAPIHandlerFactory:
|
||||
raise LLMProviderError(llm_key) from e
|
||||
|
||||
llm_response_json = response.model_dump_json(indent=2)
|
||||
if step and not is_speculative_step:
|
||||
if should_persist_llm_artifacts:
|
||||
await app.ARTIFACT_MANAGER.create_llm_artifact(
|
||||
data=llm_response_json.encode("utf-8"),
|
||||
artifact_type=ArtifactType.LLM_RESPONSE,
|
||||
step=step,
|
||||
task_v2=task_v2,
|
||||
thought=thought,
|
||||
ai_suggestion=ai_suggestion,
|
||||
**artifact_targets,
|
||||
)
|
||||
prompt_tokens = 0
|
||||
completion_tokens = 0
|
||||
@@ -661,14 +673,11 @@ class LLMAPIHandlerFactory:
|
||||
)
|
||||
parsed_response = parse_api_response(response, llm_config.add_assistant_prefix, force_dict)
|
||||
parsed_response_json = json.dumps(parsed_response, indent=2)
|
||||
if step and not is_speculative_step:
|
||||
if should_persist_llm_artifacts:
|
||||
await app.ARTIFACT_MANAGER.create_llm_artifact(
|
||||
data=parsed_response_json.encode("utf-8"),
|
||||
artifact_type=ArtifactType.LLM_RESPONSE_PARSED,
|
||||
step=step,
|
||||
task_v2=task_v2,
|
||||
thought=thought,
|
||||
ai_suggestion=ai_suggestion,
|
||||
**artifact_targets,
|
||||
)
|
||||
|
||||
rendered_response_json = None
|
||||
@@ -677,14 +686,11 @@ class LLMAPIHandlerFactory:
|
||||
rendered_content = Template(llm_content).render(context.hashed_href_map)
|
||||
parsed_response = json.loads(rendered_content)
|
||||
rendered_response_json = json.dumps(parsed_response, indent=2)
|
||||
if step and not is_speculative_step:
|
||||
if should_persist_llm_artifacts:
|
||||
await app.ARTIFACT_MANAGER.create_llm_artifact(
|
||||
data=rendered_response_json.encode("utf-8"),
|
||||
artifact_type=ArtifactType.LLM_RESPONSE_RENDERED,
|
||||
step=step,
|
||||
task_v2=task_v2,
|
||||
thought=thought,
|
||||
ai_suggestion=ai_suggestion,
|
||||
**artifact_targets,
|
||||
)
|
||||
|
||||
# Track LLM API handler duration, token counts, and cost
|
||||
@@ -796,6 +802,9 @@ class LLMAPIHandlerFactory:
|
||||
|
||||
context = skyvern_context.current()
|
||||
is_speculative_step = step.is_speculative if step else False
|
||||
should_persist_llm_artifacts, artifact_targets = _get_artifact_targets_and_persist_flag(
|
||||
step, is_speculative_step, task_v2, thought, ai_suggestion
|
||||
)
|
||||
await _log_hashed_href_map_artifacts_if_needed(
|
||||
context,
|
||||
step,
|
||||
@@ -806,15 +815,12 @@ class LLMAPIHandlerFactory:
|
||||
)
|
||||
|
||||
llm_prompt_value = prompt
|
||||
if step and not is_speculative_step:
|
||||
if should_persist_llm_artifacts:
|
||||
await app.ARTIFACT_MANAGER.create_llm_artifact(
|
||||
data=llm_prompt_value.encode("utf-8"),
|
||||
artifact_type=ArtifactType.LLM_PROMPT,
|
||||
screenshots=screenshots,
|
||||
step=step,
|
||||
task_v2=task_v2,
|
||||
thought=thought,
|
||||
ai_suggestion=ai_suggestion,
|
||||
**artifact_targets,
|
||||
)
|
||||
|
||||
if not llm_config.supports_vision:
|
||||
@@ -900,14 +906,11 @@ class LLMAPIHandlerFactory:
|
||||
"vertex_cache_attached": vertex_cache_attached,
|
||||
}
|
||||
llm_request_json = json.dumps(llm_request_payload)
|
||||
if step and not is_speculative_step:
|
||||
if should_persist_llm_artifacts:
|
||||
await app.ARTIFACT_MANAGER.create_llm_artifact(
|
||||
data=llm_request_json.encode("utf-8"),
|
||||
artifact_type=ArtifactType.LLM_REQUEST,
|
||||
step=step,
|
||||
task_v2=task_v2,
|
||||
thought=thought,
|
||||
ai_suggestion=ai_suggestion,
|
||||
**artifact_targets,
|
||||
)
|
||||
|
||||
t_llm_request = time.perf_counter()
|
||||
@@ -966,14 +969,11 @@ class LLMAPIHandlerFactory:
|
||||
raise LLMProviderError(llm_key) from e
|
||||
|
||||
llm_response_json = response.model_dump_json(indent=2)
|
||||
if step and not is_speculative_step:
|
||||
if should_persist_llm_artifacts:
|
||||
await app.ARTIFACT_MANAGER.create_llm_artifact(
|
||||
data=llm_response_json.encode("utf-8"),
|
||||
artifact_type=ArtifactType.LLM_RESPONSE,
|
||||
step=step,
|
||||
task_v2=task_v2,
|
||||
thought=thought,
|
||||
ai_suggestion=ai_suggestion,
|
||||
**artifact_targets,
|
||||
)
|
||||
|
||||
prompt_tokens = 0
|
||||
@@ -1036,14 +1036,11 @@ class LLMAPIHandlerFactory:
|
||||
)
|
||||
parsed_response = parse_api_response(response, llm_config.add_assistant_prefix, force_dict)
|
||||
parsed_response_json = json.dumps(parsed_response, indent=2)
|
||||
if step and not is_speculative_step:
|
||||
if should_persist_llm_artifacts:
|
||||
await app.ARTIFACT_MANAGER.create_llm_artifact(
|
||||
data=parsed_response_json.encode("utf-8"),
|
||||
artifact_type=ArtifactType.LLM_RESPONSE_PARSED,
|
||||
step=step,
|
||||
task_v2=task_v2,
|
||||
thought=thought,
|
||||
ai_suggestion=ai_suggestion,
|
||||
**artifact_targets,
|
||||
)
|
||||
|
||||
rendered_response_json = None
|
||||
@@ -1052,14 +1049,11 @@ class LLMAPIHandlerFactory:
|
||||
rendered_content = Template(llm_content).render(context.hashed_href_map)
|
||||
parsed_response = json.loads(rendered_content)
|
||||
rendered_response_json = json.dumps(parsed_response, indent=2)
|
||||
if step and not is_speculative_step:
|
||||
if should_persist_llm_artifacts:
|
||||
await app.ARTIFACT_MANAGER.create_llm_artifact(
|
||||
data=rendered_response_json.encode("utf-8"),
|
||||
artifact_type=ArtifactType.LLM_RESPONSE_RENDERED,
|
||||
step=step,
|
||||
task_v2=task_v2,
|
||||
thought=thought,
|
||||
ai_suggestion=ai_suggestion,
|
||||
**artifact_targets,
|
||||
)
|
||||
|
||||
# Track LLM API handler duration, token counts, and cost
|
||||
@@ -1217,6 +1211,9 @@ class LLMCaller:
|
||||
|
||||
context = skyvern_context.current()
|
||||
is_speculative_step = step.is_speculative if step else False
|
||||
should_persist_llm_artifacts, artifact_targets = _get_artifact_targets_and_persist_flag(
|
||||
step, is_speculative_step, task_v2, thought, ai_suggestion
|
||||
)
|
||||
await _log_hashed_href_map_artifacts_if_needed(
|
||||
context,
|
||||
step,
|
||||
@@ -1244,15 +1241,12 @@ class LLMCaller:
|
||||
screenshots = resize_screenshots(screenshots, target_dimension)
|
||||
|
||||
llm_prompt_value = prompt or ""
|
||||
if prompt and step and not is_speculative_step:
|
||||
if prompt and should_persist_llm_artifacts:
|
||||
await app.ARTIFACT_MANAGER.create_llm_artifact(
|
||||
data=prompt.encode("utf-8"),
|
||||
artifact_type=ArtifactType.LLM_PROMPT,
|
||||
screenshots=screenshots,
|
||||
step=step,
|
||||
task_v2=task_v2,
|
||||
thought=thought,
|
||||
ai_suggestion=ai_suggestion,
|
||||
**artifact_targets,
|
||||
)
|
||||
|
||||
if not self.llm_config.supports_vision:
|
||||
@@ -1283,14 +1277,11 @@ class LLMCaller:
|
||||
**parameters,
|
||||
}
|
||||
llm_request_json = json.dumps(llm_request_payload)
|
||||
if step and not is_speculative_step:
|
||||
if should_persist_llm_artifacts:
|
||||
await app.ARTIFACT_MANAGER.create_llm_artifact(
|
||||
data=llm_request_json.encode("utf-8"),
|
||||
artifact_type=ArtifactType.LLM_REQUEST,
|
||||
step=step,
|
||||
task_v2=task_v2,
|
||||
thought=thought,
|
||||
ai_suggestion=ai_suggestion,
|
||||
**artifact_targets,
|
||||
)
|
||||
t_llm_request = time.perf_counter()
|
||||
try:
|
||||
@@ -1337,14 +1328,11 @@ class LLMCaller:
|
||||
raise LLMProviderError(self.llm_key) from e
|
||||
|
||||
llm_response_json = response.model_dump_json(indent=2)
|
||||
if step and not is_speculative_step:
|
||||
if should_persist_llm_artifacts:
|
||||
await app.ARTIFACT_MANAGER.create_llm_artifact(
|
||||
data=llm_response_json.encode("utf-8"),
|
||||
artifact_type=ArtifactType.LLM_RESPONSE,
|
||||
step=step,
|
||||
task_v2=task_v2,
|
||||
thought=thought,
|
||||
ai_suggestion=ai_suggestion,
|
||||
**artifact_targets,
|
||||
)
|
||||
|
||||
call_stats = await self.get_call_stats(response)
|
||||
@@ -1397,14 +1385,11 @@ class LLMCaller:
|
||||
|
||||
parsed_response = parse_api_response(response, self.llm_config.add_assistant_prefix, force_dict)
|
||||
parsed_response_json = json.dumps(parsed_response, indent=2)
|
||||
if step and not is_speculative_step:
|
||||
if should_persist_llm_artifacts:
|
||||
await app.ARTIFACT_MANAGER.create_llm_artifact(
|
||||
data=parsed_response_json.encode("utf-8"),
|
||||
artifact_type=ArtifactType.LLM_RESPONSE_PARSED,
|
||||
step=step,
|
||||
task_v2=task_v2,
|
||||
thought=thought,
|
||||
ai_suggestion=ai_suggestion,
|
||||
**artifact_targets,
|
||||
)
|
||||
|
||||
rendered_response_json = None
|
||||
@@ -1413,14 +1398,11 @@ class LLMCaller:
|
||||
rendered_content = Template(llm_content).render(context.hashed_href_map)
|
||||
parsed_response = json.loads(rendered_content)
|
||||
rendered_response_json = json.dumps(parsed_response, indent=2)
|
||||
if step and not is_speculative_step:
|
||||
if should_persist_llm_artifacts:
|
||||
await app.ARTIFACT_MANAGER.create_llm_artifact(
|
||||
data=rendered_response_json.encode("utf-8"),
|
||||
artifact_type=ArtifactType.LLM_RESPONSE_RENDERED,
|
||||
step=step,
|
||||
task_v2=task_v2,
|
||||
thought=thought,
|
||||
ai_suggestion=ai_suggestion,
|
||||
**artifact_targets,
|
||||
)
|
||||
|
||||
if step and is_speculative_step:
|
||||
|
||||
@@ -52,6 +52,8 @@ class ArtifactManager:
|
||||
task_id = context.task_id
|
||||
if not run_id and context:
|
||||
run_id = context.run_id
|
||||
if not workflow_run_block_id and context:
|
||||
workflow_run_block_id = context.parent_workflow_run_block_id
|
||||
|
||||
artifact = await app.DATABASE.create_artifact(
|
||||
artifact_id,
|
||||
@@ -157,6 +159,8 @@ class ArtifactManager:
|
||||
uri=uri,
|
||||
thought_id=thought.observer_thought_id,
|
||||
task_v2_id=thought.observer_cruise_id,
|
||||
workflow_run_id=thought.workflow_run_id,
|
||||
workflow_run_block_id=thought.workflow_run_block_id,
|
||||
organization_id=thought.organization_id,
|
||||
data=data,
|
||||
path=path,
|
||||
@@ -182,6 +186,7 @@ class ArtifactManager:
|
||||
artifact_type=artifact_type,
|
||||
uri=uri,
|
||||
task_v2_id=task_v2.observer_cruise_id,
|
||||
workflow_run_id=task_v2.workflow_run_id,
|
||||
organization_id=task_v2.organization_id,
|
||||
data=data,
|
||||
path=path,
|
||||
|
||||
@@ -76,6 +76,7 @@ class Artifact(BaseModel):
|
||||
step_id: str | None = None
|
||||
workflow_run_id: str | None = None
|
||||
workflow_run_block_id: str | None = None
|
||||
run_id: str | None = None
|
||||
observer_cruise_id: str | None = None
|
||||
observer_thought_id: str | None = None
|
||||
ai_suggestion_id: str | None = None
|
||||
|
||||
@@ -300,6 +300,7 @@ def convert_to_artifact(artifact_model: ArtifactModel, debug_enabled: bool = Fal
|
||||
step_id=artifact_model.step_id,
|
||||
workflow_run_id=artifact_model.workflow_run_id,
|
||||
workflow_run_block_id=artifact_model.workflow_run_block_id,
|
||||
run_id=artifact_model.run_id,
|
||||
observer_cruise_id=artifact_model.observer_cruise_id,
|
||||
observer_thought_id=artifact_model.observer_thought_id,
|
||||
created_at=artifact_model.created_at,
|
||||
|
||||
Reference in New Issue
Block a user