# Source: Dorod-Sky/skyvern/forge/sdk/artifact/manager.py
# (repository viewer export: 390 lines, 14 KiB, Python)

import asyncio
import time
from collections import defaultdict
import structlog
from skyvern.forge import app
from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType, LogEntityType
from skyvern.forge.sdk.core import skyvern_context
from skyvern.forge.sdk.db.id import generate_artifact_id
from skyvern.forge.sdk.models import Step
# (blame timestamp from the repository viewer: 2025-01-08 21:45:38 -08:00)
from skyvern.forge.sdk.schemas.ai_suggestions import AISuggestion
from skyvern.forge.sdk.schemas.task_v2 import TaskV2, Thought
from skyvern.forge.sdk.schemas.workflow_runs import WorkflowRunBlock
# Module-level structlog logger, named after this module.
LOG = structlog.get_logger(__name__)
class ArtifactManager:
    """Create artifact records and track the background uploads of their content.

    Every ``create_*`` method builds a storage URI, inserts the artifact row
    through ``app.DATABASE`` and then schedules the actual upload as an asyncio
    task.  The scheduled tasks are remembered in ``upload_aiotasks_map`` so that
    ``wait_for_upload_aiotasks`` can later await them.
    """

    # primary key -> list of aio_tasks for uploading artifacts.
    # The key is whatever the creating method passes as ``aio_task_primary_key``:
    # a task id, log entity id, task_v2 id, workflow run block id, AI suggestion
    # id or "<script_id>_<script_version>".
    # NOTE: defined on the class, so the mapping is shared by all instances.
    upload_aiotasks_map: dict[str, list[asyncio.Task[None]]] = defaultdict(list)

    async def _create_artifact(
        self,
        aio_task_primary_key: str,
        artifact_id: str,
        artifact_type: ArtifactType,
        uri: str,
        organization_id: str,
        step_id: str | None = None,
        task_id: str | None = None,
        workflow_run_id: str | None = None,
        workflow_run_block_id: str | None = None,
        thought_id: str | None = None,
        task_v2_id: str | None = None,
        run_id: str | None = None,
        ai_suggestion_id: str | None = None,
        data: bytes | None = None,
        path: str | None = None,
    ) -> str:
        """Insert the artifact row and schedule the upload of its content.

        Args:
            aio_task_primary_key: Key under which the upload task is tracked in
                ``upload_aiotasks_map``.
            artifact_id: Pre-generated ID of the artifact.
            artifact_type: Type of the artifact.
            uri: Storage URI the content is written to.
            organization_id: Owning organization.
            step_id, task_id, workflow_run_id, workflow_run_block_id,
            thought_id, task_v2_id, run_id, ai_suggestion_id: Optional entity
                IDs stored on the artifact row.  ``workflow_run_id``,
                ``task_v2_id``, ``task_id`` and ``run_id`` fall back to the
                current skyvern context when not given.
            data: Content to upload, as bytes.
            path: Local file path whose content should be uploaded.

        Returns:
            ``artifact_id``, unchanged.

        Raises:
            ValueError: If neither ``data`` nor ``path`` is given, or if both
                are given (and non-empty).
        """
        if data is None and path is None:
            raise ValueError("Either data or path must be provided to create an artifact.")
        if data and path:
            raise ValueError("Both data and path cannot be provided to create an artifact.")

        # Fill the IDs the caller left out from the ambient context, if any.
        context = skyvern_context.current()
        if context:
            workflow_run_id = workflow_run_id or context.workflow_run_id
            task_v2_id = task_v2_id or context.task_v2_id
            task_id = task_id or context.task_id
            run_id = run_id or context.run_id

        artifact = await app.DATABASE.create_artifact(
            artifact_id,
            artifact_type,
            uri,
            step_id=step_id,
            task_id=task_id,
            workflow_run_id=workflow_run_id,
            workflow_run_block_id=workflow_run_block_id,
            thought_id=thought_id,
            task_v2_id=task_v2_id,
            run_id=run_id,
            organization_id=organization_id,
            ai_suggestion_id=ai_suggestion_id,
        )

        # NOTE(review): the checks below are truthiness checks, so ``data=b""``
        # (with no path) creates the row but schedules no upload — confirm that
        # this is intended.
        if data:
            upload = app.STORAGE.store_artifact(artifact, data)
        elif path:
            upload = app.STORAGE.store_artifact_from_path(artifact, path)
        else:
            return artifact_id

        # Fire and forget; the task is kept in the map so it can be awaited later.
        aio_task = asyncio.create_task(upload)
        self.upload_aiotasks_map[aio_task_primary_key].append(aio_task)
        return artifact_id

    async def create_artifact(
        self,
        step: Step,
        artifact_type: ArtifactType,
        data: bytes | None = None,
        path: str | None = None,
    ) -> str:
        """Create an artifact that belongs to a step.

        The upload is tracked under the step's ``task_id``.

        Returns:
            The ID of the new artifact.
        """
        artifact_id = generate_artifact_id()
        uri = app.STORAGE.build_uri(
            organization_id=step.organization_id,
            artifact_id=artifact_id,
            step=step,
            artifact_type=artifact_type,
        )
        return await self._create_artifact(
            aio_task_primary_key=step.task_id,
            artifact_id=artifact_id,
            artifact_type=artifact_type,
            uri=uri,
            step_id=step.step_id,
            task_id=step.task_id,
            organization_id=step.organization_id,
            data=data,
            path=path,
        )

    async def create_log_artifact(
        self,
        *,
        log_entity_type: LogEntityType,
        log_entity_id: str,
        artifact_type: ArtifactType,
        organization_id: str,
        step_id: str | None = None,
        task_id: str | None = None,
        workflow_run_id: str | None = None,
        workflow_run_block_id: str | None = None,
        data: bytes | None = None,
        path: str | None = None,
    ) -> str:
        """Create a log artifact for the given log entity.

        The upload is tracked under ``log_entity_id``.

        Returns:
            The ID of the new artifact.
        """
        artifact_id = generate_artifact_id()
        uri = app.STORAGE.build_log_uri(
            organization_id=organization_id,
            log_entity_type=log_entity_type,
            log_entity_id=log_entity_id,
            artifact_type=artifact_type,
        )
        return await self._create_artifact(
            aio_task_primary_key=log_entity_id,
            artifact_id=artifact_id,
            artifact_type=artifact_type,
            uri=uri,
            step_id=step_id,
            task_id=task_id,
            workflow_run_id=workflow_run_id,
            workflow_run_block_id=workflow_run_block_id,
            organization_id=organization_id,
            data=data,
            path=path,
        )

    async def create_thought_artifact(
        self,
        thought: Thought,
        artifact_type: ArtifactType,
        data: bytes | None = None,
        path: str | None = None,
    ) -> str:
        """Create an artifact that belongs to a thought.

        The upload is tracked under the thought's ``observer_cruise_id``, which
        is also stored as the artifact's ``task_v2_id``.

        Returns:
            The ID of the new artifact.
        """
        artifact_id = generate_artifact_id()
        uri = app.STORAGE.build_thought_uri(
            organization_id=thought.organization_id,
            artifact_id=artifact_id,
            thought=thought,
            artifact_type=artifact_type,
        )
        return await self._create_artifact(
            aio_task_primary_key=thought.observer_cruise_id,
            artifact_id=artifact_id,
            artifact_type=artifact_type,
            uri=uri,
            thought_id=thought.observer_thought_id,
            task_v2_id=thought.observer_cruise_id,
            organization_id=thought.organization_id,
            data=data,
            path=path,
        )

    async def create_task_v2_artifact(
        self,
        task_v2: TaskV2,
        artifact_type: ArtifactType,
        data: bytes | None = None,
        path: str | None = None,
    ) -> str:
        """Create an artifact that belongs to a task v2.

        The upload is tracked under the task's ``observer_cruise_id``.

        Returns:
            The ID of the new artifact.
        """
        artifact_id = generate_artifact_id()
        uri = app.STORAGE.build_task_v2_uri(
            organization_id=task_v2.organization_id,
            artifact_id=artifact_id,
            task_v2=task_v2,
            artifact_type=artifact_type,
        )
        return await self._create_artifact(
            aio_task_primary_key=task_v2.observer_cruise_id,
            artifact_id=artifact_id,
            artifact_type=artifact_type,
            uri=uri,
            task_v2_id=task_v2.observer_cruise_id,
            organization_id=task_v2.organization_id,
            data=data,
            path=path,
        )

    async def create_workflow_run_block_artifact(
        self,
        workflow_run_block: WorkflowRunBlock,
        artifact_type: ArtifactType,
        data: bytes | None = None,
        path: str | None = None,
    ) -> str:
        """Create an artifact that belongs to a workflow run block.

        The upload is tracked under the block's ``workflow_run_block_id``.

        Returns:
            The ID of the new artifact.
        """
        artifact_id = generate_artifact_id()
        uri = app.STORAGE.build_workflow_run_block_uri(
            organization_id=workflow_run_block.organization_id,
            artifact_id=artifact_id,
            workflow_run_block=workflow_run_block,
            artifact_type=artifact_type,
        )
        return await self._create_artifact(
            aio_task_primary_key=workflow_run_block.workflow_run_block_id,
            artifact_id=artifact_id,
            artifact_type=artifact_type,
            uri=uri,
            workflow_run_block_id=workflow_run_block.workflow_run_block_id,
            workflow_run_id=workflow_run_block.workflow_run_id,
            organization_id=workflow_run_block.organization_id,
            data=data,
            path=path,
        )

    async def create_ai_suggestion_artifact(
        self,
        ai_suggestion: AISuggestion,
        artifact_type: ArtifactType,
        data: bytes | None = None,
        path: str | None = None,
    ) -> str:
        """Create an artifact that belongs to an AI suggestion.

        The upload is tracked under the suggestion's ``ai_suggestion_id``.

        Returns:
            The ID of the new artifact.
        """
        artifact_id = generate_artifact_id()
        uri = app.STORAGE.build_ai_suggestion_uri(
            organization_id=ai_suggestion.organization_id,
            artifact_id=artifact_id,
            ai_suggestion=ai_suggestion,
            artifact_type=artifact_type,
        )
        return await self._create_artifact(
            aio_task_primary_key=ai_suggestion.ai_suggestion_id,
            artifact_id=artifact_id,
            artifact_type=artifact_type,
            uri=uri,
            ai_suggestion_id=ai_suggestion.ai_suggestion_id,
            organization_id=ai_suggestion.organization_id,
            data=data,
            path=path,
        )

    async def create_script_file_artifact(
        self,
        *,
        organization_id: str,
        script_id: str,
        script_version: int,
        file_path: str,
        data: bytes,
    ) -> str:
        """Create an artifact for a script file.

        The upload is tracked under ``"<script_id>_<script_version>"``.

        Args:
            organization_id: The organization ID
            script_id: The script ID
            script_version: The script version
            file_path: The file path relative to script root
            data: The file content as bytes

        Returns:
            The artifact ID
        """
        artifact_id = generate_artifact_id()
        uri = app.STORAGE.build_script_file_uri(
            organization_id=organization_id,
            script_id=script_id,
            script_version=script_version,
            file_path=file_path,
        )
        return await self._create_artifact(
            aio_task_primary_key=f"{script_id}_{script_version}",
            artifact_id=artifact_id,
            artifact_type=ArtifactType.SCRIPT_FILE,
            uri=uri,
            organization_id=organization_id,
            data=data,
        )

    async def create_llm_artifact(
        self,
        data: bytes,
        artifact_type: ArtifactType,
        screenshots: list[bytes] | None = None,
        step: Step | None = None,
        thought: Thought | None = None,
        task_v2: TaskV2 | None = None,
        ai_suggestion: AISuggestion | None = None,
    ) -> None:
        """Create an LLM artifact plus one ``SCREENSHOT_LLM`` artifact per screenshot.

        Exactly one owner is used, chosen in this order of precedence:
        ``step``, ``task_v2``, ``thought``, ``ai_suggestion``.  When none of
        them is given, nothing is created.
        """
        llm_screenshots = screenshots or []

        if step:
            await self.create_artifact(
                step=step,
                artifact_type=artifact_type,
                data=data,
            )
            for screenshot in llm_screenshots:
                await self.create_artifact(
                    step=step,
                    artifact_type=ArtifactType.SCREENSHOT_LLM,
                    data=screenshot,
                )
        elif task_v2:
            await self.create_task_v2_artifact(
                task_v2=task_v2,
                artifact_type=artifact_type,
                data=data,
            )
            for screenshot in llm_screenshots:
                await self.create_task_v2_artifact(
                    task_v2=task_v2,
                    artifact_type=ArtifactType.SCREENSHOT_LLM,
                    data=screenshot,
                )
        elif thought:
            await self.create_thought_artifact(
                thought=thought,
                artifact_type=artifact_type,
                data=data,
            )
            for screenshot in llm_screenshots:
                await self.create_thought_artifact(
                    thought=thought,
                    artifact_type=ArtifactType.SCREENSHOT_LLM,
                    data=screenshot,
                )
        elif ai_suggestion:
            await self.create_ai_suggestion_artifact(
                ai_suggestion=ai_suggestion,
                artifact_type=artifact_type,
                data=data,
            )
            for screenshot in llm_screenshots:
                await self.create_ai_suggestion_artifact(
                    ai_suggestion=ai_suggestion,
                    artifact_type=ArtifactType.SCREENSHOT_LLM,
                    data=screenshot,
                )

    async def update_artifact_data(
        self,
        artifact_id: str | None,
        organization_id: str | None,
        data: bytes,
        primary_key: str = "task_id",
    ) -> None:
        """Re-upload the content of an existing artifact.

        Does nothing when ``artifact_id`` or ``organization_id`` is missing, or
        when the artifact cannot be found.

        Args:
            artifact_id: ID of the artifact to update.
            organization_id: Organization the artifact belongs to.
            data: New content.
            primary_key: Name of the artifact field whose value is used as the
                tracking key in ``upload_aiotasks_map``.

        Raises:
            ValueError: If the artifact has no value for ``primary_key``.
        """
        if not artifact_id or not organization_id:
            return

        artifact = await app.DATABASE.get_artifact_by_id(artifact_id, organization_id)
        if not artifact:
            return

        # Relies on ``Artifact`` supporting item access by field name; that is
        # defined outside this file.
        tracking_key = artifact[primary_key]
        # Validate before scheduling: otherwise the upload would already be
        # running, untracked, by the time the error is raised.
        if not tracking_key:
            raise ValueError(f"{primary_key} is required to update artifact data.")

        # Fire and forget
        aio_task = asyncio.create_task(app.STORAGE.store_artifact(artifact, data))
        self.upload_aiotasks_map[tracking_key].append(aio_task)

    async def retrieve_artifact(self, artifact: Artifact) -> bytes | None:
        """Return whatever ``app.STORAGE.retrieve_artifact`` yields for the artifact."""
        return await app.STORAGE.retrieve_artifact(artifact)

    async def get_share_link(self, artifact: Artifact) -> str | None:
        """Return whatever ``app.STORAGE.get_share_link`` yields for the artifact."""
        return await app.STORAGE.get_share_link(artifact)

    async def get_share_links(self, artifacts: list[Artifact]) -> list[str] | None:
        """Return whatever ``app.STORAGE.get_share_links`` yields for the artifacts."""
        return await app.STORAGE.get_share_links(artifacts)

    async def wait_for_upload_aiotasks(self, primary_keys: list[str]) -> None:
        """Wait (up to 30 seconds) for the tracked uploads of the given keys.

        After a successful wait or a timeout, the entries for ``primary_keys``
        are removed from ``upload_aiotasks_map``.  An exception raised by one
        of the upload tasks propagates to the caller and leaves the entries in
        place.

        Args:
            primary_keys: Tracking keys whose upload tasks should be awaited.
                Duplicate keys are allowed.
        """
        try:
            # Monotonic clock: the elapsed time must not be affected by
            # wall-clock adjustments.
            st = time.monotonic()
            pending = [
                aio_task
                for primary_key in primary_keys
                for aio_task in self.upload_aiotasks_map.get(primary_key, [])
                if not aio_task.done()
            ]
            async with asyncio.timeout(30):
                await asyncio.gather(*pending)
            duration = time.monotonic() - st
            LOG.info(
                f"Saving artifacts - aio tasks for primary_keys={primary_keys} completed in {duration:.2f}s",
                primary_keys=primary_keys,
                duration=duration,
            )
        except asyncio.TimeoutError:
            LOG.error(
                f"Timeout (30s) while waiting for upload aio tasks for primary_keys={primary_keys}",
                primary_keys=primary_keys,
            )

        # pop() instead of del: a key listed twice, or one that never had an
        # upload, must not raise KeyError.
        for primary_key in primary_keys:
            self.upload_aiotasks_map.pop(primary_key, None)