Move the code over from private repository (#3)

This commit is contained in:
Kerem Yilmaz
2024-03-01 10:09:30 -08:00
committed by GitHub
parent 32dd6d92a5
commit 9eddb3d812
93 changed files with 16798 additions and 0 deletions

View File

View File

@@ -0,0 +1,112 @@
import asyncio
import time
from collections import defaultdict
import structlog
from skyvern.forge import app
from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType
from skyvern.forge.sdk.db.id import generate_artifact_id
from skyvern.forge.sdk.models import Step
# Module-level structlog logger, named after this module; used by ArtifactManager below.
LOG = structlog.get_logger(__name__)
class ArtifactManager:
# task_id -> list of aio_tasks for uploading artifacts
upload_aiotasks_map: dict[str, list[asyncio.Task[None]]] = defaultdict(list)
async def create_artifact(
self, step: Step, artifact_type: ArtifactType, data: bytes | None = None, path: str | None = None
) -> str:
# TODO (kerem): Which is better?
# current: (disadvantage: we create the artifact_id UUID here)
# 1. generate artifact_id UUID here
# 2. build uri with artifact_id, step_id, task_id, artifact_type
# 3. create artifact in db using artifact_id, step_id, task_id, artifact_type, uri
# 4. store artifact in storage
# alternative: (disadvantage: two db calls)
# 1. create artifact in db without the URI
# 2. build uri with artifact_id, step_id, task_id, artifact_type
# 3. update artifact in db with the URI
# 4. store artifact in storage
if data is None and path is None:
raise ValueError("Either data or path must be provided to create an artifact.")
if data and path:
raise ValueError("Both data and path cannot be provided to create an artifact.")
artifact_id = generate_artifact_id()
uri = app.STORAGE.build_uri(artifact_id, step, artifact_type)
artifact = await app.DATABASE.create_artifact(
artifact_id,
step.step_id,
step.task_id,
artifact_type,
uri,
organization_id=step.organization_id,
)
if data:
# Fire and forget
aio_task = asyncio.create_task(app.STORAGE.store_artifact(artifact, data))
self.upload_aiotasks_map[step.task_id].append(aio_task)
elif path:
# Fire and forget
aio_task = asyncio.create_task(app.STORAGE.store_artifact_from_path(artifact, path))
self.upload_aiotasks_map[step.task_id].append(aio_task)
return artifact_id
async def update_artifact_data(self, artifact_id: str | None, organization_id: str | None, data: bytes) -> None:
if not artifact_id or not organization_id:
return None
artifact = await app.DATABASE.get_artifact_by_id(artifact_id, organization_id)
if not artifact:
return
# Fire and forget
aio_task = asyncio.create_task(app.STORAGE.store_artifact(artifact, data))
self.upload_aiotasks_map[artifact.task_id].append(aio_task)
async def retrieve_artifact(self, artifact: Artifact) -> bytes | None:
return await app.STORAGE.retrieve_artifact(artifact)
async def get_share_link(self, artifact: Artifact) -> str | None:
return await app.STORAGE.get_share_link(artifact)
async def wait_for_upload_aiotasks_for_task(self, task_id: str) -> None:
try:
st = time.time()
async with asyncio.timeout(30):
await asyncio.gather(
*[aio_task for aio_task in self.upload_aiotasks_map[task_id] if not aio_task.done()]
)
LOG.info(
f"S3 upload tasks for task_id={task_id} completed in {time.time() - st:.2f}s",
task_id=task_id,
duration=time.time() - st,
)
except asyncio.TimeoutError:
LOG.error(f"Timeout (30s) while waiting for upload tasks for task_id={task_id}", task_id=task_id)
del self.upload_aiotasks_map[task_id]
async def wait_for_upload_aiotasks_for_tasks(self, task_ids: list[str]) -> None:
try:
st = time.time()
async with asyncio.timeout(30):
await asyncio.gather(
*[
aio_task
for task_id in task_ids
for aio_task in self.upload_aiotasks_map[task_id]
if not aio_task.done()
]
)
LOG.info(
f"S3 upload tasks for task_ids={task_ids} completed in {time.time() - st:.2f}s",
task_ids=task_ids,
duration=time.time() - st,
)
except asyncio.TimeoutError:
LOG.error(f"Timeout (30s) while waiting for upload tasks for task_ids={task_ids}", task_ids=task_ids)
for task_id in task_ids:
del self.upload_aiotasks_map[task_id]

View File

@@ -0,0 +1,78 @@
from __future__ import annotations
from datetime import datetime
from enum import StrEnum
from pydantic import BaseModel, Field
class ArtifactType(StrEnum):
    """Kinds of artifact; each member's value is its lowercase string form."""

    RECORDING = "recording"

    # DEPRECATED. Please use SCREENSHOT_LLM, SCREENSHOT_ACTION or SCREENSHOT_FINAL instead.
    SCREENSHOT = "screenshot"

    # Use these for screenshots.
    SCREENSHOT_LLM = "screenshot_llm"
    SCREENSHOT_ACTION = "screenshot_action"
    SCREENSHOT_FINAL = "screenshot_final"

    # LLM prompt, request and (raw / parsed) response.
    LLM_PROMPT = "llm_prompt"
    LLM_REQUEST = "llm_request"
    LLM_RESPONSE = "llm_response"
    LLM_RESPONSE_PARSED = "llm_response_parsed"

    # Visible-element data (id -> xpath map, element tree, trimmed element tree).
    VISIBLE_ELEMENTS_ID_XPATH_MAP = "visible_elements_id_xpath_map"
    VISIBLE_ELEMENTS_TREE = "visible_elements_tree"
    VISIBLE_ELEMENTS_TREE_TRIMMED = "visible_elements_tree_trimmed"

    # DEPRECATED. Please use HTML_SCRAPE or HTML_ACTION instead.
    HTML = "html"

    # Use these for HTML documents.
    HTML_SCRAPE = "html_scrape"
    HTML_ACTION = "html_action"

    # Debugging
    TRACE = "trace"
    HAR = "har"
class Artifact(BaseModel):
    # Pydantic model describing one artifact: its timestamps, the task and step
    # it belongs to, its type, and the URI of its content.
    # (Documented with comments rather than a docstring on purpose: pydantic
    # exposes a model's docstring as the schema description.)

    # NOTE(review): the descriptions of created_at / modified_at / artifact_id
    # mention "task" although the fields live on the artifact -- confirm wording.
    # NOTE(review): `json_encoders` is passed to Field() here; pydantic documents
    # json_encoders as model-level configuration, so verify that this keyword has
    # the intended effect with the pydantic version in use.
    created_at: datetime = Field(
        ...,
        description="The creation datetime of the task.",
        examples=["2023-01-01T00:00:00Z"],
        json_encoders={datetime: lambda v: v.isoformat()},
    )
    modified_at: datetime = Field(
        ...,
        description="The modification datetime of the task.",
        examples=["2023-01-01T00:00:00Z"],
        json_encoders={datetime: lambda v: v.isoformat()},
    )

    # Identifiers tying the artifact to its task and step.
    artifact_id: str = Field(
        ...,
        description="The ID of the task artifact.",
        examples=["6bb1801a-fd80-45e8-899a-4dd723cc602e"],
    )
    task_id: str = Field(
        ...,
        description="The ID of the task this artifact belongs to.",
        examples=["50da533e-3904-4401-8a07-c49adf88b5eb"],
    )
    step_id: str = Field(
        ...,
        description="The ID of the task step this artifact belongs to.",
        examples=["6bb1801a-fd80-45e8-899a-4dd723cc602e"],
    )

    # NOTE(review): the example uses "screenshot", which ArtifactType marks as deprecated.
    artifact_type: ArtifactType = Field(
        ...,
        description="The type of the artifact.",
        examples=["screenshot"],
    )
    uri: str = Field(
        ...,
        description="The URI of the artifact.",
        examples=["/Users/skyvern/hello/world.png"],
    )

    # Optional; defaults to None when no organization is given.
    organization_id: str | None = None

View File

@@ -0,0 +1,45 @@
from abc import ABC, abstractmethod
from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType
from skyvern.forge.sdk.models import Step
# TODO: This should be a part of the ArtifactType model
# Maps every ArtifactType to the file extension (without the leading dot) used
# when a storage backend builds an artifact URI. The deprecated SCREENSHOT and
# HTML types are included as well, so a lookup for them does not raise KeyError.
# NOTE: the name is misspelled ("EXTENTSION") but is imported by other modules,
# so it is kept as is.
FILE_EXTENTSION_MAP: dict[ArtifactType, str] = {
    ArtifactType.RECORDING: "webm",
    ArtifactType.SCREENSHOT: "png",
    ArtifactType.SCREENSHOT_LLM: "png",
    ArtifactType.SCREENSHOT_ACTION: "png",
    ArtifactType.SCREENSHOT_FINAL: "png",
    ArtifactType.LLM_PROMPT: "txt",
    ArtifactType.LLM_REQUEST: "json",
    ArtifactType.LLM_RESPONSE: "json",
    ArtifactType.LLM_RESPONSE_PARSED: "json",
    ArtifactType.VISIBLE_ELEMENTS_ID_XPATH_MAP: "json",
    ArtifactType.VISIBLE_ELEMENTS_TREE: "json",
    ArtifactType.VISIBLE_ELEMENTS_TREE_TRIMMED: "json",
    ArtifactType.HTML: "html",
    ArtifactType.HTML_SCRAPE: "html",
    ArtifactType.HTML_ACTION: "html",
    ArtifactType.TRACE: "zip",
    ArtifactType.HAR: "har",
}
class BaseStorage(ABC):
    """Interface that every artifact storage backend has to implement."""

    @abstractmethod
    def build_uri(self, artifact_id: str, step: Step, artifact_type: ArtifactType) -> str:
        """Return the URI to use for the artifact ``artifact_id`` of ``step``."""

    @abstractmethod
    async def store_artifact(self, artifact: Artifact, data: bytes) -> None:
        """Store ``data`` as the content of ``artifact``."""

    @abstractmethod
    async def store_artifact_from_path(self, artifact: Artifact, path: str) -> None:
        """Store the file found at ``path`` as the content of ``artifact``."""

    @abstractmethod
    async def retrieve_artifact(self, artifact: Artifact) -> bytes | None:
        """Return the content of ``artifact``; the signature allows ``None`` as a result."""

    @abstractmethod
    async def get_share_link(self, artifact: Artifact) -> str | None:
        """Return a share link for ``artifact``; the signature allows ``None`` as a result."""

View File

@@ -0,0 +1,14 @@
from skyvern.forge.sdk.artifact.storage.base import BaseStorage
from skyvern.forge.sdk.artifact.storage.local import LocalStorage
class StorageFactory:
    """Holder of the storage backend currently selected for artifacts."""

    # A LocalStorage instance is created when this class body runs and stays
    # in place until set_storage() installs another backend.
    __storage: BaseStorage = LocalStorage()

    @staticmethod
    def get_storage() -> BaseStorage:
        """Return the storage backend currently held by the factory."""
        return StorageFactory.__storage

    @staticmethod
    def set_storage(storage: BaseStorage) -> None:
        """Make ``storage`` the backend returned by ``get_storage()``."""
        StorageFactory.__storage = storage

View File

@@ -0,0 +1,66 @@
from datetime import datetime
from pathlib import Path
from urllib.parse import unquote, urlparse
import structlog
from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType
from skyvern.forge.sdk.artifact.storage.base import FILE_EXTENTSION_MAP, BaseStorage
from skyvern.forge.sdk.models import Step
from skyvern.forge.sdk.settings_manager import SettingsManager
# Module-level structlog logger; LocalStorage uses it to report failed file operations.
LOG = structlog.get_logger()
class LocalStorage(BaseStorage):
    """Storage backend that keeps artifacts as files on the local filesystem.

    Artifacts are addressed by ``file://`` URIs rooted at ``artifact_path``.
    The store/retrieve methods never raise: failures are logged via ``LOG``.
    """

    def __init__(self, artifact_path: str = SettingsManager.get_settings().ARTIFACT_STORAGE_PATH) -> None:
        """Remember the root directory under which artifacts are written.

        NOTE: the default value is evaluated once, when this method is defined,
        so later changes to the settings do not affect it.
        """
        self.artifact_path = artifact_path

    def build_uri(self, artifact_id: str, step: Step, artifact_type: ArtifactType) -> str:
        """Build the ``file://`` URI for a new artifact of ``step``.

        Layout: ``<artifact_path>/<task_id>/<order>_<retry_index>_<step_id>/
        <utc timestamp>_<artifact_id>_<artifact_type>.<ext>``.

        Raises:
            KeyError: If ``artifact_type`` has no entry in ``FILE_EXTENTSION_MAP``.
        """
        file_ext = FILE_EXTENTSION_MAP[artifact_type]
        return f"file://{self.artifact_path}/{step.task_id}/{step.order:02d}_{step.retry_index}_{step.step_id}/{datetime.utcnow().isoformat()}_{artifact_id}_{artifact_type}.{file_ext}"

    async def store_artifact(self, artifact: Artifact, data: bytes) -> None:
        """Write ``data`` to the file named by ``artifact.uri``, creating parent directories.

        Any failure is logged and swallowed. The write itself is a regular
        blocking file operation.
        """
        file_path = None
        try:
            file_path = Path(self._parse_uri_to_path(artifact.uri))
            self._create_directories_if_not_exists(file_path)
            with open(file_path, "wb") as f:
                f.write(data)
        except Exception:
            LOG.exception("Failed to store artifact locally.", file_path=file_path, artifact=artifact)

    async def store_artifact_from_path(self, artifact: Artifact, path: str) -> None:
        """Move the file at ``path`` to the location named by ``artifact.uri``.

        The source file is renamed onto the destination, so it no longer exists
        at ``path`` afterwards. Any failure is logged and swallowed.
        """
        file_path = None
        try:
            file_path = Path(self._parse_uri_to_path(artifact.uri))
            self._create_directories_if_not_exists(file_path)
            Path(path).replace(file_path)
        except Exception:
            LOG.exception("Failed to store artifact locally.", file_path=file_path, artifact=artifact)

    async def retrieve_artifact(self, artifact: Artifact) -> bytes | None:
        """Return the bytes of the file named by ``artifact.uri``, or ``None`` if reading fails.

        Failures are logged, not raised.
        """
        file_path = None
        try:
            file_path = self._parse_uri_to_path(artifact.uri)
            with open(file_path, "rb") as f:
                return f.read()
        except Exception:
            LOG.exception("Failed to retrieve local artifact.", file_path=file_path, artifact=artifact)
            return None

    async def get_share_link(self, artifact: Artifact) -> str:
        """Return the artifact's own URI; no separate share link is generated."""
        return artifact.uri

    @staticmethod
    def _parse_uri_to_path(uri: str) -> str:
        """Convert a ``file://`` URI into a filesystem path.

        The URI's netloc and path components are concatenated and
        percent-decoded.

        Raises:
            ValueError: If the URI scheme is not ``file``.
        """
        parsed_uri = urlparse(uri)
        if parsed_uri.scheme != "file":
            # The f-prefix was missing, so the offending scheme was never shown.
            raise ValueError(f"Invalid URI scheme: {parsed_uri.scheme} expected: file")
        path = parsed_uri.netloc + parsed_uri.path
        return unquote(path)

    @staticmethod
    def _create_directories_if_not_exists(path_including_file_name: Path) -> None:
        """Create the parent directory of the given file path, including missing ancestors."""
        path = path_including_file_name.parent
        path.mkdir(parents=True, exist_ok=True)