support aws s3 storage (#720)
This commit is contained in:
@@ -44,3 +44,13 @@ The following endpoint can be used to retrieve artifacts for a specific step:
|
||||
```
|
||||
GET https://api.skyvern.com/api/v1/tasks/{task_id}/steps/{step_id}/artifacts
|
||||
```
|
||||
|
||||
## Artifacts configurations
|
||||
By default, Skyvern stores artifacts (video recordings, screenshots, LLM requests and responses, HTML, and Skyvern-parsed HTML elements) locally in the `/artifacts` folder under the Skyvern repository.
|
||||
You can also have Skyvern upload artifacts to your S3 buckets. To do this, first set up these environment variables:
|
||||
- `AWS_DEFAULT_REGION`: `us-east-1`, `us-west-1`, ...
|
||||
- `AWS_ACCESS_KEY_ID`
|
||||
- `AWS_SECRET_ACCESS_KEY`
|
||||
- `SKYVERN_STORAGE_TYPE`: set it to `s3`. The default is `local`.
|
||||
|
||||
Make sure these S3 buckets are created: `skyvern-artifacts`, `skyvern-screenshots`. These are the default bucket names Skyvern uses. To customize the bucket names, change these two environment variables: `AWS_S3_BUCKET_ARTIFACTS` and `AWS_S3_BUCKET_SCREENSHOTS`.
|
||||
|
||||
@@ -45,6 +45,11 @@ class Settings(BaseSettings):
|
||||
# Artifact storage settings
|
||||
ARTIFACT_STORAGE_PATH: str = f"{SKYVERN_DIR}/artifacts"
|
||||
GENERATE_PRESIGNED_URLS: bool = False
|
||||
AWS_S3_BUCKET_ARTIFACTS: str = "skyvern-artifacts"
|
||||
AWS_S3_BUCKET_SCREENSHOTS: str = "skyvern-screenshots"
|
||||
|
||||
# Supported storage types: local, s3
|
||||
SKYVERN_STORAGE_TYPE: str = "local"
|
||||
|
||||
# S3 bucket settings
|
||||
AWS_REGION: str = "us-east-1"
|
||||
|
||||
@@ -8,6 +8,7 @@ from skyvern.forge.agent_functions import AgentFunction
|
||||
from skyvern.forge.sdk.api.llm.api_handler_factory import LLMAPIHandlerFactory
|
||||
from skyvern.forge.sdk.artifact.manager import ArtifactManager
|
||||
from skyvern.forge.sdk.artifact.storage.factory import StorageFactory
|
||||
from skyvern.forge.sdk.artifact.storage.s3 import S3Storage
|
||||
from skyvern.forge.sdk.cache.factory import CacheFactory
|
||||
from skyvern.forge.sdk.db.client import AgentDB
|
||||
from skyvern.forge.sdk.experimentation.providers import BaseExperimentationProvider, NoOpExperimentationProvider
|
||||
@@ -22,6 +23,8 @@ DATABASE = AgentDB(
|
||||
SettingsManager.get_settings().DATABASE_STRING,
|
||||
debug_enabled=SettingsManager.get_settings().DEBUG_MODE,
|
||||
)
|
||||
# Install the S3 backend in the storage factory only when SKYVERN_STORAGE_TYPE
# is exactly "s3"; for any other value the factory's current storage is left
# untouched (presumably the local backend -- confirm in StorageFactory).
if SettingsManager.get_settings().SKYVERN_STORAGE_TYPE == "s3":
|
||||
StorageFactory.set_storage(S3Storage())
|
||||
# Read the storage back from the factory after the optional override above.
STORAGE = StorageFactory.get_storage()
|
||||
CACHE = CacheFactory.get_cache()
|
||||
ARTIFACT_MANAGER = ArtifactManager()
|
||||
|
||||
42
skyvern/forge/sdk/artifact/storage/s3.py
Normal file
42
skyvern/forge/sdk/artifact/storage/s3.py
Normal file
@@ -0,0 +1,42 @@
|
||||
from datetime import datetime
|
||||
|
||||
from skyvern.config import settings
|
||||
from skyvern.forge.sdk.api.aws import AsyncAWSClient
|
||||
from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType
|
||||
from skyvern.forge.sdk.artifact.storage.base import FILE_EXTENTSION_MAP, BaseStorage
|
||||
from skyvern.forge.sdk.models import Step
|
||||
|
||||
|
||||
class S3Storage(BaseStorage):
    """Storage backend that keeps artifacts and streaming files in S3.

    All transfers are delegated to ``AsyncAWSClient``; this class is only
    responsible for deciding which ``s3://`` URI an object lives at.
    """

    def __init__(self, bucket: str | None = None) -> None:
        """Create the AWS client and choose the artifact bucket.

        Args:
            bucket: Bucket used when building artifact URIs. When omitted
                (or empty), ``settings.AWS_S3_BUCKET_ARTIFACTS`` is used.
        """
        self.async_client = AsyncAWSClient()
        self.bucket = bucket or settings.AWS_S3_BUCKET_ARTIFACTS

    @staticmethod
    def _streaming_file_uri(organization_id: str, file_name: str) -> str:
        """Return the S3 URI of an organization's streaming file.

        Shared by the save and get paths so both always agree on the key.
        """
        return f"s3://{settings.AWS_S3_BUCKET_SCREENSHOTS}/{settings.ENV}/{organization_id}/{file_name}"

    def build_uri(self, artifact_id: str, step: Step, artifact_type: ArtifactType) -> str:
        """Build the ``s3://`` URI under which a step artifact is stored.

        Layout::

            s3://<bucket>/<ENV>/<task_id>/<order>_<retry_index>_<step_id>/<utc-timestamp>_<artifact_id>_<artifact_type>.<ext>

        Args:
            artifact_id: Identifier embedded in the object name.
            step: Step the artifact belongs to; its ``task_id``, ``order``,
                ``retry_index`` and ``step_id`` form the key prefix.
            artifact_type: Artifact type; selects the file extension via
                ``FILE_EXTENTSION_MAP`` and is also embedded in the name.

        Returns:
            The full ``s3://`` URI string. A type missing from
            ``FILE_EXTENTSION_MAP`` makes the lookup fail.
        """
        # Local import: the module only imports ``datetime`` from ``datetime``.
        from datetime import timezone

        file_ext = FILE_EXTENTSION_MAP[artifact_type]
        # ``datetime.utcnow()`` is deprecated (Python 3.12+). Take an aware UTC
        # "now" and drop tzinfo so the ISO text -- and therefore the object
        # key -- keeps the exact format utcnow() produced (no "+00:00" suffix).
        timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
        step_dir = f"{step.order:02d}_{step.retry_index}_{step.step_id}"
        return (
            f"s3://{self.bucket}/{settings.ENV}/{step.task_id}/{step_dir}/"
            f"{timestamp}_{artifact_id}_{artifact_type}.{file_ext}"
        )

    async def store_artifact(self, artifact: Artifact, data: bytes) -> None:
        """Upload ``data`` to the location given by ``artifact.uri``."""
        await self.async_client.upload_file(artifact.uri, data)

    async def retrieve_artifact(self, artifact: Artifact) -> bytes | None:
        """Download and return the object at ``artifact.uri``.

        Returns whatever the AWS client's ``download_file`` returns, which
        may be ``None``.
        """
        return await self.async_client.download_file(artifact.uri)

    async def get_share_link(self, artifact: Artifact) -> str | None:
        """Return a presigned URL for one artifact.

        Returns ``None`` when the client yields no URLs (``None`` or an empty
        list).
        """
        share_urls = await self.async_client.create_presigned_urls([artifact.uri])
        return share_urls[0] if share_urls else None

    async def get_share_links(self, artifacts: list[Artifact]) -> list[str] | None:
        """Return presigned URLs for several artifacts.

        The URIs are passed to the client in the order of ``artifacts`` and
        the client's result is returned unchanged.
        """
        return await self.async_client.create_presigned_urls([artifact.uri for artifact in artifacts])

    async def store_artifact_from_path(self, artifact: Artifact, path: str) -> None:
        """Upload the local file at ``path`` to ``artifact.uri``."""
        await self.async_client.upload_file_from_path(artifact.uri, path)

    async def save_streaming_file(self, organization_id: str, file_name: str) -> None:
        """Upload an organization's local streaming file to the screenshots bucket.

        The local file is read from
        ``<STREAMING_FILE_BASE_PATH>/<organization_id>/<file_name>``.
        """
        from_path = f"{settings.STREAMING_FILE_BASE_PATH}/{organization_id}/{file_name}"
        to_path = self._streaming_file_uri(organization_id, file_name)
        await self.async_client.upload_file_from_path(to_path, from_path)

    async def get_streaming_file(self, organization_id: str, file_name: str, use_default: bool = True) -> bytes | None:
        """Download an organization's streaming file from the screenshots bucket.

        Args:
            organization_id: Organization whose file is requested.
            file_name: Name of the streaming file.
            use_default: Not used by this implementation; presumably kept
                for parity with the ``BaseStorage`` signature (confirm).

        Returns:
            Whatever the AWS client's ``download_file`` returns, which may
            be ``None``.
        """
        path = self._streaming_file_uri(organization_id, file_name)
        # log_exception=False is forwarded to the client; presumably a missing
        # streaming file is an expected condition and should not be logged.
        return await self.async_client.download_file(path, log_exception=False)
|
||||
Reference in New Issue
Block a user