Move save legacy file logic to S3Storage + add storage class support + add logging (#2624)

This commit is contained in:
Asher Foa
2025-06-09 12:13:20 -04:00
committed by GitHub
parent fc1e7ae37e
commit 4de7746ebc
4 changed files with 75 additions and 34 deletions

View File

@@ -1,4 +1,5 @@
from abc import ABC, abstractmethod
from typing import BinaryIO

from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType, LogEntityType
from skyvern.forge.sdk.models import Step
@@ -116,3 +117,9 @@ class BaseStorage(ABC):
    self, organization_id: str, task_id: str | None, workflow_run_id: str | None
) -> list[FileInfo]:
    pass
@abstractmethod
async def save_legacy_file(
    self, *, organization_id: str, filename: str, fileObj: BinaryIO
) -> tuple[str, str] | None:
    """Persist an uploaded legacy file for *organization_id*.

    Concrete storage backends implement the upload. On success, returns a
    ``(presigned_url, storage_uri)`` pair the caller can hand back to the
    client; returns ``None`` when the file could not be stored.
    """
    pass

View File

@@ -2,6 +2,7 @@ import os
import shutil
from datetime import datetime
from pathlib import Path
from typing import BinaryIO

import structlog
@@ -178,3 +179,10 @@ class LocalStorage(BaseStorage):
def _create_directories_if_not_exists(path_including_file_name: Path) -> None: def _create_directories_if_not_exists(path_including_file_name: Path) -> None:
path = path_including_file_name.parent path = path_including_file_name.parent
path.mkdir(parents=True, exist_ok=True) path.mkdir(parents=True, exist_ok=True)
async def save_legacy_file(
self, *, organization_id: str, filename: str, fileObj: BinaryIO
) -> tuple[str, str] | None:
raise NotImplementedError(
"Legacy file storage is not implemented for LocalStorage. Please use a different storage backend."
)

View File

@@ -1,6 +1,8 @@
import os
import shutil
import uuid
from datetime import datetime, timezone
from typing import BinaryIO

import structlog
@@ -210,3 +212,55 @@ class S3Storage(BaseStorage):
        file_infos.append(file_info)
    return file_infos
async def save_legacy_file(
    self, *, organization_id: str, filename: str, fileObj: BinaryIO
) -> tuple[str, str] | None:
    """Upload a legacy file to the organization's S3 uploads bucket.

    The object key is ``{ENV}/{organization_id}/{YYYY-MM-DD}/{filename}``.
    The first attempt uses the sanitized original filename; if that upload
    fails, a single retry is made with a UUID-prefixed name. Returns a
    ``(presigned_url, s3_uri)`` pair on success, or ``None`` when the
    upload (or presigned-URL generation) fails.
    """
    todays_date = datetime.now(tz=timezone.utc).strftime("%Y-%m-%d")
    bucket = settings.AWS_S3_BUCKET_UPLOADS
    sc = await self._get_storage_class_for_org(organization_id)
    # Strip any path components so a crafted filename cannot escape the key prefix.
    sanitized_filename = os.path.basename(filename)

    # First try uploading with the original (sanitized) filename.
    try:
        s3_uri = f"s3://{bucket}/{settings.ENV}/{organization_id}/{todays_date}/{sanitized_filename}"
        uploaded_s3_uri = await self.async_client.upload_file_stream(s3_uri, fileObj, storage_class=sc)
    except Exception:
        LOG.error("Failed to upload file to S3", exc_info=True)
        uploaded_s3_uri = None

    # If the first attempt failed, retry once with a UUID-prefixed filename
    # (sidesteps key collisions / problematic original names).
    if not uploaded_s3_uri:
        uuid_prefixed_filename = f"{uuid.uuid4()}_{sanitized_filename}"
        s3_uri = f"s3://{bucket}/{settings.ENV}/{organization_id}/{todays_date}/{uuid_prefixed_filename}"
        fileObj.seek(0)  # Reset file pointer after the failed first attempt.
        try:
            uploaded_s3_uri = await self.async_client.upload_file_stream(s3_uri, fileObj, storage_class=sc)
        except Exception:
            # Keep the documented contract: return None instead of propagating.
            LOG.error("Failed to upload file to S3 with UUID-prefixed filename", exc_info=True)
            uploaded_s3_uri = None
    if not uploaded_s3_uri:
        # No exception is in flight here, so exc_info would carry nothing useful.
        LOG.error(
            "Failed to upload file to S3 after retrying with UUID prefix",
            organization_id=organization_id,
            storage_class=sc,
            filename=filename,
        )
        return None
    LOG.debug(
        "Legacy file upload",
        organization_id=organization_id,
        storage_class=sc,
        filename=filename,
        uploaded_s3_uri=uploaded_s3_uri,
    )
    # Generate a presigned URL for the uploaded file.
    presigned_urls = await self.async_client.create_presigned_urls([uploaded_s3_uri])
    if not presigned_urls:
        LOG.error(
            "Failed to create presigned URL for uploaded file",
            organization_id=organization_id,
            storage_class=sc,
            uploaded_s3_uri=uploaded_s3_uri,
            filename=filename,
        )
        return None
    return presigned_urls[0], uploaded_s3_uri

View File

@@ -1,6 +1,3 @@
import datetime
import os
import uuid
from enum import Enum from enum import Enum
from typing import Annotated, Any from typing import Annotated, Any
@@ -14,7 +11,6 @@ from skyvern._version import __version__
from skyvern.config import settings from skyvern.config import settings
from skyvern.forge import app from skyvern.forge import app
from skyvern.forge.prompts import prompt_engine from skyvern.forge.prompts import prompt_engine
from skyvern.forge.sdk.api.aws import aws_client
from skyvern.forge.sdk.api.llm.exceptions import LLMProviderError from skyvern.forge.sdk.api.llm.exceptions import LLMProviderError
from skyvern.forge.sdk.artifact.models import Artifact from skyvern.forge.sdk.artifact.models import Artifact
from skyvern.forge.sdk.core import skyvern_context from skyvern.forge.sdk.core import skyvern_context
@@ -1696,36 +1692,12 @@ async def upload_file(
file: UploadFile = Depends(_validate_file_size), file: UploadFile = Depends(_validate_file_size),
current_org: Organization = Depends(org_auth_service.get_current_org), current_org: Organization = Depends(org_auth_service.get_current_org),
) -> Response: ) -> Response:
bucket = app.SETTINGS_MANAGER.AWS_S3_BUCKET_UPLOADS uris = await app.STORAGE.save_legacy_file(
todays_date = datetime.datetime.now().strftime("%Y-%m-%d") organization_id=current_org.organization_id, filename=file.filename, fileObj=file.file
)
# First try uploading with original filename if not uris:
try:
sanitized_filename = os.path.basename(file.filename) # Remove any path components
s3_uri = (
f"s3://{bucket}/{app.SETTINGS_MANAGER.ENV}/{current_org.organization_id}/{todays_date}/{sanitized_filename}"
)
uploaded_s3_uri = await aws_client.upload_file_stream(s3_uri, file.file)
except Exception:
LOG.error("Failed to upload file to S3", exc_info=True)
uploaded_s3_uri = None
# If upload fails, try again with UUID prefix
if not uploaded_s3_uri:
uuid_prefixed_filename = f"{str(uuid.uuid4())}_{file.filename}"
s3_uri = f"s3://{bucket}/{app.SETTINGS_MANAGER.ENV}/{current_org.organization_id}/{todays_date}/{uuid_prefixed_filename}"
file.file.seek(0) # Reset file pointer
uploaded_s3_uri = await aws_client.upload_file_stream(s3_uri, file.file)
if not uploaded_s3_uri:
raise HTTPException(status_code=500, detail="Failed to upload file to S3.") raise HTTPException(status_code=500, detail="Failed to upload file to S3.")
presigned_url, uploaded_s3_uri = uris
# Generate a presigned URL for the uploaded file
presigned_urls = await aws_client.create_presigned_urls([uploaded_s3_uri])
if not presigned_urls:
raise HTTPException(status_code=500, detail="Failed to generate presigned URL.")
presigned_url = presigned_urls[0]
return ORJSONResponse( return ORJSONResponse(
content={"s3_uri": uploaded_s3_uri, "presigned_url": presigned_url}, content={"s3_uri": uploaded_s3_uri, "presigned_url": presigned_url},
status_code=200, status_code=200,