Add Checksums to downloaded files for Axis so they can validate it in the webhook (#1848)
This commit is contained in:
@@ -3,6 +3,7 @@ from abc import ABC, abstractmethod
|
||||
from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType, LogEntityType
|
||||
from skyvern.forge.sdk.models import Step
|
||||
from skyvern.forge.sdk.schemas.ai_suggestions import AISuggestion
|
||||
from skyvern.forge.sdk.schemas.files import FileInfo
|
||||
from skyvern.forge.sdk.schemas.task_v2 import ObserverTask, ObserverThought
|
||||
from skyvern.forge.sdk.schemas.workflow_runs import WorkflowRunBlock
|
||||
|
||||
@@ -115,5 +116,5 @@ class BaseStorage(ABC):
|
||||
@abstractmethod
|
||||
async def get_downloaded_files(
|
||||
self, organization_id: str, task_id: str | None, workflow_run_id: str | None
|
||||
) -> list[str]:
|
||||
) -> list[FileInfo]:
|
||||
pass
|
||||
|
||||
@@ -6,11 +6,17 @@ from pathlib import Path
|
||||
import structlog
|
||||
|
||||
from skyvern.config import settings
|
||||
from skyvern.forge.sdk.api.files import get_download_dir, get_skyvern_temp_dir, parse_uri_to_path
|
||||
from skyvern.forge.sdk.api.files import (
|
||||
calculate_sha256_for_file,
|
||||
get_download_dir,
|
||||
get_skyvern_temp_dir,
|
||||
parse_uri_to_path,
|
||||
)
|
||||
from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType, LogEntityType
|
||||
from skyvern.forge.sdk.artifact.storage.base import FILE_EXTENTSION_MAP, BaseStorage
|
||||
from skyvern.forge.sdk.models import Step
|
||||
from skyvern.forge.sdk.schemas.ai_suggestions import AISuggestion
|
||||
from skyvern.forge.sdk.schemas.files import FileInfo
|
||||
from skyvern.forge.sdk.schemas.task_v2 import ObserverTask, ObserverThought
|
||||
from skyvern.forge.sdk.schemas.workflow_runs import WorkflowRunBlock
|
||||
|
||||
@@ -157,15 +163,18 @@ class LocalStorage(BaseStorage):
|
||||
|
||||
async def get_downloaded_files(
|
||||
self, organization_id: str, task_id: str | None, workflow_run_id: str | None
|
||||
) -> list[str]:
|
||||
) -> list[FileInfo]:
|
||||
download_dir = get_download_dir(workflow_run_id=workflow_run_id, task_id=task_id)
|
||||
files: list[str] = []
|
||||
file_infos: list[FileInfo] = []
|
||||
files_and_folders = os.listdir(download_dir)
|
||||
for file_or_folder in files_and_folders:
|
||||
path = os.path.join(download_dir, file_or_folder)
|
||||
if os.path.isfile(path):
|
||||
files.append(f"file://{path}")
|
||||
return files
|
||||
# Calculate checksum for the file
|
||||
checksum = calculate_sha256_for_file(path)
|
||||
file_info = FileInfo(url=f"file://{path}", checksum=checksum, filename=file_or_folder)
|
||||
file_infos.append(file_info)
|
||||
return file_infos
|
||||
|
||||
@staticmethod
|
||||
def _create_directories_if_not_exists(path_including_file_name: Path) -> None:
|
||||
|
||||
@@ -2,10 +2,13 @@ import os
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
|
||||
import structlog
|
||||
|
||||
from skyvern.config import settings
|
||||
from skyvern.constants import DOWNLOAD_FILE_PREFIX
|
||||
from skyvern.forge.sdk.api.aws import AsyncAWSClient
|
||||
from skyvern.forge.sdk.api.files import (
|
||||
calculate_sha256_for_file,
|
||||
create_named_temporary_file,
|
||||
get_download_dir,
|
||||
get_skyvern_temp_dir,
|
||||
@@ -16,9 +19,12 @@ from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType, LogEntityT
|
||||
from skyvern.forge.sdk.artifact.storage.base import FILE_EXTENTSION_MAP, BaseStorage
|
||||
from skyvern.forge.sdk.models import Step
|
||||
from skyvern.forge.sdk.schemas.ai_suggestions import AISuggestion
|
||||
from skyvern.forge.sdk.schemas.files import FileInfo
|
||||
from skyvern.forge.sdk.schemas.task_v2 import ObserverTask, ObserverThought
|
||||
from skyvern.forge.sdk.schemas.workflow_runs import WorkflowRunBlock
|
||||
|
||||
LOG = structlog.get_logger()
|
||||
|
||||
|
||||
class S3Storage(BaseStorage):
|
||||
def __init__(self, bucket: str | None = None) -> None:
|
||||
@@ -117,21 +123,45 @@ class S3Storage(BaseStorage):
|
||||
fpath = os.path.join(download_dir, file)
|
||||
if os.path.isfile(fpath):
|
||||
uri = f"s3://{settings.AWS_S3_BUCKET_UPLOADS}/{DOWNLOAD_FILE_PREFIX}/{settings.ENV}/{organization_id}/{workflow_run_id or task_id}/{file}"
|
||||
# TODO: use coroutine to speed up uploading if too many files
|
||||
await self.async_client.upload_file_from_path(uri, fpath)
|
||||
|
||||
# Calculate SHA-256 checksum
|
||||
checksum = calculate_sha256_for_file(fpath)
|
||||
LOG.info("Calculated checksum for file", file=file, checksum=checksum)
|
||||
|
||||
# Upload file with checksum metadata
|
||||
await self.async_client.upload_file_from_path(
|
||||
uri=uri, file_path=fpath, metadata={"sha256_checksum": checksum, "original_filename": file}
|
||||
)
|
||||
|
||||
async def get_downloaded_files(
|
||||
self, organization_id: str, task_id: str | None, workflow_run_id: str | None
|
||||
) -> list[str]:
|
||||
) -> list[FileInfo]:
|
||||
uri = f"s3://{settings.AWS_S3_BUCKET_UPLOADS}/{DOWNLOAD_FILE_PREFIX}/{settings.ENV}/{organization_id}/{workflow_run_id or task_id}"
|
||||
object_keys = await self.async_client.list_files(uri=uri)
|
||||
if len(object_keys) == 0:
|
||||
return []
|
||||
object_uris: list[str] = []
|
||||
|
||||
file_infos: list[FileInfo] = []
|
||||
for key in object_keys:
|
||||
object_uri = f"s3://{settings.AWS_S3_BUCKET_UPLOADS}/{key}"
|
||||
object_uris.append(object_uri)
|
||||
presigned_urils = await self.async_client.create_presigned_urls(object_uris)
|
||||
if presigned_urils is None:
|
||||
return []
|
||||
return presigned_urils
|
||||
|
||||
# Get metadata (including checksum)
|
||||
metadata = await self.async_client.get_file_metadata(object_uri, log_exception=False)
|
||||
|
||||
# Create FileInfo object
|
||||
filename = os.path.basename(key)
|
||||
checksum = metadata.get("sha256_checksum") if metadata else None
|
||||
|
||||
# Get presigned URL
|
||||
presigned_urls = await self.async_client.create_presigned_urls([object_uri])
|
||||
if not presigned_urls:
|
||||
continue
|
||||
|
||||
file_info = FileInfo(
|
||||
url=presigned_urls[0],
|
||||
checksum=checksum,
|
||||
filename=metadata.get("original_filename", filename) if metadata else filename,
|
||||
)
|
||||
file_infos.append(file_info)
|
||||
|
||||
return file_infos
|
||||
|
||||
Reference in New Issue
Block a user