Add checksums to downloaded files for Axis so they can validate them in the webhook (#1848)

This commit is contained in:
Shuchang Zheng
2025-02-26 17:19:05 -08:00
committed by GitHub
parent c73ad6ed68
commit 995d9461b5
10 changed files with 136 additions and 31 deletions

View File

@@ -3,6 +3,7 @@ from abc import ABC, abstractmethod
from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType, LogEntityType
from skyvern.forge.sdk.models import Step
from skyvern.forge.sdk.schemas.ai_suggestions import AISuggestion
from skyvern.forge.sdk.schemas.files import FileInfo
from skyvern.forge.sdk.schemas.task_v2 import ObserverTask, ObserverThought
from skyvern.forge.sdk.schemas.workflow_runs import WorkflowRunBlock
@@ -115,5 +116,5 @@ class BaseStorage(ABC):
@abstractmethod
async def get_downloaded_files(
self, organization_id: str, task_id: str | None, workflow_run_id: str | None
) -> list[str]:
) -> list[FileInfo]:
pass

View File

@@ -6,11 +6,17 @@ from pathlib import Path
import structlog
from skyvern.config import settings
from skyvern.forge.sdk.api.files import get_download_dir, get_skyvern_temp_dir, parse_uri_to_path
from skyvern.forge.sdk.api.files import (
calculate_sha256_for_file,
get_download_dir,
get_skyvern_temp_dir,
parse_uri_to_path,
)
from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType, LogEntityType
from skyvern.forge.sdk.artifact.storage.base import FILE_EXTENTSION_MAP, BaseStorage
from skyvern.forge.sdk.models import Step
from skyvern.forge.sdk.schemas.ai_suggestions import AISuggestion
from skyvern.forge.sdk.schemas.files import FileInfo
from skyvern.forge.sdk.schemas.task_v2 import ObserverTask, ObserverThought
from skyvern.forge.sdk.schemas.workflow_runs import WorkflowRunBlock
@@ -157,15 +163,18 @@ class LocalStorage(BaseStorage):
async def get_downloaded_files(
self, organization_id: str, task_id: str | None, workflow_run_id: str | None
) -> list[str]:
) -> list[FileInfo]:
download_dir = get_download_dir(workflow_run_id=workflow_run_id, task_id=task_id)
files: list[str] = []
file_infos: list[FileInfo] = []
files_and_folders = os.listdir(download_dir)
for file_or_folder in files_and_folders:
path = os.path.join(download_dir, file_or_folder)
if os.path.isfile(path):
files.append(f"file://{path}")
return files
# Calculate checksum for the file
checksum = calculate_sha256_for_file(path)
file_info = FileInfo(url=f"file://{path}", checksum=checksum, filename=file_or_folder)
file_infos.append(file_info)
return file_infos
@staticmethod
def _create_directories_if_not_exists(path_including_file_name: Path) -> None:

View File

@@ -2,10 +2,13 @@ import os
import shutil
from datetime import datetime
import structlog
from skyvern.config import settings
from skyvern.constants import DOWNLOAD_FILE_PREFIX
from skyvern.forge.sdk.api.aws import AsyncAWSClient
from skyvern.forge.sdk.api.files import (
calculate_sha256_for_file,
create_named_temporary_file,
get_download_dir,
get_skyvern_temp_dir,
@@ -16,9 +19,12 @@ from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType, LogEntityT
from skyvern.forge.sdk.artifact.storage.base import FILE_EXTENTSION_MAP, BaseStorage
from skyvern.forge.sdk.models import Step
from skyvern.forge.sdk.schemas.ai_suggestions import AISuggestion
from skyvern.forge.sdk.schemas.files import FileInfo
from skyvern.forge.sdk.schemas.task_v2 import ObserverTask, ObserverThought
from skyvern.forge.sdk.schemas.workflow_runs import WorkflowRunBlock
LOG = structlog.get_logger()
class S3Storage(BaseStorage):
def __init__(self, bucket: str | None = None) -> None:
@@ -117,21 +123,45 @@ class S3Storage(BaseStorage):
fpath = os.path.join(download_dir, file)
if os.path.isfile(fpath):
uri = f"s3://{settings.AWS_S3_BUCKET_UPLOADS}/{DOWNLOAD_FILE_PREFIX}/{settings.ENV}/{organization_id}/{workflow_run_id or task_id}/{file}"
# TODO: use coroutine to speed up uploading if too many files
await self.async_client.upload_file_from_path(uri, fpath)
# Calculate SHA-256 checksum
checksum = calculate_sha256_for_file(fpath)
LOG.info("Calculated checksum for file", file=file, checksum=checksum)
# Upload file with checksum metadata
await self.async_client.upload_file_from_path(
uri=uri, file_path=fpath, metadata={"sha256_checksum": checksum, "original_filename": file}
)
async def get_downloaded_files(
self, organization_id: str, task_id: str | None, workflow_run_id: str | None
) -> list[str]:
) -> list[FileInfo]:
uri = f"s3://{settings.AWS_S3_BUCKET_UPLOADS}/{DOWNLOAD_FILE_PREFIX}/{settings.ENV}/{organization_id}/{workflow_run_id or task_id}"
object_keys = await self.async_client.list_files(uri=uri)
if len(object_keys) == 0:
return []
object_uris: list[str] = []
file_infos: list[FileInfo] = []
for key in object_keys:
object_uri = f"s3://{settings.AWS_S3_BUCKET_UPLOADS}/{key}"
object_uris.append(object_uri)
presigned_urils = await self.async_client.create_presigned_urls(object_uris)
if presigned_urils is None:
return []
return presigned_urils
# Get metadata (including checksum)
metadata = await self.async_client.get_file_metadata(object_uri, log_exception=False)
# Create FileInfo object
filename = os.path.basename(key)
checksum = metadata.get("sha256_checksum") if metadata else None
# Get presigned URL
presigned_urls = await self.async_client.create_presigned_urls([object_uri])
if not presigned_urls:
continue
file_info = FileInfo(
url=presigned_urls[0],
checksum=checksum,
filename=metadata.get("original_filename", filename) if metadata else filename,
)
file_infos.append(file_info)
return file_infos