Fix HAR file zstd compression in S3 (#4567)

This commit is contained in:
LawyZheng
2026-01-28 23:13:45 +08:00
committed by GitHub
parent 76cbe02690
commit f9d249ccef
2 changed files with 9 additions and 10 deletions

View File

@@ -29,6 +29,9 @@ from skyvern.forge.sdk.schemas.workflow_runs import WorkflowRunBlock
LOG = structlog.get_logger()
S3_ZSTD_COMPRESSED_SUFFIX = ".zst"
class S3Storage(BaseStorage):
_PATH_VERSION = "v1"
@@ -38,6 +41,9 @@ class S3Storage(BaseStorage):
def build_uri(self, *, organization_id: str, artifact_id: str, step: Step, artifact_type: ArtifactType) -> str:
    """Build the S3 object URI for an artifact produced by a task step.

    HAR artifacts get the ".zst" suffix appended to their extension
    because they are stored zstd-compressed; retrieve_artifact detects
    that suffix on download and decompresses transparently.
    """
    file_ext = FILE_EXTENTSION_MAP[artifact_type]
    if artifact_type == ArtifactType.HAR:
        # Mark the object key as zstd-compressed (e.g. "har" -> "har.zst")
        # so the compressed state is visible from the URI alone.
        file_ext = f"{file_ext}{S3_ZSTD_COMPRESSED_SUFFIX}"
    # NOTE(review): datetime.utcnow() is naive and deprecated in Python 3.12;
    # kept as-is here because the timestamp is embedded in persisted object
    # keys — changing its format would alter generated URIs. Confirm before
    # migrating to datetime.now(timezone.utc).
    return f"{self._build_base_uri(organization_id)}/{step.task_id}/{step.order:02d}_{step.retry_index}_{step.step_id}/{datetime.utcnow().isoformat()}_{artifact_id}_{artifact_type}.{file_ext}"
async def retrieve_global_workflows(self) -> list[str]:
@@ -106,12 +112,9 @@ class S3Storage(BaseStorage):
# HARs are easily compressible because they are mostly JSON.
# Other artifacts are not compressed because they are not easily compressible.
uri = artifact.uri
if artifact.artifact_type == ArtifactType.HAR:
if uri.endswith(S3_ZSTD_COMPRESSED_SUFFIX):
cctx = zstd.ZstdCompressor(level=3)
data = cctx.compress(data)
file_ext = FILE_EXTENTSION_MAP[artifact.artifact_type]
uri = uri.replace(f".{file_ext}", f".{file_ext}.zst")
artifact.uri = uri
sc = await self._get_storage_class_for_org(artifact.organization_id, self.bucket, len(data))
tags = await self._get_tags_for_org(artifact.organization_id)
@@ -139,7 +142,7 @@ class S3Storage(BaseStorage):
async def retrieve_artifact(self, artifact: Artifact) -> bytes | None:
    """Download an artifact's bytes from S3, transparently decompressing
    zstd-compressed objects.

    Objects whose URI ends with S3_ZSTD_COMPRESSED_SUFFIX (".zst") were
    stored zstd-compressed (HAR files); they are decompressed before being
    returned. Returns None/empty passthrough when the download yields no data.
    """
    data = await self.async_client.download_file(artifact.uri)
    # Diff residue removed: the stale hard-coded `.endswith(".zst")` check
    # from the previous revision duplicated the line below; only the
    # named-constant check is kept so the suffix has a single source of truth.
    if data and artifact.uri.endswith(S3_ZSTD_COMPRESSED_SUFFIX):
        dctx = zstd.ZstdDecompressor()
        data = dctx.decompress(data)
    return data

View File

@@ -488,15 +488,11 @@ class TestS3StorageHARCompression:
# Create sample HAR JSON data (easily compressible)
har_data = b'{"log": {"version": "1.2", "entries": [{"request": {}, "response": {}}]}}'
artifact = self._create_har_artifact(s3_storage, TEST_STEP_ID)
original_uri = artifact.uri
assert artifact.uri.endswith(".har.zst")
# Store the artifact
await s3_storage.store_artifact(artifact, har_data)
# Verify URI was updated to .har.zst
assert artifact.uri.endswith(".har.zst")
assert artifact.uri == original_uri.replace(".har", ".har.zst")
# Verify the stored data is compressed
s3uri = S3Uri(artifact.uri)
obj_response = boto3_test_client.get_object(Bucket=TEST_BUCKET, Key=s3uri.key)