fix: Ensure video recordings upload before stopping artifact sync (#4316)

This commit is contained in:
Marc Kelechava
2025-12-17 12:57:53 -08:00
committed by GitHub
parent 8d8d766b2a
commit 0d6a070a80
2 changed files with 24 additions and 0 deletions

View File

@@ -175,6 +175,7 @@ class AsyncAWSClient:
metadata: dict | None = None,
raise_exception: bool = False,
tags: dict[str, str] | None = None,
content_type: str | None = None,
) -> None:
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3/client/upload_file.html
try:
@@ -185,6 +186,8 @@ class AsyncAWSClient:
extra_args["Metadata"] = metadata
if tags:
extra_args["Tagging"] = self._create_tag_string(tags)
if content_type:
extra_args["ContentType"] = content_type
await client.upload_file(
Filename=file_path,
Bucket=parsed_uri.bucket,

View File

@@ -304,16 +304,34 @@ class S3Storage(BaseStorage):
file_infos: list[FileInfo] = []
for key in object_keys:
# Playwright's record_video_dir should only contain .webm files; .mp4 is also accepted here.
# Filter defensively in case any other, unexpected files are present.
key_lower = key.lower()
if not (key_lower.endswith(".webm") or key_lower.endswith(".mp4")):
LOG.warning(
"Skipping recording file with unsupported extension",
uri=key,
organization_id=organization_id,
browser_session_id=browser_session_id,
)
continue
metadata = {}
modified_at: datetime | None = None
content_length: int | None = None
# Get metadata (including checksum)
try:
object_info = await self.async_client.get_object_info(key)
metadata = object_info.get("Metadata", {})
modified_at = object_info.get("LastModified")
content_length = object_info.get("ContentLength")
except Exception:
LOG.exception("Recording object info retrieval failed", uri=key)
# Skip zero-byte objects (e.g. left behind by incomplete uploads)
if content_length == 0:
continue
# Create FileInfo object
filename = os.path.basename(key)
checksum = metadata.get("sha256_checksum") if metadata else None
@@ -331,6 +349,9 @@ class S3Storage(BaseStorage):
)
file_infos.append(file_info)
# Prefer the newest recording first (S3 list order is not guaranteed).
# Treat None as "oldest".
file_infos.sort(key=lambda f: (f.modified_at is not None, f.modified_at), reverse=True)
return file_infos
async def save_downloaded_files(self, organization_id: str, run_id: str | None) -> None: