diff --git a/skyvern/forge/sdk/api/aws.py b/skyvern/forge/sdk/api/aws.py index 46ed58dc..e777a48c 100644 --- a/skyvern/forge/sdk/api/aws.py +++ b/skyvern/forge/sdk/api/aws.py @@ -175,6 +175,7 @@ class AsyncAWSClient: metadata: dict | None = None, raise_exception: bool = False, tags: dict[str, str] | None = None, + content_type: str | None = None, ) -> None: # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3/client/upload_file.html try: @@ -185,6 +186,8 @@ class AsyncAWSClient: extra_args["Metadata"] = metadata if tags: extra_args["Tagging"] = self._create_tag_string(tags) + if content_type: + extra_args["ContentType"] = content_type await client.upload_file( Filename=file_path, Bucket=parsed_uri.bucket, diff --git a/skyvern/forge/sdk/artifact/storage/s3.py b/skyvern/forge/sdk/artifact/storage/s3.py index 60b773ad..6bb359a0 100644 --- a/skyvern/forge/sdk/artifact/storage/s3.py +++ b/skyvern/forge/sdk/artifact/storage/s3.py @@ -304,16 +304,34 @@ class S3Storage(BaseStorage): file_infos: list[FileInfo] = [] for key in object_keys: + # Playwright's record_video_dir should only contain .webm files. + # Filter defensively in case of unexpected files. 
+ key_lower = key.lower() + if not (key_lower.endswith(".webm") or key_lower.endswith(".mp4")): + LOG.warning( + "Skipping recording file with unsupported extension", + uri=key, + organization_id=organization_id, + browser_session_id=browser_session_id, + ) + continue + metadata = {} modified_at: datetime | None = None + content_length: int | None = None # Get metadata (including checksum) try: object_info = await self.async_client.get_object_info(key) metadata = object_info.get("Metadata", {}) modified_at = object_info.get("LastModified") + content_length = object_info.get("ContentLength") except Exception: LOG.exception("Recording object info retrieval failed", uri=key) + # Skip zero-byte objects (e.g. left behind by incomplete uploads) + if content_length == 0: + continue + # Create FileInfo object filename = os.path.basename(key) checksum = metadata.get("sha256_checksum") if metadata else None @@ -331,6 +349,9 @@ class S3Storage(BaseStorage): ) file_infos.append(file_info) + # Prefer the newest recording first (S3 list order is not guaranteed). + # Treat None as "oldest". + file_infos.sort(key=lambda f: (f.modified_at is not None, f.modified_at), reverse=True) return file_infos async def save_downloaded_files(self, organization_id: str, run_id: str | None) -> None: