From 698d29036dd3ef5406777115345f3a5a87f994a2 Mon Sep 17 00:00:00 2001 From: Shuchang Zheng Date: Tue, 23 Sep 2025 16:54:34 -0400 Subject: [PATCH] add recordings to browser session object (#3503) --- skyvern/forge/sdk/artifact/storage/base.py | 10 +++++ skyvern/forge/sdk/artifact/storage/local.py | 10 +++++ skyvern/forge/sdk/artifact/storage/s3.py | 46 +++++++++++++++++++++ skyvern/webeye/schemas.py | 16 +++++++ 4 files changed, 82 insertions(+) diff --git a/skyvern/forge/sdk/artifact/storage/base.py b/skyvern/forge/sdk/artifact/storage/base.py index f90a509c..efd33016 100644 --- a/skyvern/forge/sdk/artifact/storage/base.py +++ b/skyvern/forge/sdk/artifact/storage/base.py @@ -141,6 +141,16 @@ class BaseStorage(ABC): ) -> list[FileInfo]: pass + @abstractmethod + async def list_recordings_in_browser_session(self, organization_id: str, browser_session_id: str) -> list[str]: + pass + + @abstractmethod + async def get_shared_recordings_in_browser_session( + self, organization_id: str, browser_session_id: str + ) -> list[FileInfo]: + pass + @abstractmethod async def save_downloaded_files(self, organization_id: str, run_id: str | None) -> None: pass diff --git a/skyvern/forge/sdk/artifact/storage/local.py b/skyvern/forge/sdk/artifact/storage/local.py index fecba41f..bdb70ffb 100644 --- a/skyvern/forge/sdk/artifact/storage/local.py +++ b/skyvern/forge/sdk/artifact/storage/local.py @@ -225,6 +225,16 @@ class LocalStorage(BaseStorage): ) -> list[str]: return [] + async def list_recordings_in_browser_session(self, organization_id: str, browser_session_id: str) -> list[str]: + """List all recording files for a browser session (not implemented for local storage).""" + return [] + + async def get_shared_recordings_in_browser_session( + self, organization_id: str, browser_session_id: str + ) -> list[FileInfo]: + """Get recording files with URLs for a browser session (not implemented for local storage).""" + return [] + async def get_downloaded_files(self, organization_id: str, run_id: str | None) -> list[FileInfo]: download_dir = get_download_dir(run_id=run_id) file_infos: list[FileInfo] = [] diff --git a/skyvern/forge/sdk/artifact/storage/s3.py b/skyvern/forge/sdk/artifact/storage/s3.py index 2ee9b39d..4799c863 100644 --- a/skyvern/forge/sdk/artifact/storage/s3.py +++ b/skyvern/forge/sdk/artifact/storage/s3.py @@ -250,6 +250,52 @@ class S3Storage(BaseStorage): ] return [file for file in files if file.endswith(BROWSER_DOWNLOADING_SUFFIX)] + async def list_recordings_in_browser_session(self, organization_id: str, browser_session_id: str) -> list[str]: + """List all recording files for a browser session from S3.""" + uri = f"s3://{settings.AWS_S3_BUCKET_ARTIFACTS}/v1/{settings.ENV}/{organization_id}/browser_sessions/{browser_session_id}/videos" + return [ + f"s3://{settings.AWS_S3_BUCKET_ARTIFACTS}/{file}" for file in await self.async_client.list_files(uri=uri) + ] + + async def get_shared_recordings_in_browser_session( + self, organization_id: str, browser_session_id: str + ) -> list[FileInfo]: + """Get recording files with presigned URLs for a browser session.""" + object_keys = await self.list_recordings_in_browser_session(organization_id, browser_session_id) + if len(object_keys) == 0: + return [] + + file_infos: list[FileInfo] = [] + for key in object_keys: + metadata = {} + modified_at: datetime | None = None + # Get metadata (including checksum) + try: + object_info = await self.async_client.get_object_info(key) + metadata = object_info.get("Metadata", {}) + modified_at = object_info.get("LastModified") + except Exception: + LOG.exception("Recording object info retrieval failed", uri=key) + + # Create FileInfo object + filename = os.path.basename(key) + checksum = metadata.get("sha256_checksum") if metadata else None + + # Get presigned URL + presigned_urls = await self.async_client.create_presigned_urls([key]) + if not presigned_urls: + continue + + file_info = FileInfo( + url=presigned_urls[0], + checksum=checksum, + filename=metadata.get("original_filename", filename) if metadata else filename, + modified_at=modified_at, + ) + file_infos.append(file_info) + + return file_infos + async def save_downloaded_files(self, organization_id: str, run_id: str | None) -> None: download_dir = get_download_dir(run_id=run_id) files = os.listdir(download_dir) diff --git a/skyvern/webeye/schemas.py b/skyvern/webeye/schemas.py index faec6a4c..00694de9 100644 --- a/skyvern/webeye/schemas.py +++ b/skyvern/webeye/schemas.py @@ -51,6 +51,7 @@ class BrowserSessionResponse(BaseModel): downloaded_files: list[FileInfo] | None = Field( None, description="The list of files downloaded by the browser session" ) + recordings: list[FileInfo] | None = Field(None, description="The list of video recordings from the browser session") started_at: datetime | None = Field(None, description="Timestamp when the session was started") completed_at: datetime | None = Field(None, description="Timestamp when the session was completed") created_at: datetime = Field( @@ -79,6 +80,7 @@ class BrowserSessionResponse(BaseModel): f"/app/downloads/{browser_session.organization_id}/{browser_session.persistent_browser_session_id}" ) downloaded_files: list[FileInfo] = [] + recordings: list[FileInfo] = [] if storage: try: async with asyncio.timeout(GET_DOWNLOADED_FILES_TIMEOUT): @@ -91,8 +93,21 @@ class BrowserSessionResponse(BaseModel): "Timeout getting downloaded files", browser_session_id=browser_session.persistent_browser_session_id ) + try: + async with asyncio.timeout(GET_DOWNLOADED_FILES_TIMEOUT): + recordings = await storage.get_shared_recordings_in_browser_session( + organization_id=browser_session.organization_id, + browser_session_id=browser_session.persistent_browser_session_id, + ) + except asyncio.TimeoutError: + LOG.warning( + "Timeout getting recordings", browser_session_id=browser_session.persistent_browser_session_id + ) + # Sort downloaded files by modified_at in descending order (newest first) downloaded_files.sort(key=lambda x: x.modified_at or datetime.min, reverse=True) + # Sort recordings by modified_at in descending order (newest first) + recordings.sort(key=lambda x: x.modified_at or datetime.min, reverse=True) return cls( browser_session_id=browser_session.persistent_browser_session_id, @@ -110,4 +125,5 @@ class BrowserSessionResponse(BaseModel): deleted_at=browser_session.deleted_at, download_path=download_path, downloaded_files=downloaded_files, + recordings=recordings, )