From 5e1409a5c7fff9cbe6fd2ae334b8310d769e2698 Mon Sep 17 00:00:00 2001
From: LawyZheng
Date: Sat, 20 Sep 2025 09:34:25 +0800
Subject: [PATCH] downloaded files in pbs response (#3487)

---
Review notes (commentary only; text in this section is not part of the commit):

* Line breaks, indentation and blank lines below are reconstructed from a
  flattened copy of this patch. Every hunk was checked against its header
  line counts and against the diffstat (83 insertions, 4 deletions), but the
  exact indentation of context lines is inferred - confirm against the tree.
* BaseStorage gains the abstract method
  get_shared_downloaded_files_in_browser_session(). LocalStorage returns [].
  S3Storage returns one FileInfo per listed key, holding a presigned URL, the
  "sha256_checksum" metadata value (None when there is no metadata) and the
  "original_filename" metadata value, falling back to the key's basename.
  Keys for which no presigned URL comes back are skipped.
* BrowserSessionResponse.from_browser_session() becomes a coroutine with an
  optional `storage` argument and fills the new download_path and
  downloaded_files fields. The storage lookup runs under
  GET_DOWNLOADED_FILES_TIMEOUT; on timeout a warning is logged and
  downloaded_files stays [].
* create_browser_session awaits it without a storage argument, so its
  response carries an empty downloaded_files list; get_browser_session and
  get_browser_sessions pass app.STORAGE.
* NOTE(review): asyncio.timeout() was added in Python 3.11 - confirm the
  project's minimum supported version.
* NOTE(review): only asyncio.TimeoutError is caught around the storage call;
  any other storage exception would propagate to the route - confirm that
  this is intended.
* NOTE(review): the S3 implementation awaits the metadata and presigned-URL
  calls one key at a time - presumably fine for short listings; verify.

 skyvern/forge/sdk/artifact/storage/base.py   |  6 ++++
 skyvern/forge/sdk/artifact/storage/local.py  |  5 +++
 skyvern/forge/sdk/artifact/storage/s3.py     | 30 ++++++++++++++++++
 skyvern/forge/sdk/routes/browser_sessions.py | 13 ++++++--
 skyvern/webeye/schemas.py                    | 33 +++++++++++++++++++-
 5 files changed, 83 insertions(+), 4 deletions(-)

diff --git a/skyvern/forge/sdk/artifact/storage/base.py b/skyvern/forge/sdk/artifact/storage/base.py
index 85d3a445..f90a509c 100644
--- a/skyvern/forge/sdk/artifact/storage/base.py
+++ b/skyvern/forge/sdk/artifact/storage/base.py
@@ -135,6 +135,12 @@ class BaseStorage(ABC):
     ) -> list[str]:
         pass
 
+    @abstractmethod
+    async def get_shared_downloaded_files_in_browser_session(
+        self, organization_id: str, browser_session_id: str
+    ) -> list[FileInfo]:
+        pass
+
     @abstractmethod
     async def save_downloaded_files(self, organization_id: str, run_id: str | None) -> None:
         pass
diff --git a/skyvern/forge/sdk/artifact/storage/local.py b/skyvern/forge/sdk/artifact/storage/local.py
index e5801df4..fecba41f 100644
--- a/skyvern/forge/sdk/artifact/storage/local.py
+++ b/skyvern/forge/sdk/artifact/storage/local.py
@@ -215,6 +215,11 @@ class LocalStorage(BaseStorage):
     ) -> list[str]:
         return []
 
+    async def get_shared_downloaded_files_in_browser_session(
+        self, organization_id: str, browser_session_id: str
+    ) -> list[FileInfo]:
+        return []
+
     async def list_downloading_files_in_browser_session(
         self, organization_id: str, browser_session_id: str
     ) -> list[str]:
diff --git a/skyvern/forge/sdk/artifact/storage/s3.py b/skyvern/forge/sdk/artifact/storage/s3.py
index 27cac5c3..8c705000 100644
--- a/skyvern/forge/sdk/artifact/storage/s3.py
+++ b/skyvern/forge/sdk/artifact/storage/s3.py
@@ -203,6 +203,36 @@ class S3Storage(BaseStorage):
             f"s3://{settings.AWS_S3_BUCKET_ARTIFACTS}/{file}" for file in await self.async_client.list_files(uri=uri)
         ]
 
+    async def get_shared_downloaded_files_in_browser_session(
+        self, organization_id: str, browser_session_id: str
+    ) -> list[FileInfo]:
+        object_keys = await self.list_downloaded_files_in_browser_session(organization_id, browser_session_id)
+        if len(object_keys) == 0:
+            return []
+
+        file_infos: list[FileInfo] = []
+        for key in object_keys:
+            # Get metadata (including checksum)
+            metadata = await self.async_client.get_file_metadata(key, log_exception=False)
+
+            # Create FileInfo object
+            filename = os.path.basename(key)
+            checksum = metadata.get("sha256_checksum") if metadata else None
+
+            # Get presigned URL
+            presigned_urls = await self.async_client.create_presigned_urls([key])
+            if not presigned_urls:
+                continue
+
+            file_info = FileInfo(
+                url=presigned_urls[0],
+                checksum=checksum,
+                filename=metadata.get("original_filename", filename) if metadata else filename,
+            )
+            file_infos.append(file_info)
+
+        return file_infos
+
     async def list_downloading_files_in_browser_session(
         self, organization_id: str, browser_session_id: str
     ) -> list[str]:
diff --git a/skyvern/forge/sdk/routes/browser_sessions.py b/skyvern/forge/sdk/routes/browser_sessions.py
index 076c1343..862f4109 100644
--- a/skyvern/forge/sdk/routes/browser_sessions.py
+++ b/skyvern/forge/sdk/routes/browser_sessions.py
@@ -1,3 +1,5 @@
+import asyncio
+
 from fastapi import Depends, HTTPException, Path
 from fastapi.responses import ORJSONResponse
 
@@ -45,7 +47,7 @@ async def create_browser_session(
         timeout_minutes=browser_session_request.timeout,
         proxy_location=browser_session_request.proxy_location,
     )
-    return BrowserSessionResponse.from_browser_session(browser_session)
+    return await BrowserSessionResponse.from_browser_session(browser_session)
 
 
 @base_router.post(
@@ -116,7 +118,7 @@ async def get_browser_session(
     )
     if not browser_session:
         raise HTTPException(status_code=404, detail=f"Browser session {browser_session_id} not found")
-    return BrowserSessionResponse.from_browser_session(browser_session)
+    return await BrowserSessionResponse.from_browser_session(browser_session, app.STORAGE)
 
 
 @base_router.get(
@@ -145,4 +147,9 @@ async def get_browser_sessions(
     """Get all active browser sessions for the organization"""
     analytics.capture("skyvern-oss-agent-browser-sessions-get")
     browser_sessions = await app.PERSISTENT_SESSIONS_MANAGER.get_active_sessions(current_org.organization_id)
-    return [BrowserSessionResponse.from_browser_session(browser_session) for browser_session in browser_sessions]
+    return await asyncio.gather(
+        *[
+            BrowserSessionResponse.from_browser_session(browser_session, app.STORAGE)
+            for browser_session in browser_sessions
+        ]
+    )
diff --git a/skyvern/webeye/schemas.py b/skyvern/webeye/schemas.py
index 75017e91..d4a1449c 100644
--- a/skyvern/webeye/schemas.py
+++ b/skyvern/webeye/schemas.py
@@ -1,12 +1,19 @@
 from __future__ import annotations
 
+import asyncio
 from datetime import datetime
 
+import structlog
 from pydantic import BaseModel, Field
 
 from skyvern.config import settings
+from skyvern.constants import GET_DOWNLOADED_FILES_TIMEOUT
+from skyvern.forge.sdk.artifact.storage.base import BaseStorage
+from skyvern.forge.sdk.schemas.files import FileInfo
 from skyvern.forge.sdk.schemas.persistent_browser_sessions import PersistentBrowserSession
 
+LOG = structlog.get_logger()
+
 
 class BrowserSessionResponse(BaseModel):
     """Response model for browser session information."""
@@ -40,6 +47,10 @@ class BrowserSessionResponse(BaseModel):
         examples=["https://app.skyvern.com/browser-session/pbs_123456"],
     )
     vnc_streaming_supported: bool = Field(False, description="Whether the browser session supports VNC streaming")
+    download_path: str | None = Field(None, description="The path where the browser session downloads files")
+    downloaded_files: list[FileInfo] | None = Field(
+        None, description="The list of files downloaded by the browser session"
+    )
     started_at: datetime | None = Field(None, description="Timestamp when the session was started")
     completed_at: datetime | None = Field(None, description="Timestamp when the session was completed")
     created_at: datetime = Field(
@@ -49,7 +60,9 @@ class BrowserSessionResponse(BaseModel):
     deleted_at: datetime | None = Field(None, description="Timestamp when the session was deleted, if applicable")
 
     @classmethod
-    def from_browser_session(cls, browser_session: PersistentBrowserSession) -> BrowserSessionResponse:
+    async def from_browser_session(
+        cls, browser_session: PersistentBrowserSession, storage: BaseStorage | None = None
+    ) -> BrowserSessionResponse:
         """
         Creates a BrowserSessionResponse from a PersistentBrowserSession object.
 
@@ -62,6 +75,22 @@ class BrowserSessionResponse(BaseModel):
         app_url = (
             f"{settings.SKYVERN_APP_URL.rstrip('/')}/browser-session/{browser_session.persistent_browser_session_id}"
         )
+        download_path = (
+            f"/app/downloads/{browser_session.organization_id}/{browser_session.persistent_browser_session_id}"
+        )
+        downloaded_files: list[FileInfo] = []
+        if storage:
+            try:
+                async with asyncio.timeout(GET_DOWNLOADED_FILES_TIMEOUT):
+                    downloaded_files = await storage.get_shared_downloaded_files_in_browser_session(
+                        organization_id=browser_session.organization_id,
+                        browser_session_id=browser_session.persistent_browser_session_id,
+                    )
+            except asyncio.TimeoutError:
+                LOG.warning(
+                    "Timeout getting downloaded files", browser_session_id=browser_session.persistent_browser_session_id
+                )
+
         return cls(
             browser_session_id=browser_session.persistent_browser_session_id,
             organization_id=browser_session.organization_id,
@@ -76,4 +105,6 @@ class BrowserSessionResponse(BaseModel):
             created_at=browser_session.created_at,
             modified_at=browser_session.modified_at,
             deleted_at=browser_session.deleted_at,
+            download_path=download_path,
+            downloaded_files=downloaded_files,
         )