downloaded files in pbs response (#3487)

This commit is contained in:
LawyZheng
2025-09-20 09:34:25 +08:00
committed by GitHub
parent c0082006e2
commit 5e1409a5c7
5 changed files with 83 additions and 4 deletions

View File

@@ -135,6 +135,12 @@ class BaseStorage(ABC):
) -> list[str]:
pass
@abstractmethod
async def get_shared_downloaded_files_in_browser_session(
    self,
    organization_id: str,
    browser_session_id: str,
) -> list[FileInfo]:
    """List the downloaded files of a browser session as ``FileInfo`` records.

    Abstract hook: this base declaration carries no behavior of its own, so
    every concrete storage backend must supply the actual lookup.

    Args:
        organization_id: Identifier of the organization the session belongs to.
        browser_session_id: Identifier of the browser session to inspect.

    Returns:
        The ``FileInfo`` entries the backend exposes for that session.
    """
@abstractmethod
async def save_downloaded_files(self, organization_id: str, run_id: str | None) -> None:
pass

View File

@@ -215,6 +215,11 @@ class LocalStorage(BaseStorage):
) -> list[str]:
return []
async def get_shared_downloaded_files_in_browser_session(
    self,
    organization_id: str,
    browser_session_id: str,
) -> list[FileInfo]:
    """Return the shared downloaded files of a browser session.

    This implementation ignores both arguments and always reports an empty
    list.

    Args:
        organization_id: Identifier of the organization (unused here).
        browser_session_id: Identifier of the browser session (unused here).

    Returns:
        Always an empty list.
    """
    no_files: list[FileInfo] = []
    return no_files
async def list_downloading_files_in_browser_session(
self, organization_id: str, browser_session_id: str
) -> list[str]:

View File

@@ -203,6 +203,36 @@ class S3Storage(BaseStorage):
f"s3://{settings.AWS_S3_BUCKET_ARTIFACTS}/{file}" for file in await self.async_client.list_files(uri=uri)
]
async def get_shared_downloaded_files_in_browser_session(
    self, organization_id: str, browser_session_id: str
) -> list[FileInfo]:
    """Build ``FileInfo`` records for the files downloaded in a browser session.

    For every object returned by ``list_downloaded_files_in_browser_session``
    a presigned URL is requested; objects for which no URL comes back are
    left out of the result. Stored metadata, when available, supplies the
    checksum and the original filename.

    Args:
        organization_id: Identifier of the organization the session belongs to.
        browser_session_id: Identifier of the browser session to inspect.

    Returns:
        One ``FileInfo`` per object that could be presigned, in listing order.
        Empty when the session has no downloaded files.
    """
    object_keys = await self.list_downloaded_files_in_browser_session(organization_id, browser_session_id)
    if not object_keys:
        return []

    file_infos: list[FileInfo] = []
    for key in object_keys:
        # Presign first: a file without a URL is skipped anyway, so there is
        # no point paying for a metadata round-trip on its behalf.
        presigned_urls = await self.async_client.create_presigned_urls([key])
        if not presigned_urls:
            continue

        # Metadata is optional; an absent or empty result simply means no
        # checksum and a filename derived from the object key.
        metadata = await self.async_client.get_file_metadata(key, log_exception=False) or {}
        fallback_filename = os.path.basename(key)

        file_infos.append(
            FileInfo(
                url=presigned_urls[0],
                checksum=metadata.get("sha256_checksum"),
                filename=metadata.get("original_filename", fallback_filename),
            )
        )
    return file_infos
async def list_downloading_files_in_browser_session(
self, organization_id: str, browser_session_id: str
) -> list[str]:

View File

@@ -1,3 +1,5 @@
import asyncio
from fastapi import Depends, HTTPException, Path
from fastapi.responses import ORJSONResponse
@@ -45,7 +47,7 @@ async def create_browser_session(
timeout_minutes=browser_session_request.timeout,
proxy_location=browser_session_request.proxy_location,
)
return BrowserSessionResponse.from_browser_session(browser_session)
return await BrowserSessionResponse.from_browser_session(browser_session)
@base_router.post(
@@ -116,7 +118,7 @@ async def get_browser_session(
)
if not browser_session:
raise HTTPException(status_code=404, detail=f"Browser session {browser_session_id} not found")
return BrowserSessionResponse.from_browser_session(browser_session)
return await BrowserSessionResponse.from_browser_session(browser_session, app.STORAGE)
@base_router.get(
@@ -145,4 +147,9 @@ async def get_browser_sessions(
"""Get all active browser sessions for the organization"""
analytics.capture("skyvern-oss-agent-browser-sessions-get")
browser_sessions = await app.PERSISTENT_SESSIONS_MANAGER.get_active_sessions(current_org.organization_id)
return [BrowserSessionResponse.from_browser_session(browser_session) for browser_session in browser_sessions]
return await asyncio.gather(
*[
BrowserSessionResponse.from_browser_session(browser_session, app.STORAGE)
for browser_session in browser_sessions
]
)

View File

@@ -1,12 +1,19 @@
from __future__ import annotations
import asyncio
from datetime import datetime
import structlog
from pydantic import BaseModel, Field
from skyvern.config import settings
from skyvern.constants import GET_DOWNLOADED_FILES_TIMEOUT
from skyvern.forge.sdk.artifact.storage.base import BaseStorage
from skyvern.forge.sdk.schemas.files import FileInfo
from skyvern.forge.sdk.schemas.persistent_browser_sessions import PersistentBrowserSession
LOG = structlog.get_logger()
class BrowserSessionResponse(BaseModel):
"""Response model for browser session information."""
@@ -40,6 +47,10 @@ class BrowserSessionResponse(BaseModel):
examples=["https://app.skyvern.com/browser-session/pbs_123456"],
)
vnc_streaming_supported: bool = Field(False, description="Whether the browser session supports VNC streaming")
download_path: str | None = Field(None, description="The path where the browser session downloads files")
downloaded_files: list[FileInfo] | None = Field(
None, description="The list of files downloaded by the browser session"
)
started_at: datetime | None = Field(None, description="Timestamp when the session was started")
completed_at: datetime | None = Field(None, description="Timestamp when the session was completed")
created_at: datetime = Field(
@@ -49,7 +60,9 @@ class BrowserSessionResponse(BaseModel):
deleted_at: datetime | None = Field(None, description="Timestamp when the session was deleted, if applicable")
@classmethod
def from_browser_session(cls, browser_session: PersistentBrowserSession) -> BrowserSessionResponse:
async def from_browser_session(
cls, browser_session: PersistentBrowserSession, storage: BaseStorage | None = None
) -> BrowserSessionResponse:
"""
Creates a BrowserSessionResponse from a PersistentBrowserSession object.
@@ -62,6 +75,22 @@ class BrowserSessionResponse(BaseModel):
app_url = (
f"{settings.SKYVERN_APP_URL.rstrip('/')}/browser-session/{browser_session.persistent_browser_session_id}"
)
download_path = (
f"/app/downloads/{browser_session.organization_id}/{browser_session.persistent_browser_session_id}"
)
downloaded_files: list[FileInfo] = []
if storage:
try:
async with asyncio.timeout(GET_DOWNLOADED_FILES_TIMEOUT):
downloaded_files = await storage.get_shared_downloaded_files_in_browser_session(
organization_id=browser_session.organization_id,
browser_session_id=browser_session.persistent_browser_session_id,
)
except asyncio.TimeoutError:
LOG.warning(
"Timeout getting downloaded files", browser_session_id=browser_session.persistent_browser_session_id
)
return cls(
browser_session_id=browser_session.persistent_browser_session_id,
organization_id=browser_session.organization_id,
@@ -76,4 +105,6 @@ class BrowserSessionResponse(BaseModel):
created_at=browser_session.created_at,
modified_at=browser_session.modified_at,
deleted_at=browser_session.deleted_at,
download_path=download_path,
downloaded_files=downloaded_files,
)