downloaded files in pbs response (#3487)

This commit is contained in:
LawyZheng
2025-09-20 09:34:25 +08:00
committed by GitHub
parent c0082006e2
commit 5e1409a5c7
5 changed files with 83 additions and 4 deletions

View File

@@ -135,6 +135,12 @@ class BaseStorage(ABC):
) -> list[str]: ) -> list[str]:
pass pass
@abstractmethod
async def get_shared_downloaded_files_in_browser_session(
    self,
    organization_id: str,
    browser_session_id: str,
) -> list[FileInfo]:
    """Return file descriptors for the files downloaded in a browser session.

    Abstract hook: every concrete storage backend must provide its own
    implementation.

    Args:
        organization_id: Identifier of the organization that owns the session.
        browser_session_id: Identifier of the browser session to inspect.

    Returns:
        A list of ``FileInfo`` objects; the list may be empty.
    """
@abstractmethod @abstractmethod
async def save_downloaded_files(self, organization_id: str, run_id: str | None) -> None: async def save_downloaded_files(self, organization_id: str, run_id: str | None) -> None:
pass pass

View File

@@ -215,6 +215,11 @@ class LocalStorage(BaseStorage):
) -> list[str]: ) -> list[str]:
return [] return []
async def get_shared_downloaded_files_in_browser_session(
    self,
    organization_id: str,
    browser_session_id: str,
) -> list[FileInfo]:
    """Return the downloaded files of a browser session (always none here).

    This implementation ignores both arguments and unconditionally yields an
    empty list.
    NOTE(review): presumably local storage has no shareable download
    location to report -- confirm before relying on this.

    Args:
        organization_id: Identifier of the owning organization (unused).
        browser_session_id: Identifier of the browser session (unused).

    Returns:
        A new, empty list on every call.
    """
    no_files: list[FileInfo] = []
    return no_files
async def list_downloading_files_in_browser_session( async def list_downloading_files_in_browser_session(
self, organization_id: str, browser_session_id: str self, organization_id: str, browser_session_id: str
) -> list[str]: ) -> list[str]:

View File

@@ -203,6 +203,36 @@ class S3Storage(BaseStorage):
f"s3://{settings.AWS_S3_BUCKET_ARTIFACTS}/{file}" for file in await self.async_client.list_files(uri=uri) f"s3://{settings.AWS_S3_BUCKET_ARTIFACTS}/{file}" for file in await self.async_client.list_files(uri=uri)
] ]
async def get_shared_downloaded_files_in_browser_session(
    self, organization_id: str, browser_session_id: str
) -> list[FileInfo]:
    """Build a ``FileInfo`` for each downloaded file of a browser session.

    The keys come from ``list_downloaded_files_in_browser_session``. For each
    key a presigned URL is requested; keys for which none is returned are
    left out of the result. File metadata is looked up only for keys that
    will actually be returned, and is optional: without it the checksum is
    ``None`` and the filename falls back to the basename of the key.

    Args:
        organization_id: Identifier of the organization that owns the session.
        browser_session_id: Identifier of the browser session to inspect.

    Returns:
        One ``FileInfo`` (url, checksum, filename) per key that could be
        presigned, in listing order. Empty when nothing is listed.
    """
    object_keys = await self.list_downloaded_files_in_browser_session(organization_id, browser_session_id)

    file_infos: list[FileInfo] = []
    for key in object_keys:
        # Presign first: a key without a URL is skipped, so looking up its
        # metadata beforehand would be a wasted storage round trip.
        presigned_urls = await self.async_client.create_presigned_urls([key])
        if not presigned_urls:
            continue

        # Missing metadata is not an error here; normalise it to an empty
        # mapping so both lookups below fall back naturally.
        metadata = await self.async_client.get_file_metadata(key, log_exception=False) or {}
        fallback_name = os.path.basename(key)

        file_infos.append(
            FileInfo(
                url=presigned_urls[0],
                checksum=metadata.get("sha256_checksum"),
                filename=metadata.get("original_filename", fallback_name),
            )
        )
    return file_infos
async def list_downloading_files_in_browser_session( async def list_downloading_files_in_browser_session(
self, organization_id: str, browser_session_id: str self, organization_id: str, browser_session_id: str
) -> list[str]: ) -> list[str]:

View File

@@ -1,3 +1,5 @@
import asyncio
from fastapi import Depends, HTTPException, Path from fastapi import Depends, HTTPException, Path
from fastapi.responses import ORJSONResponse from fastapi.responses import ORJSONResponse
@@ -45,7 +47,7 @@ async def create_browser_session(
timeout_minutes=browser_session_request.timeout, timeout_minutes=browser_session_request.timeout,
proxy_location=browser_session_request.proxy_location, proxy_location=browser_session_request.proxy_location,
) )
return BrowserSessionResponse.from_browser_session(browser_session) return await BrowserSessionResponse.from_browser_session(browser_session)
@base_router.post( @base_router.post(
@@ -116,7 +118,7 @@ async def get_browser_session(
) )
if not browser_session: if not browser_session:
raise HTTPException(status_code=404, detail=f"Browser session {browser_session_id} not found") raise HTTPException(status_code=404, detail=f"Browser session {browser_session_id} not found")
return BrowserSessionResponse.from_browser_session(browser_session) return await BrowserSessionResponse.from_browser_session(browser_session, app.STORAGE)
@base_router.get( @base_router.get(
@@ -145,4 +147,9 @@ async def get_browser_sessions(
"""Get all active browser sessions for the organization""" """Get all active browser sessions for the organization"""
analytics.capture("skyvern-oss-agent-browser-sessions-get") analytics.capture("skyvern-oss-agent-browser-sessions-get")
browser_sessions = await app.PERSISTENT_SESSIONS_MANAGER.get_active_sessions(current_org.organization_id) browser_sessions = await app.PERSISTENT_SESSIONS_MANAGER.get_active_sessions(current_org.organization_id)
return [BrowserSessionResponse.from_browser_session(browser_session) for browser_session in browser_sessions] return await asyncio.gather(
*[
BrowserSessionResponse.from_browser_session(browser_session, app.STORAGE)
for browser_session in browser_sessions
]
)

View File

@@ -1,12 +1,19 @@
from __future__ import annotations from __future__ import annotations
import asyncio
from datetime import datetime from datetime import datetime
import structlog
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from skyvern.config import settings from skyvern.config import settings
from skyvern.constants import GET_DOWNLOADED_FILES_TIMEOUT
from skyvern.forge.sdk.artifact.storage.base import BaseStorage
from skyvern.forge.sdk.schemas.files import FileInfo
from skyvern.forge.sdk.schemas.persistent_browser_sessions import PersistentBrowserSession from skyvern.forge.sdk.schemas.persistent_browser_sessions import PersistentBrowserSession
LOG = structlog.get_logger()
class BrowserSessionResponse(BaseModel): class BrowserSessionResponse(BaseModel):
"""Response model for browser session information.""" """Response model for browser session information."""
@@ -40,6 +47,10 @@ class BrowserSessionResponse(BaseModel):
examples=["https://app.skyvern.com/browser-session/pbs_123456"], examples=["https://app.skyvern.com/browser-session/pbs_123456"],
) )
vnc_streaming_supported: bool = Field(False, description="Whether the browser session supports VNC streaming") vnc_streaming_supported: bool = Field(False, description="Whether the browser session supports VNC streaming")
download_path: str | None = Field(None, description="The path where the browser session downloads files")
downloaded_files: list[FileInfo] | None = Field(
None, description="The list of files downloaded by the browser session"
)
started_at: datetime | None = Field(None, description="Timestamp when the session was started") started_at: datetime | None = Field(None, description="Timestamp when the session was started")
completed_at: datetime | None = Field(None, description="Timestamp when the session was completed") completed_at: datetime | None = Field(None, description="Timestamp when the session was completed")
created_at: datetime = Field( created_at: datetime = Field(
@@ -49,7 +60,9 @@ class BrowserSessionResponse(BaseModel):
deleted_at: datetime | None = Field(None, description="Timestamp when the session was deleted, if applicable") deleted_at: datetime | None = Field(None, description="Timestamp when the session was deleted, if applicable")
@classmethod @classmethod
def from_browser_session(cls, browser_session: PersistentBrowserSession) -> BrowserSessionResponse: async def from_browser_session(
cls, browser_session: PersistentBrowserSession, storage: BaseStorage | None = None
) -> BrowserSessionResponse:
""" """
Creates a BrowserSessionResponse from a PersistentBrowserSession object. Creates a BrowserSessionResponse from a PersistentBrowserSession object.
@@ -62,6 +75,22 @@ class BrowserSessionResponse(BaseModel):
app_url = ( app_url = (
f"{settings.SKYVERN_APP_URL.rstrip('/')}/browser-session/{browser_session.persistent_browser_session_id}" f"{settings.SKYVERN_APP_URL.rstrip('/')}/browser-session/{browser_session.persistent_browser_session_id}"
) )
download_path = (
f"/app/downloads/{browser_session.organization_id}/{browser_session.persistent_browser_session_id}"
)
downloaded_files: list[FileInfo] = []
if storage:
try:
async with asyncio.timeout(GET_DOWNLOADED_FILES_TIMEOUT):
downloaded_files = await storage.get_shared_downloaded_files_in_browser_session(
organization_id=browser_session.organization_id,
browser_session_id=browser_session.persistent_browser_session_id,
)
except asyncio.TimeoutError:
LOG.warning(
"Timeout getting downloaded files", browser_session_id=browser_session.persistent_browser_session_id
)
return cls( return cls(
browser_session_id=browser_session.persistent_browser_session_id, browser_session_id=browser_session.persistent_browser_session_id,
organization_id=browser_session.organization_id, organization_id=browser_session.organization_id,
@@ -76,4 +105,6 @@ class BrowserSessionResponse(BaseModel):
created_at=browser_session.created_at, created_at=browser_session.created_at,
modified_at=browser_session.modified_at, modified_at=browser_session.modified_at,
deleted_at=browser_session.deleted_at, deleted_at=browser_session.deleted_at,
download_path=download_path,
downloaded_files=downloaded_files,
) )