browser sessions v2 - backend (#4515)

Signed-off-by: Benji Visser <benji@093b.org>
This commit is contained in:
Benji Visser
2026-01-21 22:27:16 -05:00
committed by GitHub
parent f781a6f0ef
commit b5ff547a3a
15 changed files with 273 additions and 35 deletions

View File

@@ -0,0 +1,35 @@
"""add browser session v2 compute fields
Revision ID: ce791d022652
Revises: a720c991f779
Create Date: 2026-01-22 03:14:43.766916+00:00
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "ce791d022652"
down_revision: Union[str, None] = "a720c991f779"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
op.add_column("persistent_browser_sessions", sa.Column("instance_type", sa.String(), nullable=True))
op.add_column("persistent_browser_sessions", sa.Column("vcpu_millicores", sa.Integer(), nullable=True))
op.add_column("persistent_browser_sessions", sa.Column("memory_mb", sa.Integer(), nullable=True))
op.add_column("persistent_browser_sessions", sa.Column("duration_ms", sa.BigInteger(), nullable=True))
op.add_column("persistent_browser_sessions", sa.Column("compute_cost", sa.Numeric(), nullable=True))
def downgrade() -> None:
op.drop_column("persistent_browser_sessions", "compute_cost")
op.drop_column("persistent_browser_sessions", "duration_ms")
op.drop_column("persistent_browser_sessions", "memory_mb")
op.drop_column("persistent_browser_sessions", "vcpu_millicores")
op.drop_column("persistent_browser_sessions", "instance_type")

View File

@@ -829,6 +829,14 @@ class BrowserSessionNotFound(SkyvernHTTPException):
) )
class BrowserSessionStartupTimeout(SkyvernHTTPException):
def __init__(self, browser_session_id: str) -> None:
super().__init__(
f"Browser session {browser_session_id} failed to start within the timeout period.",
status_code=status.HTTP_504_GATEWAY_TIMEOUT,
)
class BrowserProfileNotFound(SkyvernHTTPException): class BrowserProfileNotFound(SkyvernHTTPException):
def __init__(self, profile_id: str, organization_id: str | None = None) -> None: def __init__(self, profile_id: str, organization_id: str | None = None) -> None:
message = f"Browser profile {profile_id} not found" message = f"Browser profile {profile_id} not found"

View File

@@ -289,14 +289,72 @@ class LocalStorage(BaseStorage):
return [] return []
async def list_recordings_in_browser_session(self, organization_id: str, browser_session_id: str) -> list[str]: async def list_recordings_in_browser_session(self, organization_id: str, browser_session_id: str) -> list[str]:
"""List all recording files for a browser session (not implemented for local storage).""" """List all recording files for a browser session from local storage.
return []
Videos are synced to the browser_sessions storage path when the session closes.
"""
videos_base = (
Path(self.artifact_path)
/ settings.ENV
/ organization_id
/ "browser_sessions"
/ browser_session_id
/ "videos"
)
recording_files: list[str] = []
if videos_base.exists():
for root, _, files in os.walk(videos_base):
for file in files:
file_path = Path(root) / file
recording_files.append(f"file://{file_path}")
return recording_files
async def get_shared_recordings_in_browser_session( async def get_shared_recordings_in_browser_session(
self, organization_id: str, browser_session_id: str self, organization_id: str, browser_session_id: str
) -> list[FileInfo]: ) -> list[FileInfo]:
"""Get recording files with URLs for a browser session (not implemented for local storage).""" """Get recording files with URLs for a browser session from local storage."""
return [] file_uris = await self.list_recordings_in_browser_session(organization_id, browser_session_id)
if not file_uris:
return []
file_infos: list[FileInfo] = []
for uri in file_uris:
uri_lower = uri.lower()
if not (uri_lower.endswith(".webm") or uri_lower.endswith(".mp4")):
LOG.warning(
"Skipping recording file with unsupported extension",
uri=uri,
organization_id=organization_id,
browser_session_id=browser_session_id,
)
continue
file_path = parse_uri_to_path(uri)
path_obj = Path(file_path)
if not path_obj.exists():
continue
file_size = path_obj.stat().st_size
if file_size == 0:
continue
modified_at = datetime.fromtimestamp(path_obj.stat().st_mtime)
checksum = calculate_sha256_for_file(file_path)
filename = path_obj.name
file_info = FileInfo(
url=uri,
checksum=checksum,
filename=filename,
modified_at=modified_at,
)
file_infos.append(file_info)
file_infos.sort(key=lambda f: (f.modified_at is not None, f.modified_at), reverse=True)
return file_infos
async def get_downloaded_files(self, organization_id: str, run_id: str | None) -> list[FileInfo]: async def get_downloaded_files(self, organization_id: str, run_id: str | None) -> list[FileInfo]:
download_dir = get_download_dir(run_id=run_id) download_dir = get_download_dir(run_id=run_id)

View File

@@ -4705,6 +4705,7 @@ class AgentDB(BaseAlchemyDB):
status: str | None = None, status: str | None = None,
timeout_minutes: int | None = None, timeout_minutes: int | None = None,
organization_id: str | None = None, organization_id: str | None = None,
completed_at: datetime | None = None,
) -> PersistentBrowserSession: ) -> PersistentBrowserSession:
try: try:
async with self.Session() as session: async with self.Session() as session:
@@ -4723,6 +4724,8 @@ class AgentDB(BaseAlchemyDB):
persistent_browser_session.status = status persistent_browser_session.status = status
if timeout_minutes: if timeout_minutes:
persistent_browser_session.timeout_minutes = timeout_minutes persistent_browser_session.timeout_minutes = timeout_minutes
if completed_at:
persistent_browser_session.completed_at = completed_at
await session.commit() await session.commit()
await session.refresh(persistent_browser_session) await session.refresh(persistent_browser_session)
@@ -4741,7 +4744,7 @@ class AgentDB(BaseAlchemyDB):
self, self,
browser_session_id: str, browser_session_id: str,
browser_address: str | None, browser_address: str | None,
ip_address: str, ip_address: str | None,
ecs_task_arn: str | None, ecs_task_arn: str | None,
organization_id: str | None = None, organization_id: str | None = None,
) -> None: ) -> None:
@@ -4779,6 +4782,47 @@ class AgentDB(BaseAlchemyDB):
LOG.error("UnexpectedError", exc_info=True) LOG.error("UnexpectedError", exc_info=True)
raise raise
async def update_persistent_browser_session_compute_cost(
self,
session_id: str,
organization_id: str,
instance_type: str,
vcpu_millicores: int,
memory_mb: int,
duration_ms: int,
compute_cost: float,
) -> None:
"""Update the compute cost fields for a persistent browser session"""
try:
async with self.Session() as session:
persistent_browser_session = (
await session.scalars(
select(PersistentBrowserSessionModel)
.filter_by(persistent_browser_session_id=session_id)
.filter_by(organization_id=organization_id)
.filter_by(deleted_at=None)
)
).first()
if persistent_browser_session:
persistent_browser_session.instance_type = instance_type
persistent_browser_session.vcpu_millicores = vcpu_millicores
persistent_browser_session.memory_mb = memory_mb
persistent_browser_session.duration_ms = duration_ms
persistent_browser_session.compute_cost = compute_cost
await session.commit()
await session.refresh(persistent_browser_session)
else:
raise NotFoundError(f"PersistentBrowserSession {session_id} not found")
except NotFoundError:
LOG.error("NotFoundError", exc_info=True)
raise
except SQLAlchemyError:
LOG.error("SQLAlchemyError", exc_info=True)
raise
except Exception:
LOG.error("UnexpectedError", exc_info=True)
raise
async def mark_persistent_browser_session_deleted(self, session_id: str, organization_id: str) -> None: async def mark_persistent_browser_session_deleted(self, session_id: str, organization_id: str) -> None:
"""Mark a persistent browser session as deleted.""" """Mark a persistent browser session as deleted."""
try: try:

View File

@@ -856,6 +856,11 @@ class PersistentBrowserSessionModel(Base):
proxy_location = Column(String, nullable=True) proxy_location = Column(String, nullable=True)
extensions = Column(JSON, nullable=True) extensions = Column(JSON, nullable=True)
browser_type = Column(String, nullable=True) browser_type = Column(String, nullable=True)
instance_type = Column(String, nullable=True)
vcpu_millicores = Column(Integer, nullable=True)
memory_mb = Column(Integer, nullable=True)
duration_ms = Column(BigInteger, nullable=True)
compute_cost = Column(Numeric, nullable=True)
started_at = Column(DateTime, nullable=True) started_at = Column(DateTime, nullable=True)
completed_at = Column(DateTime, nullable=True) completed_at = Column(DateTime, nullable=True)
created_at = Column(DateTime, default=datetime.datetime.utcnow, nullable=False, index=True) created_at = Column(DateTime, default=datetime.datetime.utcnow, nullable=False, index=True)

View File

@@ -33,7 +33,7 @@ async def await_browser_session(
if persistent_browser_session is None: if persistent_browser_session is None:
raise Exception(f"Persistent browser session not found for {session_id}") raise Exception(f"Persistent browser session not found for {session_id}")
LOG.info( LOG.debug(
"Checking browser address", "Checking browser address",
session_id=session_id, session_id=session_id,
address=persistent_browser_session.browser_address, address=persistent_browser_session.browser_address,

View File

@@ -281,6 +281,43 @@ async def ask_for_clipboard(vnc_channel: VncChannel) -> None:
LOG.exception(f"{class_name} Failed to ask for clipboard via CDP", **vnc_channel.identity) LOG.exception(f"{class_name} Failed to ask for clipboard via CDP", **vnc_channel.identity)
# TODO(benji): I hate this function. It's messy and gross. Once we remove v1,
# we should clean this up.
def _build_vnc_url_from_browser_address(browser_address: str) -> str | None:
"""
Build a routed VNC URL from a V2 K8s routed browser_address.
V2 K8s routed browser_address format:
wss://{domain}/{session_id}/{token}/devtools/browser/{browser_id}
Returns VNC URL in format:
wss://{domain}/vnc/{session_id}/{token}
Returns None if browser_address is not a V2 routed URL.
"""
if not browser_address:
return None
parsed = urlparse(browser_address)
# Check if this looks like a V2 routed URL (wss:// with token in path)
if parsed.scheme not in ("wss", "ws"):
return None
# Parse path: /{session_id}/{token}/devtools/browser/{browser_id}
path_parts = parsed.path.strip("/").split("/")
if len(path_parts) < 4 or path_parts[2] != "devtools":
return None
session_id = path_parts[0]
token = path_parts[1]
domain = parsed.netloc
# Build VNC URL with same domain and token
scheme = "wss" if parsed.scheme == "wss" else "ws"
return f"{scheme}://{domain}/vnc/{session_id}/{token}"
async def loop_stream_vnc(vnc_channel: VncChannel) -> None: async def loop_stream_vnc(vnc_channel: VncChannel) -> None:
""" """
Actually stream the VNC data between a frontend and a browser. Actually stream the VNC data between a frontend and a browser.
@@ -292,24 +329,28 @@ async def loop_stream_vnc(vnc_channel: VncChannel) -> None:
browser_session = vnc_channel.browser_session browser_session = vnc_channel.browser_session
class_name = vnc_channel.class_name class_name = vnc_channel.class_name
if browser_session: if not browser_session:
if browser_session.ip_address:
if ":" in browser_session.ip_address:
ip, _ = browser_session.ip_address.split(":")
vnc_url = f"ws://{ip}:{vnc_channel.vnc_port}"
else:
vnc_url = f"ws://{browser_session.ip_address}:{vnc_channel.vnc_port}"
else:
browser_address = browser_session.browser_address
parsed_browser_address = urlparse(browser_address)
host = parsed_browser_address.hostname
vnc_url = f"ws://{host}:{vnc_channel.vnc_port}"
else:
raise Exception(f"{class_name} No browser session associated with vnc channel.") raise Exception(f"{class_name} No browser session associated with vnc channel.")
# NOTE(jdo:streaming-local-dev) # First, check if this is a V2 K8s routed session by examining browser_address
# vnc_url = "ws://localhost:6080" # V2 sessions have browser_address like: wss://{domain}/{session_id}/{token}/devtools/...
# For these, we need to route VNC through the same nginx proxy
routed_vnc_url = _build_vnc_url_from_browser_address(browser_session.browser_address)
if routed_vnc_url:
vnc_url = routed_vnc_url
elif browser_session.ip_address:
# V1 ECS sessions: Direct IP connection (ip_address is a public/reachable IP)
if ":" in browser_session.ip_address:
ip, _ = browser_session.ip_address.split(":")
vnc_url = f"ws://{ip}:{vnc_channel.vnc_port}"
else:
vnc_url = f"ws://{browser_session.ip_address}:{vnc_channel.vnc_port}"
else:
# Last resort: parse browser_address hostname
browser_address = browser_session.browser_address
parsed_browser_address = urlparse(browser_address)
host = parsed_browser_address.hostname
vnc_url = f"ws://{host}:{vnc_channel.vnc_port}"
LOG.info( LOG.info(
f"{class_name} Connecting to vnc url.", f"{class_name} Connecting to vnc url.",
@@ -317,7 +358,12 @@ async def loop_stream_vnc(vnc_channel: VncChannel) -> None:
**vnc_channel.identity, **vnc_channel.identity,
) )
async with websockets.connect(vnc_url) as novnc_ws: # For routed VNC URLs (wss://), we need to pass the x-api-key header for authentication
extra_headers: dict[str, str] = {}
if vnc_url.startswith("wss://") and vnc_channel.x_api_key:
extra_headers["x-api-key"] = vnc_channel.x_api_key
async with websockets.connect(vnc_url, additional_headers=extra_headers) as novnc_ws:
async def frontend_to_browser() -> None: async def frontend_to_browser() -> None:
nonlocal class_name nonlocal class_name

View File

@@ -5,8 +5,10 @@ Provides WS endpoints for streaming messages to/from our frontend application.
import structlog import structlog
from fastapi import WebSocket from fastapi import WebSocket
from skyvern.config import settings
from skyvern.forge.sdk.routes.routers import base_router, legacy_base_router from skyvern.forge.sdk.routes.routers import base_router, legacy_base_router
from skyvern.forge.sdk.routes.streaming.auth import auth from skyvern.forge.sdk.routes.streaming.auth import _auth as local_auth
from skyvern.forge.sdk.routes.streaming.auth import auth as real_auth
from skyvern.forge.sdk.routes.streaming.channels.message import ( from skyvern.forge.sdk.routes.streaming.channels.message import (
Loops, Loops,
MessageChannel, MessageChannel,
@@ -60,6 +62,7 @@ async def messages(
client_id: str | None = None, client_id: str | None = None,
token: str | None = None, token: str | None = None,
) -> None: ) -> None:
auth = local_auth if settings.ENV == "local" else real_auth
organization_id = await auth(apikey=apikey, token=token, websocket=websocket) organization_id = await auth(apikey=apikey, token=token, websocket=websocket)
if not organization_id: if not organization_id:
@@ -98,7 +101,7 @@ async def messages(
) )
else: else:
LOG.error( LOG.error(
"Message channel: no browser_session_id or workflow_run_id provided.", "[WS] messages: no browser_session_id or workflow_run_id provided",
client_id=client_id, client_id=client_id,
organization_id=organization_id, organization_id=organization_id,
) )
@@ -139,5 +142,4 @@ async def messages(
workflow_run_id=workflow_run_id, workflow_run_id=workflow_run_id,
organization_id=organization_id, organization_id=organization_id,
) )
await message_channel.close(reason="message-stream-closed") await message_channel.close(reason="message-stream-closed")

View File

@@ -45,12 +45,12 @@ async def verify_browser_session(
""" """
Verify the browser session exists, and is usable. Verify the browser session exists, and is usable.
""" """
if settings.ENV == "local": if settings.ENV == "local":
dummy_browser_session = AddressablePersistentBrowserSession( dummy_browser_session = AddressablePersistentBrowserSession(
persistent_browser_session_id=browser_session_id, persistent_browser_session_id=browser_session_id,
organization_id=organization_id, organization_id=organization_id,
browser_address="0.0.0.0:9223", browser_address="0.0.0.0:9223",
ip_address="localhost",
created_at=datetime.now(), created_at=datetime.now(),
modified_at=datetime.now(), modified_at=datetime.now(),
) )
@@ -199,6 +199,7 @@ async def verify_workflow_run(
persistent_browser_session_id=workflow_run_id, persistent_browser_session_id=workflow_run_id,
organization_id=organization_id, organization_id=organization_id,
browser_address="0.0.0.0:9223", browser_address="0.0.0.0:9223",
ip_address="localhost",
created_at=datetime.now(), created_at=datetime.now(),
modified_at=datetime.now(), modified_at=datetime.now(),
) )

View File

@@ -12,8 +12,10 @@ NOTE(jdo:streaming-local-dev)
import structlog import structlog
from fastapi import WebSocket from fastapi import WebSocket
from skyvern.config import settings
from skyvern.forge.sdk.routes.routers import base_router, legacy_base_router from skyvern.forge.sdk.routes.routers import base_router, legacy_base_router
from skyvern.forge.sdk.routes.streaming.auth import auth from skyvern.forge.sdk.routes.streaming.auth import _auth as local_auth
from skyvern.forge.sdk.routes.streaming.auth import auth as real_auth
from skyvern.forge.sdk.routes.streaming.channels.vnc import ( from skyvern.forge.sdk.routes.streaming.channels.vnc import (
Loops, Loops,
VncChannel, VncChannel,
@@ -86,6 +88,7 @@ async def stream(
workflow_run_id=workflow_run_id, workflow_run_id=workflow_run_id,
) )
auth = local_auth if settings.ENV == "local" else real_auth
organization_id = await auth(apikey=apikey, token=token, websocket=websocket) organization_id = await auth(apikey=apikey, token=token, websocket=websocket)
if not organization_id: if not organization_id:

View File

@@ -1,4 +1,5 @@
from datetime import datetime from datetime import datetime
from decimal import Decimal
from enum import StrEnum from enum import StrEnum
from pydantic import BaseModel, ConfigDict from pydantic import BaseModel, ConfigDict
@@ -48,6 +49,11 @@ class PersistentBrowserSession(BaseModel):
status: str | None = None status: str | None = None
timeout_minutes: int | None = None timeout_minutes: int | None = None
proxy_location: ProxyLocation | None = None proxy_location: ProxyLocation | None = None
instance_type: str | None = None
vcpu_millicores: int | None = None
memory_mb: int | None = None
duration_ms: int | None = None
compute_cost: Decimal | None = None
started_at: datetime | None = None started_at: datetime | None = None
completed_at: datetime | None = None completed_at: datetime | None = None
created_at: datetime created_at: datetime

View File

@@ -613,8 +613,8 @@ async def _create_cdp_connection_browser(
"--remote-debugging-port=9222", "--remote-debugging-port=9222",
"--no-first-run", "--no-first-run",
"--no-default-browser-check", "--no-default-browser-check",
"--remote-debugging-address=0.0.0.0",
"--user-data-dir=./tmp/user_data_dir", "--user-data-dir=./tmp/user_data_dir",
"--remote-debugging-address=0.0.0.0",
], ],
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.PIPE, stderr=subprocess.PIPE,

View File

@@ -3,6 +3,7 @@ from __future__ import annotations
from dataclasses import dataclass from dataclasses import dataclass
from datetime import datetime, timedelta, timezone from datetime import datetime, timedelta, timezone
from math import floor from math import floor
from pathlib import Path
import structlog import structlog
from playwright._impl._errors import TargetClosedError from playwright._impl._errors import TargetClosedError
@@ -161,10 +162,13 @@ async def update_status(
organization_id=organization_id, organization_id=organization_id,
browser_status=status, browser_status=status,
) )
completed_at = datetime.now(timezone.utc) if is_final_status(status) else None
persistent_browser_session = await db.update_persistent_browser_session( persistent_browser_session = await db.update_persistent_browser_session(
session_id, session_id,
status=status, status=status,
organization_id=organization_id, organization_id=organization_id,
completed_at=completed_at,
) )
return persistent_browser_session return persistent_browser_session
@@ -264,15 +268,14 @@ class DefaultPersistentSessionsManager(PersistentSessionsManager):
timeout_minutes: int | None = None, timeout_minutes: int | None = None,
extensions: list[Extensions] | None = None, extensions: list[Extensions] | None = None,
browser_type: PersistentBrowserType | None = None, browser_type: PersistentBrowserType | None = None,
is_high_priority: bool = False,
) -> PersistentBrowserSession: ) -> PersistentBrowserSession:
"""Create a new browser session for an organization and return its ID with the browser state.""" """Create a new browser session for an organization and return its ID with the browser state."""
LOG.info( LOG.info(
"Creating new browser session", "Creating new browser session",
organization_id=organization_id, organization_id=organization_id,
) )
return await self.database.create_persistent_browser_session(
browser_session_db = await self.database.create_persistent_browser_session(
organization_id=organization_id, organization_id=organization_id,
runnable_type=runnable_type, runnable_type=runnable_type,
runnable_id=runnable_id, runnable_id=runnable_id,
@@ -282,8 +285,6 @@ class DefaultPersistentSessionsManager(PersistentSessionsManager):
browser_type=browser_type, browser_type=browser_type,
) )
return browser_session_db
async def occupy_browser_session( async def occupy_browser_session(
self, self,
session_id: str, session_id: str,
@@ -324,7 +325,6 @@ class DefaultPersistentSessionsManager(PersistentSessionsManager):
organization_id=organization_id, organization_id=organization_id,
session_id=browser_session_id, session_id=browser_session_id,
) )
# Export session profile before closing (so it can be used to create browser profiles) # Export session profile before closing (so it can be used to create browser profiles)
browser_artifacts = browser_session.browser_state.browser_artifacts browser_artifacts = browser_session.browser_state.browser_artifacts
if browser_artifacts and browser_artifacts.browser_session_dir: if browser_artifacts and browser_artifacts.browser_session_dir:
@@ -346,6 +346,29 @@ class DefaultPersistentSessionsManager(PersistentSessionsManager):
organization_id=organization_id, organization_id=organization_id,
) )
if browser_artifacts and browser_artifacts.video_artifacts:
for video_artifact in browser_artifacts.video_artifacts:
if video_artifact.video_path:
try:
video_path = Path(video_artifact.video_path)
if video_path.exists():
date = video_path.parent.name
await app.STORAGE.sync_browser_session_file(
organization_id=organization_id,
browser_session_id=browser_session_id,
artifact_type="videos",
local_file_path=str(video_path),
remote_path=video_path.name,
date=date,
)
except Exception:
LOG.exception(
"Failed to sync video recording",
browser_session_id=browser_session_id,
organization_id=organization_id,
video_path=video_artifact.video_path,
)
self._browser_sessions.pop(browser_session_id, None) self._browser_sessions.pop(browser_session_id, None)
try: try:

View File

@@ -67,6 +67,7 @@ class PersistentSessionsManager(Protocol):
timeout_minutes: int | None = None, timeout_minutes: int | None = None,
extensions: list[Extensions] | None = None, extensions: list[Extensions] | None = None,
browser_type: PersistentBrowserType | None = None, browser_type: PersistentBrowserType | None = None,
is_high_priority: bool = False,
) -> PersistentBrowserSession: ) -> PersistentBrowserSession:
"""Create a new browser session.""" """Create a new browser session."""
... ...

View File

@@ -27,6 +27,11 @@ class BrowserSessionResponse(BaseModel):
examples=["pbs_123456"], examples=["pbs_123456"],
) )
organization_id: str = Field(description="ID of the organization that owns this session") organization_id: str = Field(description="ID of the organization that owns this session")
status: str | None = Field(
None,
description="Current status of the browser session",
examples=["created", "running", "completed", "failed", "timeout"],
)
runnable_type: str | None = Field( runnable_type: str | None = Field(
None, None,
description="Type of the current runnable associated with this session (workflow, task etc)", description="Type of the current runnable associated with this session (workflow, task etc)",
@@ -124,6 +129,7 @@ class BrowserSessionResponse(BaseModel):
return cls( return cls(
browser_session_id=browser_session.persistent_browser_session_id, browser_session_id=browser_session.persistent_browser_session_id,
organization_id=browser_session.organization_id, organization_id=browser_session.organization_id,
status=browser_session.status,
runnable_type=browser_session.runnable_type, runnable_type=browser_session.runnable_type,
runnable_id=browser_session.runnable_id, runnable_id=browser_session.runnable_id,
timeout=browser_session.timeout_minutes, timeout=browser_session.timeout_minutes,