browser sessions v2 - backend (#4515)
Signed-off-by: Benji Visser <benji@093b.org>
This commit is contained in:
@@ -289,14 +289,72 @@ class LocalStorage(BaseStorage):
|
||||
return []
|
||||
|
||||
async def list_recordings_in_browser_session(self, organization_id: str, browser_session_id: str) -> list[str]:
    """List all recording files for a browser session from local storage.

    Videos are synced to the browser_sessions storage path when the session closes.

    The directory that is searched is
    ``<artifact_path>/<settings.ENV>/<organization_id>/browser_sessions/<browser_session_id>/videos``.
    It is walked recursively, so files in nested sub-directories are included.

    Args:
        organization_id: Organization that owns the browser session.
        browser_session_id: Identifier of the browser session.

    Returns:
        One ``file://`` URI per file found under the videos directory, in
        ``os.walk`` order. An empty list when the directory does not exist.
    """
    videos_base = (
        Path(self.artifact_path)
        / settings.ENV
        / organization_id
        / "browser_sessions"
        / browser_session_id
        / "videos"
    )

    # Nothing has been synced for this session (yet): no recordings to report.
    if not videos_base.exists():
        return []

    recording_files: list[str] = []
    for root, _, files in os.walk(videos_base):
        for file in files:
            file_path = Path(root) / file
            recording_files.append(f"file://{file_path}")

    return recording_files
|
||||
|
||||
async def get_shared_recordings_in_browser_session(
    self, organization_id: str, browser_session_id: str
) -> list[FileInfo]:
    """Get recording files with URLs for a browser session from local storage.

    Each URI returned by ``list_recordings_in_browser_session`` is turned into a
    ``FileInfo`` carrying the ``file://`` URL, a SHA-256 checksum, the file name
    and the modification time.

    A recording is skipped when:
      * its extension is not ``.webm`` or ``.mp4`` (a warning is logged),
      * the file no longer exists on disk, or
      * the file is empty (0 bytes).

    Args:
        organization_id: Organization that owns the browser session.
        browser_session_id: Identifier of the browser session.

    Returns:
        ``FileInfo`` entries sorted by modification time, newest first.
        An empty list when there are no recordings.
    """
    file_uris = await self.list_recordings_in_browser_session(organization_id, browser_session_id)
    if not file_uris:
        return []

    file_infos: list[FileInfo] = []
    for uri in file_uris:
        # Extension check is case-insensitive; endswith accepts a tuple of suffixes.
        if not uri.lower().endswith((".webm", ".mp4")):
            LOG.warning(
                "Skipping recording file with unsupported extension",
                uri=uri,
                organization_id=organization_id,
                browser_session_id=browser_session_id,
            )
            continue

        file_path = parse_uri_to_path(uri)
        path_obj = Path(file_path)

        # Stat once and tolerate the file vanishing between listing and stat,
        # instead of an exists() check followed by two separate stat() calls.
        try:
            file_stat = path_obj.stat()
        except FileNotFoundError:
            continue

        if file_stat.st_size == 0:
            continue

        file_infos.append(
            FileInfo(
                url=uri,
                checksum=calculate_sha256_for_file(file_path),
                filename=path_obj.name,
                modified_at=datetime.fromtimestamp(file_stat.st_mtime),
            )
        )

    # Newest first; entries without a modification time sort last.
    file_infos.sort(key=lambda f: (f.modified_at is not None, f.modified_at), reverse=True)
    return file_infos
|
||||
|
||||
async def get_downloaded_files(self, organization_id: str, run_id: str | None) -> list[FileInfo]:
|
||||
download_dir = get_download_dir(run_id=run_id)
|
||||
|
||||
@@ -4705,6 +4705,7 @@ class AgentDB(BaseAlchemyDB):
|
||||
status: str | None = None,
|
||||
timeout_minutes: int | None = None,
|
||||
organization_id: str | None = None,
|
||||
completed_at: datetime | None = None,
|
||||
) -> PersistentBrowserSession:
|
||||
try:
|
||||
async with self.Session() as session:
|
||||
@@ -4723,6 +4724,8 @@ class AgentDB(BaseAlchemyDB):
|
||||
persistent_browser_session.status = status
|
||||
if timeout_minutes:
|
||||
persistent_browser_session.timeout_minutes = timeout_minutes
|
||||
if completed_at:
|
||||
persistent_browser_session.completed_at = completed_at
|
||||
|
||||
await session.commit()
|
||||
await session.refresh(persistent_browser_session)
|
||||
@@ -4741,7 +4744,7 @@ class AgentDB(BaseAlchemyDB):
|
||||
self,
|
||||
browser_session_id: str,
|
||||
browser_address: str | None,
|
||||
ip_address: str,
|
||||
ip_address: str | None,
|
||||
ecs_task_arn: str | None,
|
||||
organization_id: str | None = None,
|
||||
) -> None:
|
||||
@@ -4779,6 +4782,47 @@ class AgentDB(BaseAlchemyDB):
|
||||
LOG.error("UnexpectedError", exc_info=True)
|
||||
raise
|
||||
|
||||
async def update_persistent_browser_session_compute_cost(
    self,
    session_id: str,
    organization_id: str,
    instance_type: str,
    vcpu_millicores: int,
    memory_mb: int,
    duration_ms: int,
    compute_cost: float,
) -> None:
    """Persist the compute-cost fields of a persistent browser session.

    Looks up the non-deleted session matching ``session_id`` and
    ``organization_id`` and overwrites its ``instance_type``,
    ``vcpu_millicores``, ``memory_mb``, ``duration_ms`` and ``compute_cost``
    columns, then commits.

    Raises:
        NotFoundError: No matching, non-deleted session exists.
        SQLAlchemyError: The database operation failed.

    Every exception is logged and re-raised unchanged.
    """
    try:
        async with self.Session() as session:
            lookup = (
                select(PersistentBrowserSessionModel)
                .filter_by(persistent_browser_session_id=session_id)
                .filter_by(organization_id=organization_id)
                .filter_by(deleted_at=None)
            )
            record = (await session.scalars(lookup)).first()

            # Guard clause: bail out before touching anything if the row is missing.
            if not record:
                raise NotFoundError(f"PersistentBrowserSession {session_id} not found")

            record.instance_type = instance_type
            record.vcpu_millicores = vcpu_millicores
            record.memory_mb = memory_mb
            record.duration_ms = duration_ms
            record.compute_cost = compute_cost

            await session.commit()
            await session.refresh(record)
    except NotFoundError:
        LOG.error("NotFoundError", exc_info=True)
        raise
    except SQLAlchemyError:
        LOG.error("SQLAlchemyError", exc_info=True)
        raise
    except Exception:
        LOG.error("UnexpectedError", exc_info=True)
        raise
|
||||
|
||||
async def mark_persistent_browser_session_deleted(self, session_id: str, organization_id: str) -> None:
|
||||
"""Mark a persistent browser session as deleted."""
|
||||
try:
|
||||
|
||||
@@ -856,6 +856,11 @@ class PersistentBrowserSessionModel(Base):
|
||||
proxy_location = Column(String, nullable=True)
|
||||
extensions = Column(JSON, nullable=True)
|
||||
browser_type = Column(String, nullable=True)
|
||||
instance_type = Column(String, nullable=True)
|
||||
vcpu_millicores = Column(Integer, nullable=True)
|
||||
memory_mb = Column(Integer, nullable=True)
|
||||
duration_ms = Column(BigInteger, nullable=True)
|
||||
compute_cost = Column(Numeric, nullable=True)
|
||||
started_at = Column(DateTime, nullable=True)
|
||||
completed_at = Column(DateTime, nullable=True)
|
||||
created_at = Column(DateTime, default=datetime.datetime.utcnow, nullable=False, index=True)
|
||||
|
||||
@@ -33,7 +33,7 @@ async def await_browser_session(
|
||||
if persistent_browser_session is None:
|
||||
raise Exception(f"Persistent browser session not found for {session_id}")
|
||||
|
||||
LOG.info(
|
||||
LOG.debug(
|
||||
"Checking browser address",
|
||||
session_id=session_id,
|
||||
address=persistent_browser_session.browser_address,
|
||||
|
||||
@@ -281,6 +281,43 @@ async def ask_for_clipboard(vnc_channel: VncChannel) -> None:
|
||||
LOG.exception(f"{class_name} Failed to ask for clipboard via CDP", **vnc_channel.identity)
|
||||
|
||||
|
||||
# TODO(benji): I hate this function. It's messy and gross. Once we remove v1,
|
||||
# we should clean this up.
|
||||
def _build_vnc_url_from_browser_address(browser_address: str) -> str | None:
|
||||
"""
|
||||
Build a routed VNC URL from a V2 K8s routed browser_address.
|
||||
|
||||
V2 K8s routed browser_address format:
|
||||
wss://{domain}/{session_id}/{token}/devtools/browser/{browser_id}
|
||||
|
||||
Returns VNC URL in format:
|
||||
wss://{domain}/vnc/{session_id}/{token}
|
||||
|
||||
Returns None if browser_address is not a V2 routed URL.
|
||||
"""
|
||||
if not browser_address:
|
||||
return None
|
||||
|
||||
parsed = urlparse(browser_address)
|
||||
|
||||
# Check if this looks like a V2 routed URL (wss:// with token in path)
|
||||
if parsed.scheme not in ("wss", "ws"):
|
||||
return None
|
||||
|
||||
# Parse path: /{session_id}/{token}/devtools/browser/{browser_id}
|
||||
path_parts = parsed.path.strip("/").split("/")
|
||||
if len(path_parts) < 4 or path_parts[2] != "devtools":
|
||||
return None
|
||||
|
||||
session_id = path_parts[0]
|
||||
token = path_parts[1]
|
||||
domain = parsed.netloc
|
||||
|
||||
# Build VNC URL with same domain and token
|
||||
scheme = "wss" if parsed.scheme == "wss" else "ws"
|
||||
return f"{scheme}://{domain}/vnc/{session_id}/{token}"
|
||||
|
||||
|
||||
async def loop_stream_vnc(vnc_channel: VncChannel) -> None:
|
||||
"""
|
||||
Actually stream the VNC data between a frontend and a browser.
|
||||
@@ -292,24 +329,28 @@ async def loop_stream_vnc(vnc_channel: VncChannel) -> None:
|
||||
browser_session = vnc_channel.browser_session
|
||||
class_name = vnc_channel.class_name
|
||||
|
||||
if browser_session:
|
||||
if browser_session.ip_address:
|
||||
if ":" in browser_session.ip_address:
|
||||
ip, _ = browser_session.ip_address.split(":")
|
||||
vnc_url = f"ws://{ip}:{vnc_channel.vnc_port}"
|
||||
else:
|
||||
vnc_url = f"ws://{browser_session.ip_address}:{vnc_channel.vnc_port}"
|
||||
else:
|
||||
browser_address = browser_session.browser_address
|
||||
|
||||
parsed_browser_address = urlparse(browser_address)
|
||||
host = parsed_browser_address.hostname
|
||||
vnc_url = f"ws://{host}:{vnc_channel.vnc_port}"
|
||||
else:
|
||||
if not browser_session:
|
||||
raise Exception(f"{class_name} No browser session associated with vnc channel.")
|
||||
|
||||
# NOTE(jdo:streaming-local-dev)
|
||||
# vnc_url = "ws://localhost:6080"
|
||||
# First, check if this is a V2 K8s routed session by examining browser_address
|
||||
# V2 sessions have browser_address like: wss://{domain}/{session_id}/{token}/devtools/...
|
||||
# For these, we need to route VNC through the same nginx proxy
|
||||
routed_vnc_url = _build_vnc_url_from_browser_address(browser_session.browser_address)
|
||||
if routed_vnc_url:
|
||||
vnc_url = routed_vnc_url
|
||||
elif browser_session.ip_address:
|
||||
# V1 ECS sessions: Direct IP connection (ip_address is a public/reachable IP)
|
||||
if ":" in browser_session.ip_address:
|
||||
ip, _ = browser_session.ip_address.split(":")
|
||||
vnc_url = f"ws://{ip}:{vnc_channel.vnc_port}"
|
||||
else:
|
||||
vnc_url = f"ws://{browser_session.ip_address}:{vnc_channel.vnc_port}"
|
||||
else:
|
||||
# Last resort: parse browser_address hostname
|
||||
browser_address = browser_session.browser_address
|
||||
parsed_browser_address = urlparse(browser_address)
|
||||
host = parsed_browser_address.hostname
|
||||
vnc_url = f"ws://{host}:{vnc_channel.vnc_port}"
|
||||
|
||||
LOG.info(
|
||||
f"{class_name} Connecting to vnc url.",
|
||||
@@ -317,7 +358,12 @@ async def loop_stream_vnc(vnc_channel: VncChannel) -> None:
|
||||
**vnc_channel.identity,
|
||||
)
|
||||
|
||||
async with websockets.connect(vnc_url) as novnc_ws:
|
||||
# For routed VNC URLs (wss://), we need to pass the x-api-key header for authentication
|
||||
extra_headers: dict[str, str] = {}
|
||||
if vnc_url.startswith("wss://") and vnc_channel.x_api_key:
|
||||
extra_headers["x-api-key"] = vnc_channel.x_api_key
|
||||
|
||||
async with websockets.connect(vnc_url, additional_headers=extra_headers) as novnc_ws:
|
||||
|
||||
async def frontend_to_browser() -> None:
|
||||
nonlocal class_name
|
||||
|
||||
@@ -5,8 +5,10 @@ Provides WS endpoints for streaming messages to/from our frontend application.
|
||||
import structlog
|
||||
from fastapi import WebSocket
|
||||
|
||||
from skyvern.config import settings
|
||||
from skyvern.forge.sdk.routes.routers import base_router, legacy_base_router
|
||||
from skyvern.forge.sdk.routes.streaming.auth import auth
|
||||
from skyvern.forge.sdk.routes.streaming.auth import _auth as local_auth
|
||||
from skyvern.forge.sdk.routes.streaming.auth import auth as real_auth
|
||||
from skyvern.forge.sdk.routes.streaming.channels.message import (
|
||||
Loops,
|
||||
MessageChannel,
|
||||
@@ -60,6 +62,7 @@ async def messages(
|
||||
client_id: str | None = None,
|
||||
token: str | None = None,
|
||||
) -> None:
|
||||
auth = local_auth if settings.ENV == "local" else real_auth
|
||||
organization_id = await auth(apikey=apikey, token=token, websocket=websocket)
|
||||
|
||||
if not organization_id:
|
||||
@@ -98,7 +101,7 @@ async def messages(
|
||||
)
|
||||
else:
|
||||
LOG.error(
|
||||
"Message channel: no browser_session_id or workflow_run_id provided.",
|
||||
"[WS] messages: no browser_session_id or workflow_run_id provided",
|
||||
client_id=client_id,
|
||||
organization_id=organization_id,
|
||||
)
|
||||
@@ -139,5 +142,4 @@ async def messages(
|
||||
workflow_run_id=workflow_run_id,
|
||||
organization_id=organization_id,
|
||||
)
|
||||
|
||||
await message_channel.close(reason="message-stream-closed")
|
||||
|
||||
@@ -45,12 +45,12 @@ async def verify_browser_session(
|
||||
"""
|
||||
Verify the browser session exists, and is usable.
|
||||
"""
|
||||
|
||||
if settings.ENV == "local":
|
||||
dummy_browser_session = AddressablePersistentBrowserSession(
|
||||
persistent_browser_session_id=browser_session_id,
|
||||
organization_id=organization_id,
|
||||
browser_address="0.0.0.0:9223",
|
||||
ip_address="localhost",
|
||||
created_at=datetime.now(),
|
||||
modified_at=datetime.now(),
|
||||
)
|
||||
@@ -199,6 +199,7 @@ async def verify_workflow_run(
|
||||
persistent_browser_session_id=workflow_run_id,
|
||||
organization_id=organization_id,
|
||||
browser_address="0.0.0.0:9223",
|
||||
ip_address="localhost",
|
||||
created_at=datetime.now(),
|
||||
modified_at=datetime.now(),
|
||||
)
|
||||
|
||||
@@ -12,8 +12,10 @@ NOTE(jdo:streaming-local-dev)
|
||||
import structlog
|
||||
from fastapi import WebSocket
|
||||
|
||||
from skyvern.config import settings
|
||||
from skyvern.forge.sdk.routes.routers import base_router, legacy_base_router
|
||||
from skyvern.forge.sdk.routes.streaming.auth import auth
|
||||
from skyvern.forge.sdk.routes.streaming.auth import _auth as local_auth
|
||||
from skyvern.forge.sdk.routes.streaming.auth import auth as real_auth
|
||||
from skyvern.forge.sdk.routes.streaming.channels.vnc import (
|
||||
Loops,
|
||||
VncChannel,
|
||||
@@ -86,6 +88,7 @@ async def stream(
|
||||
workflow_run_id=workflow_run_id,
|
||||
)
|
||||
|
||||
auth = local_auth if settings.ENV == "local" else real_auth
|
||||
organization_id = await auth(apikey=apikey, token=token, websocket=websocket)
|
||||
|
||||
if not organization_id:
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
from datetime import datetime
|
||||
from decimal import Decimal
|
||||
from enum import StrEnum
|
||||
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
@@ -48,6 +49,11 @@ class PersistentBrowserSession(BaseModel):
|
||||
status: str | None = None
|
||||
timeout_minutes: int | None = None
|
||||
proxy_location: ProxyLocation | None = None
|
||||
instance_type: str | None = None
|
||||
vcpu_millicores: int | None = None
|
||||
memory_mb: int | None = None
|
||||
duration_ms: int | None = None
|
||||
compute_cost: Decimal | None = None
|
||||
started_at: datetime | None = None
|
||||
completed_at: datetime | None = None
|
||||
created_at: datetime
|
||||
|
||||
Reference in New Issue
Block a user