From 75404962897daf5d000bc8569782012e2d67cc5d Mon Sep 17 00:00:00 2001 From: Marc Kelechava Date: Wed, 17 Dec 2025 13:45:29 -0800 Subject: [PATCH] Browser Profile Persistence for Self-Hosted OSS (#4268) --- skyvern/forge/sdk/workflow/service.py | 8 ++- skyvern/webeye/browser_factory.py | 69 +++++++++++++++++-- skyvern/webeye/persistent_sessions_manager.py | 23 +++++++ 3 files changed, 94 insertions(+), 6 deletions(-) diff --git a/skyvern/forge/sdk/workflow/service.py b/skyvern/forge/sdk/workflow/service.py index bb9930eb..f90b7a50 100644 --- a/skyvern/forge/sdk/workflow/service.py +++ b/skyvern/forge/sdk/workflow/service.py @@ -2730,7 +2730,13 @@ class WorkflowService: await self.persist_video_data(browser_state, workflow, workflow_run) if tasks: await self.persist_debug_artifacts(browser_state, tasks[-1], workflow, workflow_run) - if workflow.persist_browser_session and browser_state.browser_artifacts.browser_session_dir: + # Skip workflow-scoped session save when using browser_profile_id to avoid conflicts + # (profile persistence is handled separately via the profile storage) + if ( + workflow.persist_browser_session + and browser_state.browser_artifacts.browser_session_dir + and not workflow_run.browser_profile_id + ): await app.STORAGE.store_browser_session( workflow_run.organization_id, workflow.workflow_permanent_id, diff --git a/skyvern/webeye/browser_factory.py b/skyvern/webeye/browser_factory.py index 8a851b84..d10afac5 100644 --- a/skyvern/webeye/browser_factory.py +++ b/skyvern/webeye/browser_factory.py @@ -13,7 +13,7 @@ import time import uuid from datetime import datetime from pathlib import Path -from typing import Any, Awaitable, Callable, Protocol +from typing import Any, Awaitable, Callable, Protocol, cast from urllib.parse import parse_qsl, urlparse import psutil @@ -26,6 +26,7 @@ from skyvern.constants import ( SKYVERN_DIR, ) from skyvern.exceptions import UnknownBrowserType, UnknownErrorWhileCreatingBrowserContext +from skyvern.forge import app from skyvern.forge.sdk.api.files import get_download_dir, make_temp_directory from skyvern.forge.sdk.core.skyvern_context import current, ensure_context from skyvern.schemas.runs import ProxyLocation, get_tzinfo_from_proxy @@ -420,7 +421,33 @@ async def _create_headless_chromium( apply_download_behaviour=True, ) - user_data_dir = make_temp_directory(prefix="skyvern_browser_") + # Check for browser_profile_id and load from storage if available + browser_profile_id = cast(str | None, kwargs.get("browser_profile_id")) + organization_id_for_profile = cast(str | None, kwargs.get("organization_id")) + user_data_dir: str | None = None + + if browser_profile_id and organization_id_for_profile: + profile_dir = await app.STORAGE.retrieve_browser_profile( + organization_id=organization_id_for_profile, + profile_id=browser_profile_id, + ) + if profile_dir: + user_data_dir = profile_dir + LOG.info( + "Using browser profile", + browser_profile_id=browser_profile_id, + profile_dir=profile_dir, + ) + else: + LOG.warning( + "Browser profile not found, using temp directory", + browser_profile_id=browser_profile_id, + organization_id=organization_id_for_profile, + ) + + if not user_data_dir: + user_data_dir = make_temp_directory(prefix="skyvern_browser_") + download_dir = initialize_download_dir() BrowserContextFactory.update_chromium_browser_preferences( user_data_dir=user_data_dir, @@ -437,7 +464,10 @@ async def _create_headless_chromium( } ) - browser_artifacts = BrowserContextFactory.build_browser_artifacts(har_path=browser_args["record_har_path"]) + browser_artifacts = BrowserContextFactory.build_browser_artifacts( + har_path=browser_args["record_har_path"], + browser_session_dir=user_data_dir, + ) browser_context = await playwright.chromium.launch_persistent_context(**browser_args) return browser_context, browser_artifacts, None @@ -456,7 +486,33 @@ async def _create_headful_chromium( apply_download_behaviour=True, ) - user_data_dir = make_temp_directory(prefix="skyvern_browser_") + # Check for browser_profile_id and load from storage if available + browser_profile_id = cast(str | None, kwargs.get("browser_profile_id")) + organization_id_for_profile = cast(str | None, kwargs.get("organization_id")) + user_data_dir: str | None = None + + if browser_profile_id and organization_id_for_profile: + profile_dir = await app.STORAGE.retrieve_browser_profile( + organization_id=organization_id_for_profile, + profile_id=browser_profile_id, + ) + if profile_dir: + user_data_dir = profile_dir + LOG.info( + "Using browser profile", + browser_profile_id=browser_profile_id, + profile_dir=profile_dir, + ) + else: + LOG.warning( + "Browser profile not found, using temp directory", + browser_profile_id=browser_profile_id, + organization_id=organization_id_for_profile, + ) + + if not user_data_dir: + user_data_dir = make_temp_directory(prefix="skyvern_browser_") + download_dir = initialize_download_dir() BrowserContextFactory.update_chromium_browser_preferences( user_data_dir=user_data_dir, @@ -473,7 +529,10 @@ async def _create_headful_chromium( "headless": False, } ) - browser_artifacts = BrowserContextFactory.build_browser_artifacts(har_path=browser_args["record_har_path"]) + browser_artifacts = BrowserContextFactory.build_browser_artifacts( + har_path=browser_args["record_har_path"], + browser_session_dir=user_data_dir, + ) browser_context = await playwright.chromium.launch_persistent_context(**browser_args) return browser_context, browser_artifacts, None diff --git a/skyvern/webeye/persistent_sessions_manager.py b/skyvern/webeye/persistent_sessions_manager.py index d07dae6e..ca9748cb 100644 --- a/skyvern/webeye/persistent_sessions_manager.py +++ b/skyvern/webeye/persistent_sessions_manager.py @@ -9,6 +9,7 @@ from playwright._impl._errors import TargetClosedError from skyvern.config import settings from skyvern.exceptions import BrowserSessionNotRenewable, MissingBrowserAddressError +from skyvern.forge import app from skyvern.forge.sdk.db.agent_db import AgentDB from skyvern.forge.sdk.db.polls import wait_on_persistent_browser_address from skyvern.forge.sdk.schemas.persistent_browser_sessions import ( @@ -313,6 +314,28 @@ class PersistentSessionsManager: organization_id=organization_id, session_id=browser_session_id, ) + + # Export session profile before closing (so it can be used to create browser profiles) + browser_artifacts = browser_session.browser_state.browser_artifacts + if browser_artifacts and browser_artifacts.browser_session_dir: + try: + await app.STORAGE.store_browser_profile( + organization_id=organization_id, + profile_id=browser_session_id, + directory=browser_artifacts.browser_session_dir, + ) + LOG.info( + "Exported browser session profile", + browser_session_id=browser_session_id, + organization_id=organization_id, + ) + except Exception: + LOG.exception( + "Failed to export browser session profile", + browser_session_id=browser_session_id, + organization_id=organization_id, + ) + self._browser_sessions.pop(browser_session_id, None) try: