From 3db5ec6cd746c783f3ab447dab9a1a4cfc656d6e Mon Sep 17 00:00:00 2001 From: Marc Kelechava Date: Thu, 6 Nov 2025 01:24:39 -0800 Subject: [PATCH] [SKY-6974] Browser Profiles [2/3] Marc/backend browser session profiles (#3923) --- ...1_06_0805-89d531e8b4ed_oss_sync_alembic.py | 33 ++ ...1_06_0857-7581811d57b1_oss_sync_alembic.py | 27 ++ ...1_06_0914-39e16cf92225_oss_sync_alembic.py | 27 ++ skyvern/client/types/workflow_run_response.py | 5 + .../core/script_generations/skyvern_page.py | 1 + skyvern/forge/agent.py | 1 + skyvern/forge/sdk/artifact/storage/local.py | 86 +++- skyvern/forge/sdk/db/client.py | 2 + skyvern/forge/sdk/db/models.py | 1 + skyvern/forge/sdk/db/utils.py | 1 + skyvern/forge/sdk/routes/__init__.py | 1 + skyvern/forge/sdk/routes/agent_protocol.py | 1 + skyvern/forge/sdk/routes/browser_profiles.py | 387 ++++++++++++++++++ skyvern/forge/sdk/routes/run_blocks.py | 3 + skyvern/forge/sdk/schemas/browser_profiles.py | 19 +- skyvern/forge/sdk/workflow/models/block.py | 7 +- skyvern/forge/sdk/workflow/models/workflow.py | 11 +- skyvern/forge/sdk/workflow/service.py | 33 +- skyvern/schemas/runs.py | 17 +- skyvern/services/block_service.py | 1 + skyvern/services/task_v2_service.py | 10 +- skyvern/services/workflow_service.py | 2 + skyvern/webeye/browser_factory.py | 6 + skyvern/webeye/browser_manager.py | 7 + 24 files changed, 662 insertions(+), 27 deletions(-) create mode 100644 alembic/versions/2025_11_06_0805-89d531e8b4ed_oss_sync_alembic.py create mode 100644 alembic/versions/2025_11_06_0857-7581811d57b1_oss_sync_alembic.py create mode 100644 alembic/versions/2025_11_06_0914-39e16cf92225_oss_sync_alembic.py create mode 100644 skyvern/forge/sdk/routes/browser_profiles.py diff --git a/alembic/versions/2025_11_06_0805-89d531e8b4ed_oss_sync_alembic.py b/alembic/versions/2025_11_06_0805-89d531e8b4ed_oss_sync_alembic.py new file mode 100644 index 00000000..1375015b --- /dev/null +++ b/alembic/versions/2025_11_06_0805-89d531e8b4ed_oss_sync_alembic.py @@ -0,0 +1,33 @@ +"""oss sync alembic + +Revision ID: 89d531e8b4ed +Revises: 2c34dee3304e +Create Date: 2025-11-06 08:05:01.832765+00:00 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "89d531e8b4ed" +down_revision: Union[str, None] = "2c34dee3304e" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.add_column("workflow_runs", sa.Column("browser_profile_id", sa.String(), nullable=True)) + op.create_index(op.f("ix_workflow_runs_browser_profile_id"), "workflow_runs", ["browser_profile_id"], unique=False) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_index(op.f("ix_workflow_runs_browser_profile_id"), table_name="workflow_runs") + op.drop_column("workflow_runs", "browser_profile_id") + # ### end Alembic commands ### diff --git a/alembic/versions/2025_11_06_0857-7581811d57b1_oss_sync_alembic.py b/alembic/versions/2025_11_06_0857-7581811d57b1_oss_sync_alembic.py new file mode 100644 index 00000000..eab38ff0 --- /dev/null +++ b/alembic/versions/2025_11_06_0857-7581811d57b1_oss_sync_alembic.py @@ -0,0 +1,27 @@ +"""oss sync alembic + +Revision ID: 7581811d57b1 +Revises: 89d531e8b4ed +Create Date: 2025-11-06 08:57:29.949001+00:00 + +""" + +from typing import Sequence, Union + +# revision identifiers, used by Alembic. +revision: str = "7581811d57b1" +down_revision: Union[str, None] = "89d531e8b4ed" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + pass + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + pass + # ### end Alembic commands ### diff --git a/alembic/versions/2025_11_06_0914-39e16cf92225_oss_sync_alembic.py b/alembic/versions/2025_11_06_0914-39e16cf92225_oss_sync_alembic.py new file mode 100644 index 00000000..4e006eaa --- /dev/null +++ b/alembic/versions/2025_11_06_0914-39e16cf92225_oss_sync_alembic.py @@ -0,0 +1,27 @@ +"""oss sync alembic + +Revision ID: 39e16cf92225 +Revises: 7581811d57b1 +Create Date: 2025-11-06 09:14:11.912223+00:00 + +""" + +from typing import Sequence, Union + +# revision identifiers, used by Alembic. +revision: str = "39e16cf92225" +down_revision: Union[str, None] = "7581811d57b1" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + pass + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + pass + # ### end Alembic commands ### diff --git a/skyvern/client/types/workflow_run_response.py b/skyvern/client/types/workflow_run_response.py index 71cd462e..0459d13a 100644 --- a/skyvern/client/types/workflow_run_response.py +++ b/skyvern/client/types/workflow_run_response.py @@ -83,6 +83,11 @@ class WorkflowRunResponse(UniversalBaseModel): ID of the Skyvern persistent browser session used for this run """ + browser_profile_id: typing.Optional[str] = pydantic.Field(default=None) + """ + ID of the browser profile used for this run + """ + max_screenshot_scrolls: typing.Optional[int] = pydantic.Field(default=None) """ The maximum number of scrolls for the post action screenshot. When it's None or 0, it takes the current viewpoint screenshot diff --git a/skyvern/core/script_generations/skyvern_page.py b/skyvern/core/script_generations/skyvern_page.py index c92aece8..ad3725ba 100644 --- a/skyvern/core/script_generations/skyvern_page.py +++ b/skyvern/core/script_generations/skyvern_page.py @@ -95,6 +95,7 @@ class SkyvernPage: browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run( workflow_run=workflow_run, browser_session_id=browser_session_id, + browser_profile_id=workflow_run.browser_profile_id, ) else: raise WorkflowRunNotFound(workflow_run_id=context.workflow_run_id) diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py index 77614ae9..a3a3c69c 100644 --- a/skyvern/forge/agent.py +++ b/skyvern/forge/agent.py @@ -1918,6 +1918,7 @@ class ForgeAgent: workflow_run=workflow_run, url=task.url, browser_session_id=browser_session_id, + browser_profile_id=workflow_run.browser_profile_id, ) else: browser_state = await app.BROWSER_MANAGER.get_or_create_for_task( diff --git a/skyvern/forge/sdk/artifact/storage/local.py b/skyvern/forge/sdk/artifact/storage/local.py index b4637555..d39e5011 100644 --- a/skyvern/forge/sdk/artifact/storage/local.py +++ b/skyvern/forge/sdk/artifact/storage/local.py @@ -180,58 +180,92 @@ class LocalStorage(BaseStorage): return None async def store_browser_session(self, organization_id: str, workflow_permanent_id: str, directory: str) -> None: - stored_folder_path = Path(settings.BROWSER_SESSION_BASE_PATH) / organization_id / workflow_permanent_id - if directory == str(stored_folder_path): + stored_folder_path = self._resolve_browser_storage_path(organization_id, workflow_permanent_id) + if stored_folder_path is None: + LOG.warning( + "Refused to store browser session outside storage base path", + organization_id=organization_id, + workflow_permanent_id=workflow_permanent_id, + base_path=settings.BROWSER_SESSION_BASE_PATH, + ) + return + source_directory = Path(directory).resolve() + if source_directory == stored_folder_path: return self._create_directories_if_not_exists(stored_folder_path) LOG.info( "Storing browser session locally", organization_id=organization_id, workflow_permanent_id=workflow_permanent_id, - directory=directory, - browser_session_path=stored_folder_path, + directory=str(source_directory), + browser_session_path=str(stored_folder_path), ) # Copy all files from the directory to the stored folder - for root, _, files in os.walk(directory): + for root, _, files in os.walk(source_directory): for file in files: source_file_path = Path(root) / file - relative_path = source_file_path.relative_to(directory) + relative_path = source_file_path.relative_to(source_directory) target_file_path = stored_folder_path / relative_path self._create_directories_if_not_exists(target_file_path) shutil.copy2(source_file_path, target_file_path) async def retrieve_browser_session(self, organization_id: str, workflow_permanent_id: str) -> str | None: - stored_folder_path = Path(settings.BROWSER_SESSION_BASE_PATH) / organization_id / workflow_permanent_id + stored_folder_path = self._resolve_browser_storage_path(organization_id, workflow_permanent_id) + if stored_folder_path is None: + LOG.warning( + "Refused to retrieve browser session outside storage base path", + organization_id=organization_id, + workflow_permanent_id=workflow_permanent_id, + base_path=settings.BROWSER_SESSION_BASE_PATH, + ) + return None if not stored_folder_path.exists(): return None return str(stored_folder_path) async def store_browser_profile(self, organization_id: str, profile_id: str, directory: str) -> None: """Store browser profile locally.""" - stored_folder_path = Path(settings.BROWSER_SESSION_BASE_PATH) / organization_id / "profiles" / profile_id - if directory == str(stored_folder_path): + stored_folder_path = self._resolve_browser_storage_path(organization_id, "profiles", profile_id) + if stored_folder_path is None: + LOG.warning( + "Refused to store browser profile outside storage base path", + organization_id=organization_id, + profile_id=profile_id, + base_path=settings.BROWSER_SESSION_BASE_PATH, + ) + return + source_directory = Path(directory).resolve() + if source_directory == stored_folder_path: return self._create_directories_if_not_exists(stored_folder_path) LOG.info( "Storing browser profile locally", organization_id=organization_id, profile_id=profile_id, - directory=directory, - browser_profile_path=stored_folder_path, + directory=str(source_directory), + browser_profile_path=str(stored_folder_path), ) - for root, _, files in os.walk(directory): + for root, _, files in os.walk(source_directory): for file in files: source_file_path = Path(root) / file - relative_path = source_file_path.relative_to(directory) + relative_path = source_file_path.relative_to(source_directory) target_file_path = stored_folder_path / relative_path self._create_directories_if_not_exists(target_file_path) shutil.copy2(source_file_path, target_file_path) async def retrieve_browser_profile(self, organization_id: str, profile_id: str) -> str | None: """Retrieve browser profile from local storage.""" - stored_folder_path = Path(settings.BROWSER_SESSION_BASE_PATH) / organization_id / "profiles" / profile_id + stored_folder_path = self._resolve_browser_storage_path(organization_id, "profiles", profile_id) + if stored_folder_path is None: + LOG.warning( + "Refused to retrieve browser profile outside storage base path", + organization_id=organization_id, + profile_id=profile_id, + base_path=settings.BROWSER_SESSION_BASE_PATH, + ) + return None if not stored_folder_path.exists(): return None return str(stored_folder_path) @@ -282,6 +316,30 @@ class LocalStorage(BaseStorage): path = path_including_file_name.parent path.mkdir(parents=True, exist_ok=True) + @staticmethod + def _resolve_browser_storage_path(*relative_parts: str) -> Path | None: + if not relative_parts: + return None + normalized_parts: list[str] = [] + for part in relative_parts: + if part in {"", "."}: + return None + part_path = Path(part) + if part_path.is_absolute() or part_path.drive: + return None + if any(segment in {"", ".", ".."} for segment in part_path.parts): + return None + normalized_parts.extend(part_path.parts) + if not normalized_parts: + return None + base_path = Path(settings.BROWSER_SESSION_BASE_PATH).resolve() + candidate = base_path.joinpath(*normalized_parts).resolve() + try: + candidate.relative_to(base_path) + except ValueError: + return None + return candidate + async def save_legacy_file( self, *, organization_id: str, filename: str, fileObj: BinaryIO ) -> tuple[str, str] | None: diff --git a/skyvern/forge/sdk/db/client.py b/skyvern/forge/sdk/db/client.py index e734eaf0..21311815 100644 --- a/skyvern/forge/sdk/db/client.py +++ b/skyvern/forge/sdk/db/client.py @@ -2239,6 +2239,7 @@ class AgentDB: workflow_id: str, organization_id: str, browser_session_id: str | None = None, + browser_profile_id: str | None = None, proxy_location: ProxyLocation | None = None, webhook_callback_url: str | None = None, totp_verification_url: str | None = None, @@ -2260,6 +2261,7 @@ class AgentDB: workflow_id=workflow_id, organization_id=organization_id, browser_session_id=browser_session_id, + browser_profile_id=browser_profile_id, proxy_location=proxy_location, status="created", webhook_callback_url=webhook_callback_url, diff --git a/skyvern/forge/sdk/db/models.py b/skyvern/forge/sdk/db/models.py index 3527c918..f5564ae8 100644 --- a/skyvern/forge/sdk/db/models.py +++ b/skyvern/forge/sdk/db/models.py @@ -304,6 +304,7 @@ class WorkflowRunModel(Base): parent_workflow_run_id = Column(String, nullable=True, index=True) organization_id = Column(String, nullable=False, index=True) browser_session_id = Column(String, nullable=True, index=True) + browser_profile_id = Column(String, nullable=True, index=True) status = Column(String, nullable=False) failure_reason = Column(String) proxy_location = Column(String) diff --git a/skyvern/forge/sdk/db/utils.py b/skyvern/forge/sdk/db/utils.py index 955ae58e..2ab8ba21 100644 --- a/skyvern/forge/sdk/db/utils.py +++ b/skyvern/forge/sdk/db/utils.py @@ -309,6 +309,7 @@ def convert_to_workflow_run( workflow_id=workflow_run_model.workflow_id, organization_id=workflow_run_model.organization_id, browser_session_id=workflow_run_model.browser_session_id, + browser_profile_id=workflow_run_model.browser_profile_id, status=WorkflowRunStatus[workflow_run_model.status], failure_reason=workflow_run_model.failure_reason, proxy_location=( diff --git a/skyvern/forge/sdk/routes/__init__.py b/skyvern/forge/sdk/routes/__init__.py index 662f0e61..d5199878 100644 --- a/skyvern/forge/sdk/routes/__init__.py +++ b/skyvern/forge/sdk/routes/__init__.py @@ -1,4 +1,5 @@ from skyvern.forge.sdk.routes import agent_protocol # noqa: F401 +from skyvern.forge.sdk.routes import browser_profiles # noqa: F401 from skyvern.forge.sdk.routes import browser_sessions # noqa: F401 from skyvern.forge.sdk.routes import credentials # noqa: F401 from skyvern.forge.sdk.routes import debug_sessions # noqa: F401 diff --git a/skyvern/forge/sdk/routes/agent_protocol.py b/skyvern/forge/sdk/routes/agent_protocol.py index 380007e8..6c230c10 100644 --- a/skyvern/forge/sdk/routes/agent_protocol.py +++ b/skyvern/forge/sdk/routes/agent_protocol.py @@ -348,6 +348,7 @@ async def run_workflow( totp_identifier=workflow_run_request.totp_identifier, totp_verification_url=workflow_run_request.totp_url, browser_session_id=workflow_run_request.browser_session_id, + browser_profile_id=workflow_run_request.browser_profile_id, max_screenshot_scrolls=workflow_run_request.max_screenshot_scrolls, extra_http_headers=workflow_run_request.extra_http_headers, browser_address=workflow_run_request.browser_address, diff --git a/skyvern/forge/sdk/routes/browser_profiles.py b/skyvern/forge/sdk/routes/browser_profiles.py new file mode 100644 index 00000000..65aec516 --- /dev/null +++ b/skyvern/forge/sdk/routes/browser_profiles.py @@ -0,0 +1,387 @@ +import asyncio +from typing import NoReturn + +import structlog +from fastapi import Depends, HTTPException, Path, Query, status +from sqlalchemy.exc import IntegrityError + +from skyvern.exceptions import ( + BrowserProfileNotFound, + BrowserSessionNotFound, + WorkflowNotFound, + WorkflowRunNotFound, +) +from skyvern.forge import app +from skyvern.forge.sdk.routes.routers import base_router +from skyvern.forge.sdk.schemas.browser_profiles import ( + BrowserProfile, + CreateBrowserProfileRequest, +) +from skyvern.forge.sdk.schemas.organizations import Organization +from skyvern.forge.sdk.services import org_auth_service + +LOG = structlog.get_logger() + + +def _handle_duplicate_profile_name(*, organization_id: str, name: str, exc: IntegrityError) -> NoReturn: + LOG.warning( + "Duplicate browser profile name", + organization_id=organization_id, + name=name, + exc_info=True, + ) + raise HTTPException( + status_code=status.HTTP_409_CONFLICT, + detail=f"A browser profile named '{name}' already exists. Use a different name or delete the existing profile.", + ) from exc + + +@base_router.post( + "/browser_profiles", + response_model=BrowserProfile, + tags=["Browser Profiles"], + summary="Create a browser profile", +) +@base_router.post( + "/browser_profiles/", + response_model=BrowserProfile, + include_in_schema=False, +) +async def create_browser_profile( + request: CreateBrowserProfileRequest, + current_org: Organization = Depends(org_auth_service.get_current_org), +) -> BrowserProfile: + organization_id = current_org.organization_id + LOG.info( + "Creating browser profile", + organization_id=organization_id, + browser_session_id=request.browser_session_id, + workflow_run_id=request.workflow_run_id, + ) + + if request.browser_session_id: + browser_session_id = request.browser_session_id + return await _create_profile_from_session( + organization_id=organization_id, + name=request.name, + description=request.description, + browser_session_id=browser_session_id, + ) + + workflow_run_id = request.workflow_run_id + assert workflow_run_id is not None # model validator guarantees one of the sources + return await _create_profile_from_workflow_run( + organization_id=organization_id, + name=request.name, + description=request.description, + workflow_run_id=workflow_run_id, + ) + + +@base_router.get( + "/browser_profiles", + response_model=list[BrowserProfile], + tags=["Browser Profiles"], + summary="List browser profiles", + description="Get all browser profiles for the organization", +) +@base_router.get( + "/browser_profiles/", + response_model=list[BrowserProfile], + include_in_schema=False, +) +async def list_browser_profiles( + include_deleted: bool = Query(default=False, description="Include deleted browser profiles"), + current_org: Organization = Depends(org_auth_service.get_current_org), +) -> list[BrowserProfile]: + """List all browser profiles for the current organization.""" + organization_id = current_org.organization_id + LOG.info( + "Listing browser profiles", + organization_id=organization_id, + include_deleted=include_deleted, + ) + + profiles = await app.DATABASE.list_browser_profiles( + organization_id=organization_id, + include_deleted=include_deleted, + ) + + LOG.info( + "Listed browser profiles", + organization_id=organization_id, + count=len(profiles), + ) + return profiles + + +@base_router.get( + "/browser_profiles/{profile_id}", + response_model=BrowserProfile, + tags=["Browser Profiles"], + summary="Get browser profile", + description="Get a specific browser profile by ID", + responses={ + 200: {"description": "Successfully retrieved browser profile"}, + 404: {"description": "Browser profile not found"}, + }, +) +@base_router.get( + "/browser_profiles/{profile_id}/", + response_model=BrowserProfile, + include_in_schema=False, +) +async def get_browser_profile( + profile_id: str = Path( + ..., + description="The ID of the browser profile. browser_profile_id starts with `bp_`", + examples=["bp_123456"], + ), + current_org: Organization = Depends(org_auth_service.get_current_org), +) -> BrowserProfile: + """Get a browser profile for the current organization.""" + organization_id = current_org.organization_id + LOG.info( + "Getting browser profile", + organization_id=organization_id, + browser_profile_id=profile_id, + ) + + profile = await app.DATABASE.get_browser_profile( + profile_id=profile_id, + organization_id=organization_id, + ) + + if not profile: + LOG.warning( + "Browser profile not found", + organization_id=organization_id, + browser_profile_id=profile_id, + ) + raise BrowserProfileNotFound(profile_id=profile_id, organization_id=organization_id) + + LOG.info( + "Retrieved browser profile", + organization_id=organization_id, + browser_profile_id=profile_id, + ) + return profile + + +@base_router.delete( + "/browser_profiles/{profile_id}", + status_code=status.HTTP_204_NO_CONTENT, + tags=["Browser Profiles"], + summary="Delete browser profile", + description="Delete a browser profile (soft delete)", + responses={ + 204: {"description": "Successfully deleted browser profile"}, + 404: {"description": "Browser profile not found"}, + }, +) +@base_router.delete( + "/browser_profiles/{profile_id}/", + status_code=status.HTTP_204_NO_CONTENT, + include_in_schema=False, +) +async def delete_browser_profile( + profile_id: str = Path( + ..., + description="The ID of the browser profile to delete. browser_profile_id starts with `bp_`", + examples=["bp_123456"], + ), + current_org: Organization = Depends(org_auth_service.get_current_org), +) -> None: + """Delete a browser profile for the current organization.""" + organization_id = current_org.organization_id + LOG.info( + "Deleting browser profile", + organization_id=organization_id, + browser_profile_id=profile_id, + ) + + try: + await app.DATABASE.delete_browser_profile( + profile_id=profile_id, + organization_id=organization_id, + ) + except BrowserProfileNotFound: + LOG.warning( + "Browser profile not found for deletion", + organization_id=organization_id, + browser_profile_id=profile_id, + ) + raise + + LOG.info( + "Deleted browser profile", + organization_id=organization_id, + browser_profile_id=profile_id, + ) + + +async def _create_profile_from_session( + *, + organization_id: str, + name: str, + description: str | None, + browser_session_id: str, +) -> BrowserProfile: + browser_state = await app.PERSISTENT_SESSIONS_MANAGER.get_browser_state(browser_session_id, organization_id) + if browser_state is None: + LOG.warning( + "Browser session not found for profile creation", + organization_id=organization_id, + browser_session_id=browser_session_id, + ) + raise BrowserSessionNotFound(browser_session_id) + + session_dir = browser_state.browser_artifacts.browser_session_dir + if not session_dir: + LOG.warning( + "Browser session has no persisted data", + organization_id=organization_id, + browser_session_id=browser_session_id, + ) + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Browser session does not have persisted data to store", + ) + + try: + profile = await app.DATABASE.create_browser_profile( + organization_id=organization_id, + name=name, + description=description, + ) + except IntegrityError as exc: + _handle_duplicate_profile_name(organization_id=organization_id, name=name, exc=exc) + + try: + await app.STORAGE.store_browser_profile( + organization_id=organization_id, + profile_id=profile.browser_profile_id, + directory=session_dir, + ) + except Exception: + # Rollback: delete the profile if storage fails + await app.DATABASE.delete_browser_profile(profile.browser_profile_id, organization_id=organization_id) + LOG.error( + "Failed to store browser profile artifacts, rolled back profile creation", + organization_id=organization_id, + browser_profile_id=profile.browser_profile_id, + exc_info=True, + ) + raise + + LOG.info( + "Created browser profile from session", + organization_id=organization_id, + browser_profile_id=profile.browser_profile_id, + browser_session_id=browser_session_id, + ) + return profile + + +async def _create_profile_from_workflow_run( + *, + organization_id: str, + name: str, + description: str | None, + workflow_run_id: str, +) -> BrowserProfile: + workflow_run = await app.DATABASE.get_workflow_run(workflow_run_id, organization_id=organization_id) + if not workflow_run: + LOG.warning( + "Workflow run not found for profile creation", + organization_id=organization_id, + workflow_run_id=workflow_run_id, + ) + raise WorkflowRunNotFound(workflow_run_id) + + workflow = await app.DATABASE.get_workflow( + workflow_id=workflow_run.workflow_id, + organization_id=organization_id, + ) + if not workflow: + LOG.warning( + "Workflow not found for profile creation", + organization_id=organization_id, + workflow_id=workflow_run.workflow_id, + workflow_permanent_id=workflow_run.workflow_permanent_id, + ) + raise WorkflowNotFound(workflow_id=workflow_run.workflow_id) + + if not getattr(workflow, "persist_browser_session", False): + LOG.warning( + "Workflow does not persist browser sessions", + organization_id=organization_id, + workflow_run_id=workflow_run_id, + workflow_permanent_id=workflow.workflow_permanent_id, + ) + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Workflow does not persist browser sessions", + ) + + # The session persistence task runs asynchronously after workflow completion. + # Poll for a short grace period so that immediate profile-creation requests + # succeed without forcing clients to implement retry loops. + poll_attempts = 30 # ~30 s max wait + session_dir: str | None = None + for attempt in range(poll_attempts): + session_dir = await app.STORAGE.retrieve_browser_session( + organization_id=organization_id, + workflow_permanent_id=workflow.workflow_permanent_id, + ) + if session_dir: + break # session found + # Avoid busy-waiting; sleep 1 s between attempts (non-blocking asyncio sleep) + if attempt < poll_attempts - 1: + await asyncio.sleep(1) + + if not session_dir: + LOG.warning( + "Workflow run has no persisted session after waiting", + organization_id=organization_id, + workflow_run_id=workflow_run_id, + ) + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Workflow run does not have a persisted session", + ) + + try: + profile = await app.DATABASE.create_browser_profile( + organization_id=organization_id, + name=name, + description=description, + ) + except IntegrityError as exc: + _handle_duplicate_profile_name(organization_id=organization_id, name=name, exc=exc) + + try: + await app.STORAGE.store_browser_profile( + organization_id=organization_id, + profile_id=profile.browser_profile_id, + directory=session_dir, + ) + LOG.info( + "Created browser profile from workflow run", + organization_id=organization_id, + browser_profile_id=profile.browser_profile_id, + workflow_run_id=workflow_run_id, + ) + except Exception: + # Rollback: delete the profile if storage fails + await app.DATABASE.delete_browser_profile(profile.browser_profile_id, organization_id=organization_id) + LOG.error( + "Failed to store browser profile artifacts, rolled back profile creation", + organization_id=organization_id, + browser_profile_id=profile.browser_profile_id, + workflow_run_id=workflow_run_id, + exc_info=True, + ) + raise + + return profile diff --git a/skyvern/forge/sdk/routes/run_blocks.py b/skyvern/forge/sdk/routes/run_blocks.py index 3299d8a5..a1ba180a 100644 --- a/skyvern/forge/sdk/routes/run_blocks.py +++ b/skyvern/forge/sdk/routes/run_blocks.py @@ -201,6 +201,7 @@ async def login( totp_identifier=login_request.totp_identifier, totp_verification_url=totp_verification_url, browser_session_id=login_request.browser_session_id, + browser_profile_id=login_request.browser_profile_id, browser_address=login_request.browser_address, max_screenshot_scrolls=login_request.max_screenshot_scrolling_times, extra_http_headers=login_request.extra_http_headers, @@ -236,8 +237,10 @@ async def login( totp_url=totp_verification_url, totp_identifier=login_request.totp_identifier, browser_session_id=login_request.browser_session_id, + browser_profile_id=login_request.browser_profile_id, max_screenshot_scrolls=login_request.max_screenshot_scrolling_times, ), app_url=f"{settings.SKYVERN_APP_URL.rstrip('/')}/runs/{workflow_run.workflow_run_id}", browser_session_id=login_request.browser_session_id, + browser_profile_id=login_request.browser_profile_id, ) diff --git a/skyvern/forge/sdk/schemas/browser_profiles.py b/skyvern/forge/sdk/schemas/browser_profiles.py index 60f5d3da..9836abca 100644 --- a/skyvern/forge/sdk/schemas/browser_profiles.py +++ b/skyvern/forge/sdk/schemas/browser_profiles.py @@ -1,6 +1,6 @@ from datetime import datetime -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel, ConfigDict, Field, model_validator class BrowserProfile(BaseModel): @@ -13,3 +13,20 @@ class BrowserProfile(BaseModel): created_at: datetime modified_at: datetime deleted_at: datetime | None = None + + +class CreateBrowserProfileRequest(BaseModel): + name: str = Field(..., description="Name for the browser profile") + description: str | None = Field(None, description="Optional profile description") + browser_session_id: str | None = Field( + default=None, description="Persistent browser session to convert into a profile" + ) + workflow_run_id: str | None = Field( + default=None, description="Workflow run whose persisted session should be captured" + ) + + @model_validator(mode="after") + def _validate_source(self) -> "CreateBrowserProfileRequest": + if bool(self.browser_session_id) == bool(self.workflow_run_id): + raise ValueError("Provide either browser_session_id or workflow_run_id") + return self diff --git a/skyvern/forge/sdk/workflow/models/block.py b/skyvern/forge/sdk/workflow/models/block.py index fadcb789..c04d8637 100644 --- a/skyvern/forge/sdk/workflow/models/block.py +++ b/skyvern/forge/sdk/workflow/models/block.py @@ -645,7 +645,10 @@ class BaseTaskBlock(Block): # the first task block will create the browser state and do the navigation try: browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run( - workflow_run=workflow_run, url=self.url, browser_session_id=browser_session_id + workflow_run=workflow_run, + url=self.url, + browser_session_id=browser_session_id, + browser_profile_id=workflow_run.browser_profile_id, ) working_page = await browser_state.get_working_page() if not working_page: @@ -1586,6 +1589,7 @@ async def wrapper(): workflow_run=workflow_run, url=None, # Code block doesn't need to navigate to a URL initially browser_session_id=browser_session_id, + browser_profile_id=workflow_run.browser_profile_id, ) # Ensure the browser state has a working page await browser_state.check_and_fix_state( @@ -1595,6 +1599,7 @@ async def wrapper(): organization_id=workflow_run.organization_id, extra_http_headers=workflow_run.extra_http_headers, browser_address=workflow_run.browser_address, + browser_profile_id=workflow_run.browser_profile_id, ) except Exception as e: LOG.exception( diff --git a/skyvern/forge/sdk/workflow/models/workflow.py b/skyvern/forge/sdk/workflow/models/workflow.py index b37b417e..4b2ed916 100644 --- a/skyvern/forge/sdk/workflow/models/workflow.py +++ b/skyvern/forge/sdk/workflow/models/workflow.py @@ -2,7 +2,7 @@ from datetime import datetime from enum import StrEnum from typing import Any, List -from pydantic import BaseModel, field_validator +from pydantic import BaseModel, field_validator, model_validator from typing_extensions import deprecated from skyvern.forge.sdk.schemas.files import FileInfo @@ -23,6 +23,7 @@ class WorkflowRequestBody(BaseModel): totp_verification_url: str | None = None totp_identifier: str | None = None browser_session_id: str | None = None + browser_profile_id: str | None = None max_screenshot_scrolls: int | None = None extra_http_headers: dict[str, str] | None = None browser_address: str | None = None @@ -36,6 +37,12 @@ class WorkflowRequestBody(BaseModel): return None return validate_url(url) + @model_validator(mode="after") + def validate_browser_reference(cls, values: "WorkflowRequestBody") -> "WorkflowRequestBody": + if values.browser_session_id and values.browser_profile_id: + raise ValueError("Cannot specify both browser_session_id and browser_profile_id") + return values + @deprecated("Use WorkflowRunResponse instead") class RunWorkflowResponse(BaseModel): @@ -130,6 +137,7 @@ class WorkflowRun(BaseModel): workflow_permanent_id: str organization_id: str browser_session_id: str | None = None + browser_profile_id: str | None = None debug_session_id: str | None = None status: WorkflowRunStatus extra_http_headers: dict[str, str] | None = None @@ -199,6 +207,7 @@ class WorkflowRunResponseBase(BaseModel): task_v2: TaskV2 | None = None workflow_title: str | None = None browser_session_id: str | None = None + browser_profile_id: str | None = None max_screenshot_scrolls: int | None = None browser_address: str | None = None script_run: ScriptRunResponse | None = None diff --git a/skyvern/forge/sdk/workflow/service.py b/skyvern/forge/sdk/workflow/service.py index 7d6d595c..eb4da84e 100644 --- a/skyvern/forge/sdk/workflow/service.py +++ b/skyvern/forge/sdk/workflow/service.py @@ -20,6 +20,7 @@ from skyvern.config import settings from skyvern.constants import GET_DOWNLOADED_FILES_TIMEOUT, SAVE_DOWNLOADED_FILES_TIMEOUT from skyvern.exceptions import ( BlockNotFound, + BrowserProfileNotFound, BrowserSessionNotFound, CannotUpdateWorkflowDueToCodeCache, FailedToSendWebhook, @@ -540,6 +541,7 @@ class WorkflowService: ) workflow_run = await self.get_workflow_run(workflow_run_id=workflow_run_id, organization_id=organization_id) workflow = await self.get_workflow_by_permanent_id(workflow_permanent_id=workflow_run.workflow_permanent_id) + browser_profile_id = workflow_run.browser_profile_id close_browser_on_completion = browser_session_id is None and not workflow_run.browser_address # Set workflow run status to running, create workflow run parameters @@ -608,12 +610,14 @@ class WorkflowService: ) return workflow_run - browser_session = await self.auto_create_browser_session_if_needed( - organization.organization_id, - workflow, - browser_session_id=browser_session_id, - proxy_location=workflow_run.proxy_location, - ) + browser_session = None + if not browser_profile_id: + browser_session = await self.auto_create_browser_session_if_needed( + organization.organization_id, + workflow, + browser_session_id=browser_session_id, + proxy_location=workflow_run.proxy_location, + ) if browser_session: browser_session_id = browser_session.persistent_browser_session_id @@ -640,6 +644,7 @@ class WorkflowService: workflow_run=workflow_run, organization=organization, browser_session_id=browser_session_id, + browser_profile_id=browser_profile_id, block_labels=block_labels, block_outputs=block_outputs, workflow_script=workflow_script, @@ -688,6 +693,7 @@ class WorkflowService: workflow_run: WorkflowRun, organization: Organization, browser_session_id: str | None = None, + browser_profile_id: str | None = None, block_labels: list[str] | None = None, block_outputs: dict[str, Any] | None = None, workflow_script: WorkflowScript | None = None, @@ -1669,7 +1675,7 @@ class WorkflowService: debug_session_id: str | None = None, code_gen: bool | None = None, ) -> WorkflowRun: - # validate the browser session id + # validate the browser session or profile id if workflow_request.browser_session_id: browser_session = await app.DATABASE.get_persistent_browser_session( session_id=workflow_request.browser_session_id, @@ -1678,11 +1684,23 @@ class WorkflowService: if not browser_session: raise BrowserSessionNotFound(browser_session_id=workflow_request.browser_session_id) + if workflow_request.browser_profile_id: + browser_profile = await app.DATABASE.get_browser_profile( + workflow_request.browser_profile_id, + organization_id=organization_id, + ) + if not browser_profile: + raise BrowserProfileNotFound( + profile_id=workflow_request.browser_profile_id, + organization_id=organization_id, + ) + return await app.DATABASE.create_workflow_run( workflow_permanent_id=workflow_permanent_id, workflow_id=workflow_id, organization_id=organization_id, browser_session_id=workflow_request.browser_session_id, + browser_profile_id=workflow_request.browser_profile_id, proxy_location=workflow_request.proxy_location, webhook_callback_url=workflow_request.webhook_callback_url, totp_verification_url=workflow_request.totp_verification_url, @@ -2224,6 +2242,7 @@ class WorkflowService: total_cost=total_cost, workflow_title=workflow.title, browser_session_id=workflow_run.browser_session_id, + browser_profile_id=workflow_run.browser_profile_id, max_screenshot_scrolls=workflow_run.max_screenshot_scrolls, task_v2=task_v2, browser_address=workflow_run.browser_address, diff --git a/skyvern/schemas/runs.py b/skyvern/schemas/runs.py index 9aa41bc3..08e6d586 100644 --- a/skyvern/schemas/runs.py +++ b/skyvern/schemas/runs.py @@ -5,7 +5,7 @@ from enum import StrEnum from typing import Annotated, Any, Literal, Union from zoneinfo import ZoneInfo -from pydantic import BaseModel, Field, field_validator +from pydantic import BaseModel, Field, field_validator, model_validator from skyvern.forge.sdk.schemas.files import FileInfo from skyvern.schemas.docs.doc_examples import ( @@ -387,6 +387,10 @@ class WorkflowRunRequest(BaseModel): default=None, description="ID of a Skyvern browser session to reuse, having it continue from the current screen state", ) + browser_profile_id: str | None = Field( + default=None, + description="ID of a browser profile to reuse for this workflow run", + ) max_screenshot_scrolls: int | None = Field( default=None, description="The maximum number of scrolls for the post action screenshot. When it's None or 0, it takes the current viewpoint screenshot.", @@ -416,6 +420,12 @@ class WorkflowRunRequest(BaseModel): return None return validate_url(url) + @model_validator(mode="after") + def validate_browser_reference(cls, values: WorkflowRunRequest) -> WorkflowRunRequest: + if values.browser_session_id and values.browser_profile_id: + raise ValueError("Cannot specify both browser_session_id and browser_profile_id") + return values + class BlockRunRequest(WorkflowRunRequest): block_labels: list[str] = Field( @@ -477,6 +487,11 @@ class BaseRunResponse(BaseModel): browser_session_id: str | None = Field( default=None, description="ID of the Skyvern persistent browser session used for this run", examples=["pbs_123"] ) + browser_profile_id: str | None = Field( + default=None, + description="ID of the browser profile used for this run", + examples=["bp_123"], + ) max_screenshot_scrolls: int | None = Field( default=None, description="The maximum number of scrolls for the post action screenshot. When it's None or 0, it takes the current viewpoint screenshot", diff --git a/skyvern/services/block_service.py b/skyvern/services/block_service.py index 6ee78f9d..4c18e103 100644 --- a/skyvern/services/block_service.py +++ b/skyvern/services/block_service.py @@ -32,6 +32,7 @@ async def ensure_workflow_run( totp_identifier=block_run_request.totp_identifier, totp_verification_url=block_run_request.totp_url, browser_session_id=block_run_request.browser_session_id, + browser_profile_id=block_run_request.browser_profile_id, max_screenshot_scrolls=block_run_request.max_screenshot_scrolls, extra_http_headers=block_run_request.extra_http_headers, ) diff --git a/skyvern/services/task_v2_service.py b/skyvern/services/task_v2_service.py index edc7b0a1..60485c1b 100644 --- a/skyvern/services/task_v2_service.py +++ b/skyvern/services/task_v2_service.py @@ -553,7 +553,9 @@ async def run_task_v2_helper( current_url: str | None = None browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run( - workflow_run=workflow_run, browser_session_id=browser_session_id + workflow_run=workflow_run, + browser_session_id=browser_session_id, + browser_profile_id=workflow_run.browser_profile_id, ) page = await browser_state.get_working_page() @@ -609,7 +611,9 @@ async def run_task_v2_helper( # Always ensure browser_state is available at the start of the loop fallback_url = settings.TASK_BLOCKED_SITE_FALLBACK_URL browser_state = await app.BROWSER_MANAGER.get_or_create_for_workflow_run( - workflow_run=workflow_run, browser_session_id=browser_session_id + workflow_run=workflow_run, + browser_session_id=browser_session_id, + browser_profile_id=workflow_run.browser_profile_id, ) fallback_occurred = False @@ -623,6 +627,7 @@ async def run_task_v2_helper( script_id=task_v2.script_id, organization_id=organization_id, extra_http_headers=task_v2.extra_http_headers, + browser_profile_id=workflow_run.browser_profile_id, ) else: await browser_state.navigate_to_url(page, url) @@ -895,6 +900,7 @@ async def run_task_v2_helper( workflow_run=workflow_run, url=url, browser_session_id=browser_session_id, + browser_profile_id=workflow_run.browser_profile_id, ) scraped_page = await scrape_website( browser_state, diff --git a/skyvern/services/workflow_service.py b/skyvern/services/workflow_service.py index 9eab4ea6..a56a146b 100644 --- a/skyvern/services/workflow_service.py +++ b/skyvern/services/workflow_service.py @@ -130,6 +130,7 @@ async def get_workflow_run_response( app_url=app_url, created_at=workflow_run.created_at, modified_at=workflow_run.modified_at, + browser_profile_id=workflow_run.browser_profile_id, run_request=WorkflowRunRequest( workflow_id=workflow_run.workflow_permanent_id, title=workflow_run_resp.workflow_title, @@ -140,6 +141,7 @@ async def get_workflow_run_response( totp_identifier=workflow_run.totp_identifier, max_screenshot_scrolls=workflow_run.max_screenshot_scrolls, browser_address=workflow_run.browser_address, + browser_profile_id=workflow_run.browser_profile_id, # TODO: add browser session id ), errors=workflow_run_resp.errors, diff --git a/skyvern/webeye/browser_factory.py b/skyvern/webeye/browser_factory.py index a102000e..dfbd57ad 100644 --- a/skyvern/webeye/browser_factory.py +++ b/skyvern/webeye/browser_factory.py @@ -664,6 +664,7 @@ class BrowserState: organization_id: str | None = None, extra_http_headers: dict[str, str] | None = None, browser_address: str | None = None, + browser_profile_id: str | None = None, ) -> None: if self.browser_context is None: LOG.info("creating browser context") @@ -681,6 +682,7 @@ class BrowserState: organization_id=organization_id, extra_http_headers=extra_http_headers, browser_address=browser_address, + browser_profile_id=browser_profile_id, ) self.browser_context = browser_context self.browser_artifacts = browser_artifacts @@ -844,6 +846,7 @@ class BrowserState: organization_id: str | None = None, extra_http_headers: dict[str, str] | None = None, browser_address: str | None = None, + browser_profile_id: str | None = None, ) -> Page: page = await self.get_working_page() if page is not None: @@ -859,6 +862,7 @@ class BrowserState: organization_id=organization_id, extra_http_headers=extra_http_headers, browser_address=browser_address, + browser_profile_id=browser_profile_id, ) except Exception as e: error_message = str(e) @@ -876,6 +880,7 @@ class BrowserState: organization_id=organization_id, extra_http_headers=extra_http_headers, browser_address=browser_address, + browser_profile_id=browser_profile_id, ) page = await self.__assert_page() @@ -892,6 +897,7 @@ class BrowserState: organization_id=organization_id, extra_http_headers=extra_http_headers, browser_address=browser_address, + browser_profile_id=browser_profile_id, ) page = await self.__assert_page() return page diff --git a/skyvern/webeye/browser_manager.py b/skyvern/webeye/browser_manager.py index bae08c5b..8f280d96 100644 --- a/skyvern/webeye/browser_manager.py +++ b/skyvern/webeye/browser_manager.py @@ -34,6 +34,7 @@ class BrowserManager: organization_id: str | None = None, extra_http_headers: dict[str, str] | None = None, browser_address: str | None = None, + browser_profile_id: str | None = None, ) -> BrowserState: pw = await async_playwright().start() ( @@ -50,6 +51,7 @@ class BrowserManager: organization_id=organization_id, extra_http_headers=extra_http_headers, browser_address=browser_address, + browser_profile_id=browser_profile_id, ) return BrowserState( pw=pw, @@ -145,9 +147,12 @@ class BrowserManager: workflow_run: WorkflowRun, url: str | None = None, browser_session_id: str | None = None, + browser_profile_id: str | None = None, ) -> BrowserState: parent_workflow_run_id = workflow_run.parent_workflow_run_id workflow_run_id = workflow_run.workflow_run_id + if browser_profile_id is None: + browser_profile_id = workflow_run.browser_profile_id browser_state = self.get_for_workflow_run( workflow_run_id=workflow_run_id, parent_workflow_run_id=parent_workflow_run_id ) @@ -192,6 +197,7 @@ class BrowserManager: organization_id=workflow_run.organization_id, extra_http_headers=workflow_run.extra_http_headers, browser_address=workflow_run.browser_address, + browser_profile_id=browser_profile_id, ) if browser_session_id: @@ -213,6 +219,7 @@ class BrowserManager: organization_id=workflow_run.organization_id, extra_http_headers=workflow_run.extra_http_headers, browser_address=workflow_run.browser_address, + browser_profile_id=browser_profile_id, ) return browser_state