From c059f1f1c572fc5a89027c3d5a6a6c2bcb70f172 Mon Sep 17 00:00:00 2001 From: Marc Kelechava Date: Tue, 4 Nov 2025 17:36:41 -0800 Subject: [PATCH] [SKY-6973] [1/3] Browser Profiles - database and s3 storage layer (#3899) --- ...7fbf463be9a7_add_browser_profiles_table.py | 46 ++++++++++ skyvern/exceptions.py | 8 ++ skyvern/forge/sdk/artifact/storage/base.py | 8 ++ skyvern/forge/sdk/artifact/storage/local.py | 29 +++++++ skyvern/forge/sdk/artifact/storage/s3.py | 37 ++++++++ skyvern/forge/sdk/db/client.py | 87 ++++++++++++++++++- skyvern/forge/sdk/db/id.py | 6 ++ skyvern/forge/sdk/db/models.py | 18 ++++ skyvern/forge/sdk/schemas/browser_profiles.py | 15 ++++ 9 files changed, 253 insertions(+), 1 deletion(-) create mode 100644 alembic/versions/2025_11_05_0126-7fbf463be9a7_add_browser_profiles_table.py create mode 100644 skyvern/forge/sdk/schemas/browser_profiles.py diff --git a/alembic/versions/2025_11_05_0126-7fbf463be9a7_add_browser_profiles_table.py b/alembic/versions/2025_11_05_0126-7fbf463be9a7_add_browser_profiles_table.py new file mode 100644 index 00000000..26c01622 --- /dev/null +++ b/alembic/versions/2025_11_05_0126-7fbf463be9a7_add_browser_profiles_table.py @@ -0,0 +1,46 @@ +"""add browser_profiles table + +Revision ID: 7fbf463be9a7 +Revises: 2903b6252f31 +Create Date: 2025-11-05 01:26:01.764838+00:00 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "7fbf463be9a7" +down_revision: Union[str, None] = "2903b6252f31" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.create_table( + "browser_profiles", + sa.Column("browser_profile_id", sa.String(), nullable=False), + sa.Column("organization_id", sa.String(), nullable=False), + sa.Column("name", sa.String(), nullable=False), + sa.Column("description", sa.String(), nullable=True), + sa.Column("created_at", sa.DateTime(), nullable=False), + sa.Column("modified_at", sa.DateTime(), nullable=False), + sa.Column("deleted_at", sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint("browser_profile_id"), + sa.UniqueConstraint("organization_id", "name", name="uc_org_browser_profile_name"), + ) + op.create_index("idx_browser_profiles_org", "browser_profiles", ["organization_id"], unique=False) + op.create_index("idx_browser_profiles_org_name", "browser_profiles", ["organization_id", "name"], unique=False) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_index("idx_browser_profiles_org_name", table_name="browser_profiles") + op.drop_index("idx_browser_profiles_org", table_name="browser_profiles") + op.drop_table("browser_profiles") + # ### end Alembic commands ### diff --git a/skyvern/exceptions.py b/skyvern/exceptions.py index 40df493d..8addc366 100644 --- a/skyvern/exceptions.py +++ b/skyvern/exceptions.py @@ -799,6 +799,14 @@ class BrowserSessionNotFound(SkyvernHTTPException): ) +class BrowserProfileNotFound(SkyvernHTTPException): + def __init__(self, profile_id: str, organization_id: str | None = None) -> None: + message = f"Browser profile {profile_id} not found" + if organization_id: + message += f" for organization {organization_id}" + super().__init__(message, status_code=status.HTTP_404_NOT_FOUND) + + class CannotUpdateWorkflowDueToCodeCache(SkyvernException): def __init__(self, workflow_permanent_id: str) -> None: super().__init__(f"No confirmation for code cache deletion on {workflow_permanent_id}.") diff --git a/skyvern/forge/sdk/artifact/storage/base.py b/skyvern/forge/sdk/artifact/storage/base.py index efd33016..de44be46 100644 --- a/skyvern/forge/sdk/artifact/storage/base.py +++ b/skyvern/forge/sdk/artifact/storage/base.py @@ -123,6 +123,14 @@ class BaseStorage(ABC): async def retrieve_browser_session(self, organization_id: str, workflow_permanent_id: str) -> str | None: pass + @abstractmethod + async def store_browser_profile(self, organization_id: str, profile_id: str, directory: str) -> None: + """Store a browser profile from a directory.""" + + @abstractmethod + async def retrieve_browser_profile(self, organization_id: str, profile_id: str) -> str | None: + """Retrieve a browser profile to a temporary directory.""" + @abstractmethod async def list_downloaded_files_in_browser_session( self, organization_id: str, browser_session_id: str diff --git a/skyvern/forge/sdk/artifact/storage/local.py b/skyvern/forge/sdk/artifact/storage/local.py index bdb70ffb..b4637555 100644 --- a/skyvern/forge/sdk/artifact/storage/local.py +++ b/skyvern/forge/sdk/artifact/storage/local.py @@ -207,6 +207,35 @@ class LocalStorage(BaseStorage): return None return str(stored_folder_path) + async def store_browser_profile(self, organization_id: str, profile_id: str, directory: str) -> None: + """Store browser profile locally.""" + stored_folder_path = Path(settings.BROWSER_SESSION_BASE_PATH) / organization_id / "profiles" / profile_id + if directory == str(stored_folder_path): + return + self._create_directories_if_not_exists(stored_folder_path) + LOG.info( + "Storing browser profile locally", + organization_id=organization_id, + profile_id=profile_id, + directory=directory, + browser_profile_path=stored_folder_path, + ) + + for root, _, files in os.walk(directory): + for file in files: + source_file_path = Path(root) / file + relative_path = source_file_path.relative_to(directory) + target_file_path = stored_folder_path / relative_path + self._create_directories_if_not_exists(target_file_path) + shutil.copy2(source_file_path, target_file_path) + + async def retrieve_browser_profile(self, organization_id: str, profile_id: str) -> str | None: + """Retrieve browser profile from local storage.""" + stored_folder_path = Path(settings.BROWSER_SESSION_BASE_PATH) / organization_id / "profiles" / profile_id + if not stored_folder_path.exists(): + return None + return str(stored_folder_path) + async def save_downloaded_files(self, organization_id: str, run_id: str | None) -> None: pass diff --git a/skyvern/forge/sdk/artifact/storage/s3.py b/skyvern/forge/sdk/artifact/storage/s3.py index 4799c863..60b773ad 100644 --- a/skyvern/forge/sdk/artifact/storage/s3.py +++ b/skyvern/forge/sdk/artifact/storage/s3.py @@ -195,6 +195,43 @@ class S3Storage(BaseStorage): temp_zip_file.close() return temp_dir + async def store_browser_profile(self, organization_id: str, profile_id: str, directory: str) -> None: + """Store browser profile to S3.""" + temp_zip_file = create_named_temporary_file() + zip_file_path = shutil.make_archive(temp_zip_file.name, "zip", directory) + profile_uri = ( + f"s3://{settings.AWS_S3_BUCKET_BROWSER_SESSIONS}/{settings.ENV}/{organization_id}/profiles/{profile_id}.zip" + ) + sc = await self._get_storage_class_for_org(organization_id) + tags = await self._get_tags_for_org(organization_id) + LOG.debug( + "Storing browser profile", + organization_id=organization_id, + profile_id=profile_id, + zip_file_path=zip_file_path, + profile_uri=profile_uri, + storage_class=sc, + tags=tags, + ) + await self.async_client.upload_file_from_path(profile_uri, zip_file_path, storage_class=sc, tags=tags) + + async def retrieve_browser_profile(self, organization_id: str, profile_id: str) -> str | None: + """Retrieve browser profile from S3.""" + profile_uri = ( + f"s3://{settings.AWS_S3_BUCKET_BROWSER_SESSIONS}/{settings.ENV}/{organization_id}/profiles/{profile_id}.zip" + ) + downloaded_zip_bytes = await self.async_client.download_file(profile_uri, log_exception=True) + if not downloaded_zip_bytes: + return None + temp_zip_file = create_named_temporary_file(delete=False) + temp_zip_file.write(downloaded_zip_bytes) + temp_zip_file_path = temp_zip_file.name + + temp_dir = make_temp_directory(prefix="skyvern_browser_profile_") + unzip_files(temp_zip_file_path, temp_dir) + temp_zip_file.close() + return temp_dir + async def list_downloaded_files_in_browser_session( self, organization_id: str, browser_session_id: str ) -> list[str]: diff --git a/skyvern/forge/sdk/db/client.py b/skyvern/forge/sdk/db/client.py index fc6fac92..6f9fe814 100644 --- a/skyvern/forge/sdk/db/client.py +++ b/skyvern/forge/sdk/db/client.py @@ -9,7 +9,7 @@ from sqlalchemy.ext.asyncio import AsyncEngine, async_sessionmaker, create_async from skyvern.config import settings from skyvern.constants import DEFAULT_SCRIPT_RUN_ID -from skyvern.exceptions import WorkflowParameterNotFound, WorkflowRunNotFound +from skyvern.exceptions import BrowserProfileNotFound, WorkflowParameterNotFound, WorkflowRunNotFound from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType from skyvern.forge.sdk.db.enums import OrganizationAuthTokenType, TaskType from skyvern.forge.sdk.db.exceptions import NotFoundError @@ -23,6 +23,7 @@ from skyvern.forge.sdk.db.models import ( BitwardenLoginCredentialParameterModel, BitwardenSensitiveInformationParameterModel, BlockRunModel, + BrowserProfileModel, CredentialModel, CredentialParameterModel, DebugSessionModel, @@ -77,6 +78,7 @@ from skyvern.forge.sdk.encrypt.base import EncryptMethod from skyvern.forge.sdk.log_artifacts import save_workflow_run_logs from skyvern.forge.sdk.models import Step, StepStatus from skyvern.forge.sdk.schemas.ai_suggestions import AISuggestion +from skyvern.forge.sdk.schemas.browser_profiles import BrowserProfile from skyvern.forge.sdk.schemas.credentials import Credential, CredentialType, CredentialVaultType from skyvern.forge.sdk.schemas.debug_sessions import BlockRun, DebugSession, DebugSessionRun from skyvern.forge.sdk.schemas.organization_bitwarden_collections import OrganizationBitwardenCollection @@ -3500,6 +3502,89 @@ class AgentDB: for workflow_run_block in workflow_run_blocks ] + async def create_browser_profile( + self, + organization_id: str, + name: str, + description: str | None = None, + ) -> BrowserProfile: + try: + async with self.Session() as session: + browser_profile = BrowserProfileModel( + organization_id=organization_id, + name=name, + description=description, + ) + session.add(browser_profile) + await session.commit() + await session.refresh(browser_profile) + return BrowserProfile.model_validate(browser_profile) + except SQLAlchemyError: + LOG.error("SQLAlchemyError in create_browser_profile", exc_info=True) + raise + + async def get_browser_profile( + self, + profile_id: str, + organization_id: str, + include_deleted: bool = False, + ) -> BrowserProfile | None: + try: + async with self.Session() as session: + query = ( + select(BrowserProfileModel) + .filter_by(browser_profile_id=profile_id) + .filter_by(organization_id=organization_id) + ) + if not include_deleted: + query = query.filter(BrowserProfileModel.deleted_at.is_(None)) + + browser_profile = (await session.scalars(query)).first() + if not browser_profile: + return None + return BrowserProfile.model_validate(browser_profile) + except SQLAlchemyError: + LOG.error("SQLAlchemyError in get_browser_profile", exc_info=True) + raise + + async def list_browser_profiles( + self, + organization_id: str, + include_deleted: bool = False, + ) -> list[BrowserProfile]: + try: + async with self.Session() as session: + query = select(BrowserProfileModel).filter_by(organization_id=organization_id) + if not include_deleted: + query = query.filter(BrowserProfileModel.deleted_at.is_(None)) + browser_profiles = await session.scalars(query.order_by(asc(BrowserProfileModel.created_at))) + return [BrowserProfile.model_validate(profile) for profile in browser_profiles.all()] + except SQLAlchemyError: + LOG.error("SQLAlchemyError in list_browser_profiles", exc_info=True) + raise + + async def delete_browser_profile( + self, + profile_id: str, + organization_id: str, + ) -> None: + try: + async with self.Session() as session: + query = ( + select(BrowserProfileModel) + .filter_by(browser_profile_id=profile_id) + .filter_by(organization_id=organization_id) + .filter(BrowserProfileModel.deleted_at.is_(None)) + ) + browser_profile = (await session.scalars(query)).first() + if not browser_profile: + raise BrowserProfileNotFound(profile_id=profile_id, organization_id=organization_id) + browser_profile.deleted_at = datetime.utcnow() + await session.commit() + except SQLAlchemyError: + LOG.error("SQLAlchemyError in delete_browser_profile", exc_info=True) + raise + async def get_active_persistent_browser_sessions( self, organization_id: str, diff --git a/skyvern/forge/sdk/db/id.py b/skyvern/forge/sdk/db/id.py index 48a92f75..293d8fa4 100644 --- a/skyvern/forge/sdk/db/id.py +++ b/skyvern/forge/sdk/db/id.py @@ -39,6 +39,7 @@ CREDENTIAL_AZURE_VAULT_PARAMETER_PREFIX = "azcp" CREDENTIAL_PARAMETER_PREFIX = "cp" CREDENTIAL_PREFIX = "cred" DEBUG_SESSION_PREFIX = "ds" +BROWSER_PROFILE_PREFIX = "bp" ORGANIZATION_BITWARDEN_COLLECTION_PREFIX = "obc" TASK_V2_ID = "tsk_v2" THOUGHT_ID = "ot" @@ -194,6 +195,11 @@ def generate_persistent_browser_session_id() -> str: return f"{PERSISTENT_BROWSER_SESSION_ID}_{int_id}" +def generate_browser_profile_id() -> str: + int_id = generate_id() + return f"{BROWSER_PROFILE_PREFIX}_{int_id}" + + def generate_task_run_id() -> str: int_id = generate_id() return f"{TASK_RUN_PREFIX}_{int_id}" diff --git a/skyvern/forge/sdk/db/models.py b/skyvern/forge/sdk/db/models.py index cbf62a85..d4af78ad 100644 --- a/skyvern/forge/sdk/db/models.py +++ b/skyvern/forge/sdk/db/models.py @@ -28,6 +28,7 @@ from skyvern.forge.sdk.db.id import ( generate_bitwarden_credit_card_data_parameter_id, generate_bitwarden_login_credential_parameter_id, generate_bitwarden_sensitive_information_parameter_id, + generate_browser_profile_id, generate_credential_id, generate_credential_parameter_id, generate_debug_session_id, @@ -784,6 +785,23 @@ class PersistentBrowserSessionModel(Base): deleted_at = Column(DateTime, nullable=True) +class BrowserProfileModel(Base): + __tablename__ = "browser_profiles" + __table_args__ = ( + Index("idx_browser_profiles_org", "organization_id"), + Index("idx_browser_profiles_org_name", "organization_id", "name"), + UniqueConstraint("organization_id", "name", name="uc_org_browser_profile_name"), + ) + + browser_profile_id = Column(String, primary_key=True, default=generate_browser_profile_id) + organization_id = Column(String, nullable=False) + name = Column(String, nullable=False) + description = Column(String, nullable=True) + created_at = Column(DateTime, default=datetime.datetime.utcnow, nullable=False) + modified_at = Column(DateTime, default=datetime.datetime.utcnow, onupdate=datetime.datetime.utcnow, nullable=False) + deleted_at = Column(DateTime, nullable=True) + + class TaskRunModel(Base): __tablename__ = "task_runs" __table_args__ = ( diff --git a/skyvern/forge/sdk/schemas/browser_profiles.py b/skyvern/forge/sdk/schemas/browser_profiles.py new file mode 100644 index 00000000..60f5d3da --- /dev/null +++ b/skyvern/forge/sdk/schemas/browser_profiles.py @@ -0,0 +1,15 @@ +from datetime import datetime + +from pydantic import BaseModel, ConfigDict + + +class BrowserProfile(BaseModel): + model_config = ConfigDict(from_attributes=True) + + browser_profile_id: str + organization_id: str + name: str + description: str | None = None + created_at: datetime + modified_at: datetime + deleted_at: datetime | None = None