[SKY-6973] [1/3] Browser Profiles - database and s3 storage layer (#3899)

This commit is contained in:
Marc Kelechava
2025-11-04 17:36:41 -08:00
committed by GitHub
parent 16f61af6cf
commit c059f1f1c5
9 changed files with 253 additions and 1 deletions

View File

@@ -0,0 +1,46 @@
"""add browser_profiles table
Revision ID: 7fbf463be9a7
Revises: 2903b6252f31
Create Date: 2025-11-05 01:26:01.764838+00:00
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "7fbf463be9a7"
down_revision: Union[str, None] = "2903b6252f31"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.create_table(
"browser_profiles",
sa.Column("browser_profile_id", sa.String(), nullable=False),
sa.Column("organization_id", sa.String(), nullable=False),
sa.Column("name", sa.String(), nullable=False),
sa.Column("description", sa.String(), nullable=True),
sa.Column("created_at", sa.DateTime(), nullable=False),
sa.Column("modified_at", sa.DateTime(), nullable=False),
sa.Column("deleted_at", sa.DateTime(), nullable=True),
sa.PrimaryKeyConstraint("browser_profile_id"),
sa.UniqueConstraint("organization_id", "name", name="uc_org_browser_profile_name"),
)
op.create_index("idx_browser_profiles_org", "browser_profiles", ["organization_id"], unique=False)
op.create_index("idx_browser_profiles_org_name", "browser_profiles", ["organization_id", "name"], unique=False)
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.drop_index("idx_browser_profiles_org_name", table_name="browser_profiles")
op.drop_index("idx_browser_profiles_org", table_name="browser_profiles")
op.drop_table("browser_profiles")
# ### end Alembic commands ###

View File

@@ -799,6 +799,14 @@ class BrowserSessionNotFound(SkyvernHTTPException):
) )
class BrowserProfileNotFound(SkyvernHTTPException):
def __init__(self, profile_id: str, organization_id: str | None = None) -> None:
message = f"Browser profile {profile_id} not found"
if organization_id:
message += f" for organization {organization_id}"
super().__init__(message, status_code=status.HTTP_404_NOT_FOUND)
class CannotUpdateWorkflowDueToCodeCache(SkyvernException): class CannotUpdateWorkflowDueToCodeCache(SkyvernException):
def __init__(self, workflow_permanent_id: str) -> None: def __init__(self, workflow_permanent_id: str) -> None:
super().__init__(f"No confirmation for code cache deletion on {workflow_permanent_id}.") super().__init__(f"No confirmation for code cache deletion on {workflow_permanent_id}.")

View File

@@ -123,6 +123,14 @@ class BaseStorage(ABC):
async def retrieve_browser_session(self, organization_id: str, workflow_permanent_id: str) -> str | None: async def retrieve_browser_session(self, organization_id: str, workflow_permanent_id: str) -> str | None:
pass pass
@abstractmethod
async def store_browser_profile(self, organization_id: str, profile_id: str, directory: str) -> None:
"""Store a browser profile from a directory."""
@abstractmethod
async def retrieve_browser_profile(self, organization_id: str, profile_id: str) -> str | None:
"""Retrieve a browser profile to a temporary directory."""
@abstractmethod @abstractmethod
async def list_downloaded_files_in_browser_session( async def list_downloaded_files_in_browser_session(
self, organization_id: str, browser_session_id: str self, organization_id: str, browser_session_id: str

View File

@@ -207,6 +207,35 @@ class LocalStorage(BaseStorage):
return None return None
return str(stored_folder_path) return str(stored_folder_path)
async def store_browser_profile(self, organization_id: str, profile_id: str, directory: str) -> None:
"""Store browser profile locally."""
stored_folder_path = Path(settings.BROWSER_SESSION_BASE_PATH) / organization_id / "profiles" / profile_id
if directory == str(stored_folder_path):
return
self._create_directories_if_not_exists(stored_folder_path)
LOG.info(
"Storing browser profile locally",
organization_id=organization_id,
profile_id=profile_id,
directory=directory,
browser_profile_path=stored_folder_path,
)
for root, _, files in os.walk(directory):
for file in files:
source_file_path = Path(root) / file
relative_path = source_file_path.relative_to(directory)
target_file_path = stored_folder_path / relative_path
self._create_directories_if_not_exists(target_file_path)
shutil.copy2(source_file_path, target_file_path)
async def retrieve_browser_profile(self, organization_id: str, profile_id: str) -> str | None:
"""Retrieve browser profile from local storage."""
stored_folder_path = Path(settings.BROWSER_SESSION_BASE_PATH) / organization_id / "profiles" / profile_id
if not stored_folder_path.exists():
return None
return str(stored_folder_path)
async def save_downloaded_files(self, organization_id: str, run_id: str | None) -> None: async def save_downloaded_files(self, organization_id: str, run_id: str | None) -> None:
pass pass

View File

@@ -195,6 +195,43 @@ class S3Storage(BaseStorage):
temp_zip_file.close() temp_zip_file.close()
return temp_dir return temp_dir
async def store_browser_profile(self, organization_id: str, profile_id: str, directory: str) -> None:
"""Store browser profile to S3."""
temp_zip_file = create_named_temporary_file()
zip_file_path = shutil.make_archive(temp_zip_file.name, "zip", directory)
profile_uri = (
f"s3://{settings.AWS_S3_BUCKET_BROWSER_SESSIONS}/{settings.ENV}/{organization_id}/profiles/{profile_id}.zip"
)
sc = await self._get_storage_class_for_org(organization_id)
tags = await self._get_tags_for_org(organization_id)
LOG.debug(
"Storing browser profile",
organization_id=organization_id,
profile_id=profile_id,
zip_file_path=zip_file_path,
profile_uri=profile_uri,
storage_class=sc,
tags=tags,
)
await self.async_client.upload_file_from_path(profile_uri, zip_file_path, storage_class=sc, tags=tags)
async def retrieve_browser_profile(self, organization_id: str, profile_id: str) -> str | None:
"""Retrieve browser profile from S3."""
profile_uri = (
f"s3://{settings.AWS_S3_BUCKET_BROWSER_SESSIONS}/{settings.ENV}/{organization_id}/profiles/{profile_id}.zip"
)
downloaded_zip_bytes = await self.async_client.download_file(profile_uri, log_exception=True)
if not downloaded_zip_bytes:
return None
temp_zip_file = create_named_temporary_file(delete=False)
temp_zip_file.write(downloaded_zip_bytes)
temp_zip_file_path = temp_zip_file.name
temp_dir = make_temp_directory(prefix="skyvern_browser_profile_")
unzip_files(temp_zip_file_path, temp_dir)
temp_zip_file.close()
return temp_dir
async def list_downloaded_files_in_browser_session( async def list_downloaded_files_in_browser_session(
self, organization_id: str, browser_session_id: str self, organization_id: str, browser_session_id: str
) -> list[str]: ) -> list[str]:

View File

@@ -9,7 +9,7 @@ from sqlalchemy.ext.asyncio import AsyncEngine, async_sessionmaker, create_async
from skyvern.config import settings from skyvern.config import settings
from skyvern.constants import DEFAULT_SCRIPT_RUN_ID from skyvern.constants import DEFAULT_SCRIPT_RUN_ID
from skyvern.exceptions import WorkflowParameterNotFound, WorkflowRunNotFound from skyvern.exceptions import BrowserProfileNotFound, WorkflowParameterNotFound, WorkflowRunNotFound
from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType
from skyvern.forge.sdk.db.enums import OrganizationAuthTokenType, TaskType from skyvern.forge.sdk.db.enums import OrganizationAuthTokenType, TaskType
from skyvern.forge.sdk.db.exceptions import NotFoundError from skyvern.forge.sdk.db.exceptions import NotFoundError
@@ -23,6 +23,7 @@ from skyvern.forge.sdk.db.models import (
BitwardenLoginCredentialParameterModel, BitwardenLoginCredentialParameterModel,
BitwardenSensitiveInformationParameterModel, BitwardenSensitiveInformationParameterModel,
BlockRunModel, BlockRunModel,
BrowserProfileModel,
CredentialModel, CredentialModel,
CredentialParameterModel, CredentialParameterModel,
DebugSessionModel, DebugSessionModel,
@@ -77,6 +78,7 @@ from skyvern.forge.sdk.encrypt.base import EncryptMethod
from skyvern.forge.sdk.log_artifacts import save_workflow_run_logs from skyvern.forge.sdk.log_artifacts import save_workflow_run_logs
from skyvern.forge.sdk.models import Step, StepStatus from skyvern.forge.sdk.models import Step, StepStatus
from skyvern.forge.sdk.schemas.ai_suggestions import AISuggestion from skyvern.forge.sdk.schemas.ai_suggestions import AISuggestion
from skyvern.forge.sdk.schemas.browser_profiles import BrowserProfile
from skyvern.forge.sdk.schemas.credentials import Credential, CredentialType, CredentialVaultType from skyvern.forge.sdk.schemas.credentials import Credential, CredentialType, CredentialVaultType
from skyvern.forge.sdk.schemas.debug_sessions import BlockRun, DebugSession, DebugSessionRun from skyvern.forge.sdk.schemas.debug_sessions import BlockRun, DebugSession, DebugSessionRun
from skyvern.forge.sdk.schemas.organization_bitwarden_collections import OrganizationBitwardenCollection from skyvern.forge.sdk.schemas.organization_bitwarden_collections import OrganizationBitwardenCollection
@@ -3500,6 +3502,89 @@ class AgentDB:
for workflow_run_block in workflow_run_blocks for workflow_run_block in workflow_run_blocks
] ]
async def create_browser_profile(
self,
organization_id: str,
name: str,
description: str | None = None,
) -> BrowserProfile:
try:
async with self.Session() as session:
browser_profile = BrowserProfileModel(
organization_id=organization_id,
name=name,
description=description,
)
session.add(browser_profile)
await session.commit()
await session.refresh(browser_profile)
return BrowserProfile.model_validate(browser_profile)
except SQLAlchemyError:
LOG.error("SQLAlchemyError in create_browser_profile", exc_info=True)
raise
async def get_browser_profile(
self,
profile_id: str,
organization_id: str,
include_deleted: bool = False,
) -> BrowserProfile | None:
try:
async with self.Session() as session:
query = (
select(BrowserProfileModel)
.filter_by(browser_profile_id=profile_id)
.filter_by(organization_id=organization_id)
)
if not include_deleted:
query = query.filter(BrowserProfileModel.deleted_at.is_(None))
browser_profile = (await session.scalars(query)).first()
if not browser_profile:
return None
return BrowserProfile.model_validate(browser_profile)
except SQLAlchemyError:
LOG.error("SQLAlchemyError in get_browser_profile", exc_info=True)
raise
async def list_browser_profiles(
self,
organization_id: str,
include_deleted: bool = False,
) -> list[BrowserProfile]:
try:
async with self.Session() as session:
query = select(BrowserProfileModel).filter_by(organization_id=organization_id)
if not include_deleted:
query = query.filter(BrowserProfileModel.deleted_at.is_(None))
browser_profiles = await session.scalars(query.order_by(asc(BrowserProfileModel.created_at)))
return [BrowserProfile.model_validate(profile) for profile in browser_profiles.all()]
except SQLAlchemyError:
LOG.error("SQLAlchemyError in list_browser_profiles", exc_info=True)
raise
async def delete_browser_profile(
self,
profile_id: str,
organization_id: str,
) -> None:
try:
async with self.Session() as session:
query = (
select(BrowserProfileModel)
.filter_by(browser_profile_id=profile_id)
.filter_by(organization_id=organization_id)
.filter(BrowserProfileModel.deleted_at.is_(None))
)
browser_profile = (await session.scalars(query)).first()
if not browser_profile:
raise BrowserProfileNotFound(profile_id=profile_id, organization_id=organization_id)
browser_profile.deleted_at = datetime.utcnow()
await session.commit()
except SQLAlchemyError:
LOG.error("SQLAlchemyError in delete_browser_profile", exc_info=True)
raise
async def get_active_persistent_browser_sessions( async def get_active_persistent_browser_sessions(
self, self,
organization_id: str, organization_id: str,

View File

@@ -39,6 +39,7 @@ CREDENTIAL_AZURE_VAULT_PARAMETER_PREFIX = "azcp"
CREDENTIAL_PARAMETER_PREFIX = "cp" CREDENTIAL_PARAMETER_PREFIX = "cp"
CREDENTIAL_PREFIX = "cred" CREDENTIAL_PREFIX = "cred"
DEBUG_SESSION_PREFIX = "ds" DEBUG_SESSION_PREFIX = "ds"
BROWSER_PROFILE_PREFIX = "bp"
ORGANIZATION_BITWARDEN_COLLECTION_PREFIX = "obc" ORGANIZATION_BITWARDEN_COLLECTION_PREFIX = "obc"
TASK_V2_ID = "tsk_v2" TASK_V2_ID = "tsk_v2"
THOUGHT_ID = "ot" THOUGHT_ID = "ot"
@@ -194,6 +195,11 @@ def generate_persistent_browser_session_id() -> str:
return f"{PERSISTENT_BROWSER_SESSION_ID}_{int_id}" return f"{PERSISTENT_BROWSER_SESSION_ID}_{int_id}"
def generate_browser_profile_id() -> str:
int_id = generate_id()
return f"{BROWSER_PROFILE_PREFIX}_{int_id}"
def generate_task_run_id() -> str: def generate_task_run_id() -> str:
int_id = generate_id() int_id = generate_id()
return f"{TASK_RUN_PREFIX}_{int_id}" return f"{TASK_RUN_PREFIX}_{int_id}"

View File

@@ -28,6 +28,7 @@ from skyvern.forge.sdk.db.id import (
generate_bitwarden_credit_card_data_parameter_id, generate_bitwarden_credit_card_data_parameter_id,
generate_bitwarden_login_credential_parameter_id, generate_bitwarden_login_credential_parameter_id,
generate_bitwarden_sensitive_information_parameter_id, generate_bitwarden_sensitive_information_parameter_id,
generate_browser_profile_id,
generate_credential_id, generate_credential_id,
generate_credential_parameter_id, generate_credential_parameter_id,
generate_debug_session_id, generate_debug_session_id,
@@ -784,6 +785,23 @@ class PersistentBrowserSessionModel(Base):
deleted_at = Column(DateTime, nullable=True) deleted_at = Column(DateTime, nullable=True)
class BrowserProfileModel(Base):
__tablename__ = "browser_profiles"
__table_args__ = (
Index("idx_browser_profiles_org", "organization_id"),
Index("idx_browser_profiles_org_name", "organization_id", "name"),
UniqueConstraint("organization_id", "name", name="uc_org_browser_profile_name"),
)
browser_profile_id = Column(String, primary_key=True, default=generate_browser_profile_id)
organization_id = Column(String, nullable=False)
name = Column(String, nullable=False)
description = Column(String, nullable=True)
created_at = Column(DateTime, default=datetime.datetime.utcnow, nullable=False)
modified_at = Column(DateTime, default=datetime.datetime.utcnow, onupdate=datetime.datetime.utcnow, nullable=False)
deleted_at = Column(DateTime, nullable=True)
class TaskRunModel(Base): class TaskRunModel(Base):
__tablename__ = "task_runs" __tablename__ = "task_runs"
__table_args__ = ( __table_args__ = (

View File

@@ -0,0 +1,15 @@
from datetime import datetime
from pydantic import BaseModel, ConfigDict
class BrowserProfile(BaseModel):
model_config = ConfigDict(from_attributes=True)
browser_profile_id: str
organization_id: str
name: str
description: str | None = None
created_at: datetime
modified_at: datetime
deleted_at: datetime | None = None