[SKY-6973] [1/3] Browser Profiles - database and s3 storage layer (#3899)

This commit is contained in:
Marc Kelechava
2025-11-04 17:36:41 -08:00
committed by GitHub
parent 16f61af6cf
commit c059f1f1c5
9 changed files with 253 additions and 1 deletions

View File

@@ -799,6 +799,14 @@ class BrowserSessionNotFound(SkyvernHTTPException):
)
class BrowserProfileNotFound(SkyvernHTTPException):
def __init__(self, profile_id: str, organization_id: str | None = None) -> None:
message = f"Browser profile {profile_id} not found"
if organization_id:
message += f" for organization {organization_id}"
super().__init__(message, status_code=status.HTTP_404_NOT_FOUND)
class CannotUpdateWorkflowDueToCodeCache(SkyvernException):
def __init__(self, workflow_permanent_id: str) -> None:
super().__init__(f"No confirmation for code cache deletion on {workflow_permanent_id}.")

View File

@@ -123,6 +123,14 @@ class BaseStorage(ABC):
async def retrieve_browser_session(self, organization_id: str, workflow_permanent_id: str) -> str | None:
pass
@abstractmethod
async def store_browser_profile(self, organization_id: str, profile_id: str, directory: str) -> None:
"""Store a browser profile from a directory."""
@abstractmethod
async def retrieve_browser_profile(self, organization_id: str, profile_id: str) -> str | None:
"""Retrieve a browser profile to a temporary directory."""
@abstractmethod
async def list_downloaded_files_in_browser_session(
self, organization_id: str, browser_session_id: str

View File

@@ -207,6 +207,35 @@ class LocalStorage(BaseStorage):
return None
return str(stored_folder_path)
async def store_browser_profile(self, organization_id: str, profile_id: str, directory: str) -> None:
"""Store browser profile locally."""
stored_folder_path = Path(settings.BROWSER_SESSION_BASE_PATH) / organization_id / "profiles" / profile_id
if directory == str(stored_folder_path):
return
self._create_directories_if_not_exists(stored_folder_path)
LOG.info(
"Storing browser profile locally",
organization_id=organization_id,
profile_id=profile_id,
directory=directory,
browser_profile_path=stored_folder_path,
)
for root, _, files in os.walk(directory):
for file in files:
source_file_path = Path(root) / file
relative_path = source_file_path.relative_to(directory)
target_file_path = stored_folder_path / relative_path
self._create_directories_if_not_exists(target_file_path)
shutil.copy2(source_file_path, target_file_path)
async def retrieve_browser_profile(self, organization_id: str, profile_id: str) -> str | None:
"""Retrieve browser profile from local storage."""
stored_folder_path = Path(settings.BROWSER_SESSION_BASE_PATH) / organization_id / "profiles" / profile_id
if not stored_folder_path.exists():
return None
return str(stored_folder_path)
async def save_downloaded_files(self, organization_id: str, run_id: str | None) -> None:
pass

View File

@@ -195,6 +195,43 @@ class S3Storage(BaseStorage):
temp_zip_file.close()
return temp_dir
async def store_browser_profile(self, organization_id: str, profile_id: str, directory: str) -> None:
"""Store browser profile to S3."""
temp_zip_file = create_named_temporary_file()
zip_file_path = shutil.make_archive(temp_zip_file.name, "zip", directory)
profile_uri = (
f"s3://{settings.AWS_S3_BUCKET_BROWSER_SESSIONS}/{settings.ENV}/{organization_id}/profiles/{profile_id}.zip"
)
sc = await self._get_storage_class_for_org(organization_id)
tags = await self._get_tags_for_org(organization_id)
LOG.debug(
"Storing browser profile",
organization_id=organization_id,
profile_id=profile_id,
zip_file_path=zip_file_path,
profile_uri=profile_uri,
storage_class=sc,
tags=tags,
)
await self.async_client.upload_file_from_path(profile_uri, zip_file_path, storage_class=sc, tags=tags)
async def retrieve_browser_profile(self, organization_id: str, profile_id: str) -> str | None:
"""Retrieve browser profile from S3."""
profile_uri = (
f"s3://{settings.AWS_S3_BUCKET_BROWSER_SESSIONS}/{settings.ENV}/{organization_id}/profiles/{profile_id}.zip"
)
downloaded_zip_bytes = await self.async_client.download_file(profile_uri, log_exception=True)
if not downloaded_zip_bytes:
return None
temp_zip_file = create_named_temporary_file(delete=False)
temp_zip_file.write(downloaded_zip_bytes)
temp_zip_file_path = temp_zip_file.name
temp_dir = make_temp_directory(prefix="skyvern_browser_profile_")
unzip_files(temp_zip_file_path, temp_dir)
temp_zip_file.close()
return temp_dir
async def list_downloaded_files_in_browser_session(
self, organization_id: str, browser_session_id: str
) -> list[str]:

View File

@@ -9,7 +9,7 @@ from sqlalchemy.ext.asyncio import AsyncEngine, async_sessionmaker, create_async
from skyvern.config import settings
from skyvern.constants import DEFAULT_SCRIPT_RUN_ID
from skyvern.exceptions import WorkflowParameterNotFound, WorkflowRunNotFound
from skyvern.exceptions import BrowserProfileNotFound, WorkflowParameterNotFound, WorkflowRunNotFound
from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType
from skyvern.forge.sdk.db.enums import OrganizationAuthTokenType, TaskType
from skyvern.forge.sdk.db.exceptions import NotFoundError
@@ -23,6 +23,7 @@ from skyvern.forge.sdk.db.models import (
BitwardenLoginCredentialParameterModel,
BitwardenSensitiveInformationParameterModel,
BlockRunModel,
BrowserProfileModel,
CredentialModel,
CredentialParameterModel,
DebugSessionModel,
@@ -77,6 +78,7 @@ from skyvern.forge.sdk.encrypt.base import EncryptMethod
from skyvern.forge.sdk.log_artifacts import save_workflow_run_logs
from skyvern.forge.sdk.models import Step, StepStatus
from skyvern.forge.sdk.schemas.ai_suggestions import AISuggestion
from skyvern.forge.sdk.schemas.browser_profiles import BrowserProfile
from skyvern.forge.sdk.schemas.credentials import Credential, CredentialType, CredentialVaultType
from skyvern.forge.sdk.schemas.debug_sessions import BlockRun, DebugSession, DebugSessionRun
from skyvern.forge.sdk.schemas.organization_bitwarden_collections import OrganizationBitwardenCollection
@@ -3500,6 +3502,89 @@ class AgentDB:
for workflow_run_block in workflow_run_blocks
]
async def create_browser_profile(
self,
organization_id: str,
name: str,
description: str | None = None,
) -> BrowserProfile:
try:
async with self.Session() as session:
browser_profile = BrowserProfileModel(
organization_id=organization_id,
name=name,
description=description,
)
session.add(browser_profile)
await session.commit()
await session.refresh(browser_profile)
return BrowserProfile.model_validate(browser_profile)
except SQLAlchemyError:
LOG.error("SQLAlchemyError in create_browser_profile", exc_info=True)
raise
async def get_browser_profile(
self,
profile_id: str,
organization_id: str,
include_deleted: bool = False,
) -> BrowserProfile | None:
try:
async with self.Session() as session:
query = (
select(BrowserProfileModel)
.filter_by(browser_profile_id=profile_id)
.filter_by(organization_id=organization_id)
)
if not include_deleted:
query = query.filter(BrowserProfileModel.deleted_at.is_(None))
browser_profile = (await session.scalars(query)).first()
if not browser_profile:
return None
return BrowserProfile.model_validate(browser_profile)
except SQLAlchemyError:
LOG.error("SQLAlchemyError in get_browser_profile", exc_info=True)
raise
async def list_browser_profiles(
self,
organization_id: str,
include_deleted: bool = False,
) -> list[BrowserProfile]:
try:
async with self.Session() as session:
query = select(BrowserProfileModel).filter_by(organization_id=organization_id)
if not include_deleted:
query = query.filter(BrowserProfileModel.deleted_at.is_(None))
browser_profiles = await session.scalars(query.order_by(asc(BrowserProfileModel.created_at)))
return [BrowserProfile.model_validate(profile) for profile in browser_profiles.all()]
except SQLAlchemyError:
LOG.error("SQLAlchemyError in list_browser_profiles", exc_info=True)
raise
async def delete_browser_profile(
self,
profile_id: str,
organization_id: str,
) -> None:
try:
async with self.Session() as session:
query = (
select(BrowserProfileModel)
.filter_by(browser_profile_id=profile_id)
.filter_by(organization_id=organization_id)
.filter(BrowserProfileModel.deleted_at.is_(None))
)
browser_profile = (await session.scalars(query)).first()
if not browser_profile:
raise BrowserProfileNotFound(profile_id=profile_id, organization_id=organization_id)
browser_profile.deleted_at = datetime.utcnow()
await session.commit()
except SQLAlchemyError:
LOG.error("SQLAlchemyError in delete_browser_profile", exc_info=True)
raise
async def get_active_persistent_browser_sessions(
self,
organization_id: str,

View File

@@ -39,6 +39,7 @@ CREDENTIAL_AZURE_VAULT_PARAMETER_PREFIX = "azcp"
CREDENTIAL_PARAMETER_PREFIX = "cp"
CREDENTIAL_PREFIX = "cred"
DEBUG_SESSION_PREFIX = "ds"
BROWSER_PROFILE_PREFIX = "bp"
ORGANIZATION_BITWARDEN_COLLECTION_PREFIX = "obc"
TASK_V2_ID = "tsk_v2"
THOUGHT_ID = "ot"
@@ -194,6 +195,11 @@ def generate_persistent_browser_session_id() -> str:
return f"{PERSISTENT_BROWSER_SESSION_ID}_{int_id}"
def generate_browser_profile_id() -> str:
int_id = generate_id()
return f"{BROWSER_PROFILE_PREFIX}_{int_id}"
def generate_task_run_id() -> str:
int_id = generate_id()
return f"{TASK_RUN_PREFIX}_{int_id}"

View File

@@ -28,6 +28,7 @@ from skyvern.forge.sdk.db.id import (
generate_bitwarden_credit_card_data_parameter_id,
generate_bitwarden_login_credential_parameter_id,
generate_bitwarden_sensitive_information_parameter_id,
generate_browser_profile_id,
generate_credential_id,
generate_credential_parameter_id,
generate_debug_session_id,
@@ -784,6 +785,23 @@ class PersistentBrowserSessionModel(Base):
deleted_at = Column(DateTime, nullable=True)
class BrowserProfileModel(Base):
__tablename__ = "browser_profiles"
__table_args__ = (
Index("idx_browser_profiles_org", "organization_id"),
Index("idx_browser_profiles_org_name", "organization_id", "name"),
UniqueConstraint("organization_id", "name", name="uc_org_browser_profile_name"),
)
browser_profile_id = Column(String, primary_key=True, default=generate_browser_profile_id)
organization_id = Column(String, nullable=False)
name = Column(String, nullable=False)
description = Column(String, nullable=True)
created_at = Column(DateTime, default=datetime.datetime.utcnow, nullable=False)
modified_at = Column(DateTime, default=datetime.datetime.utcnow, onupdate=datetime.datetime.utcnow, nullable=False)
deleted_at = Column(DateTime, nullable=True)
class TaskRunModel(Base):
__tablename__ = "task_runs"
__table_args__ = (

View File

@@ -0,0 +1,15 @@
from datetime import datetime
from pydantic import BaseModel, ConfigDict
class BrowserProfile(BaseModel):
model_config = ConfigDict(from_attributes=True)
browser_profile_id: str
organization_id: str
name: str
description: str | None = None
created_at: datetime
modified_at: datetime
deleted_at: datetime | None = None