[SKY-6973] [1/3] Browser Profiles - database and s3 storage layer (#3899)
This commit is contained in:
@@ -0,0 +1,46 @@
|
|||||||
|
"""add browser_profiles table
|
||||||
|
|
||||||
|
Revision ID: 7fbf463be9a7
|
||||||
|
Revises: 2903b6252f31
|
||||||
|
Create Date: 2025-11-05 01:26:01.764838+00:00
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision: str = "7fbf463be9a7"
|
||||||
|
down_revision: Union[str, None] = "2903b6252f31"
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
|
||||||
|
# ### commands auto generated by Alembic - please adjust! ###
|
||||||
|
op.create_table(
|
||||||
|
"browser_profiles",
|
||||||
|
sa.Column("browser_profile_id", sa.String(), nullable=False),
|
||||||
|
sa.Column("organization_id", sa.String(), nullable=False),
|
||||||
|
sa.Column("name", sa.String(), nullable=False),
|
||||||
|
sa.Column("description", sa.String(), nullable=True),
|
||||||
|
sa.Column("created_at", sa.DateTime(), nullable=False),
|
||||||
|
sa.Column("modified_at", sa.DateTime(), nullable=False),
|
||||||
|
sa.Column("deleted_at", sa.DateTime(), nullable=True),
|
||||||
|
sa.PrimaryKeyConstraint("browser_profile_id"),
|
||||||
|
sa.UniqueConstraint("organization_id", "name", name="uc_org_browser_profile_name"),
|
||||||
|
)
|
||||||
|
op.create_index("idx_browser_profiles_org", "browser_profiles", ["organization_id"], unique=False)
|
||||||
|
op.create_index("idx_browser_profiles_org_name", "browser_profiles", ["organization_id", "name"], unique=False)
|
||||||
|
# ### end Alembic commands ###
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
|
||||||
|
# ### commands auto generated by Alembic - please adjust! ###
|
||||||
|
op.drop_index("idx_browser_profiles_org_name", table_name="browser_profiles")
|
||||||
|
op.drop_index("idx_browser_profiles_org", table_name="browser_profiles")
|
||||||
|
op.drop_table("browser_profiles")
|
||||||
|
# ### end Alembic commands ###
|
||||||
@@ -799,6 +799,14 @@ class BrowserSessionNotFound(SkyvernHTTPException):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class BrowserProfileNotFound(SkyvernHTTPException):
|
||||||
|
def __init__(self, profile_id: str, organization_id: str | None = None) -> None:
|
||||||
|
message = f"Browser profile {profile_id} not found"
|
||||||
|
if organization_id:
|
||||||
|
message += f" for organization {organization_id}"
|
||||||
|
super().__init__(message, status_code=status.HTTP_404_NOT_FOUND)
|
||||||
|
|
||||||
|
|
||||||
class CannotUpdateWorkflowDueToCodeCache(SkyvernException):
|
class CannotUpdateWorkflowDueToCodeCache(SkyvernException):
|
||||||
def __init__(self, workflow_permanent_id: str) -> None:
|
def __init__(self, workflow_permanent_id: str) -> None:
|
||||||
super().__init__(f"No confirmation for code cache deletion on {workflow_permanent_id}.")
|
super().__init__(f"No confirmation for code cache deletion on {workflow_permanent_id}.")
|
||||||
|
|||||||
@@ -123,6 +123,14 @@ class BaseStorage(ABC):
|
|||||||
async def retrieve_browser_session(self, organization_id: str, workflow_permanent_id: str) -> str | None:
|
async def retrieve_browser_session(self, organization_id: str, workflow_permanent_id: str) -> str | None:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
async def store_browser_profile(self, organization_id: str, profile_id: str, directory: str) -> None:
|
||||||
|
"""Store a browser profile from a directory."""
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
async def retrieve_browser_profile(self, organization_id: str, profile_id: str) -> str | None:
|
||||||
|
"""Retrieve a browser profile to a temporary directory."""
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
async def list_downloaded_files_in_browser_session(
|
async def list_downloaded_files_in_browser_session(
|
||||||
self, organization_id: str, browser_session_id: str
|
self, organization_id: str, browser_session_id: str
|
||||||
|
|||||||
@@ -207,6 +207,35 @@ class LocalStorage(BaseStorage):
|
|||||||
return None
|
return None
|
||||||
return str(stored_folder_path)
|
return str(stored_folder_path)
|
||||||
|
|
||||||
|
async def store_browser_profile(self, organization_id: str, profile_id: str, directory: str) -> None:
|
||||||
|
"""Store browser profile locally."""
|
||||||
|
stored_folder_path = Path(settings.BROWSER_SESSION_BASE_PATH) / organization_id / "profiles" / profile_id
|
||||||
|
if directory == str(stored_folder_path):
|
||||||
|
return
|
||||||
|
self._create_directories_if_not_exists(stored_folder_path)
|
||||||
|
LOG.info(
|
||||||
|
"Storing browser profile locally",
|
||||||
|
organization_id=organization_id,
|
||||||
|
profile_id=profile_id,
|
||||||
|
directory=directory,
|
||||||
|
browser_profile_path=stored_folder_path,
|
||||||
|
)
|
||||||
|
|
||||||
|
for root, _, files in os.walk(directory):
|
||||||
|
for file in files:
|
||||||
|
source_file_path = Path(root) / file
|
||||||
|
relative_path = source_file_path.relative_to(directory)
|
||||||
|
target_file_path = stored_folder_path / relative_path
|
||||||
|
self._create_directories_if_not_exists(target_file_path)
|
||||||
|
shutil.copy2(source_file_path, target_file_path)
|
||||||
|
|
||||||
|
async def retrieve_browser_profile(self, organization_id: str, profile_id: str) -> str | None:
|
||||||
|
"""Retrieve browser profile from local storage."""
|
||||||
|
stored_folder_path = Path(settings.BROWSER_SESSION_BASE_PATH) / organization_id / "profiles" / profile_id
|
||||||
|
if not stored_folder_path.exists():
|
||||||
|
return None
|
||||||
|
return str(stored_folder_path)
|
||||||
|
|
||||||
async def save_downloaded_files(self, organization_id: str, run_id: str | None) -> None:
|
async def save_downloaded_files(self, organization_id: str, run_id: str | None) -> None:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|||||||
@@ -195,6 +195,43 @@ class S3Storage(BaseStorage):
|
|||||||
temp_zip_file.close()
|
temp_zip_file.close()
|
||||||
return temp_dir
|
return temp_dir
|
||||||
|
|
||||||
|
async def store_browser_profile(self, organization_id: str, profile_id: str, directory: str) -> None:
|
||||||
|
"""Store browser profile to S3."""
|
||||||
|
temp_zip_file = create_named_temporary_file()
|
||||||
|
zip_file_path = shutil.make_archive(temp_zip_file.name, "zip", directory)
|
||||||
|
profile_uri = (
|
||||||
|
f"s3://{settings.AWS_S3_BUCKET_BROWSER_SESSIONS}/{settings.ENV}/{organization_id}/profiles/{profile_id}.zip"
|
||||||
|
)
|
||||||
|
sc = await self._get_storage_class_for_org(organization_id)
|
||||||
|
tags = await self._get_tags_for_org(organization_id)
|
||||||
|
LOG.debug(
|
||||||
|
"Storing browser profile",
|
||||||
|
organization_id=organization_id,
|
||||||
|
profile_id=profile_id,
|
||||||
|
zip_file_path=zip_file_path,
|
||||||
|
profile_uri=profile_uri,
|
||||||
|
storage_class=sc,
|
||||||
|
tags=tags,
|
||||||
|
)
|
||||||
|
await self.async_client.upload_file_from_path(profile_uri, zip_file_path, storage_class=sc, tags=tags)
|
||||||
|
|
||||||
|
async def retrieve_browser_profile(self, organization_id: str, profile_id: str) -> str | None:
|
||||||
|
"""Retrieve browser profile from S3."""
|
||||||
|
profile_uri = (
|
||||||
|
f"s3://{settings.AWS_S3_BUCKET_BROWSER_SESSIONS}/{settings.ENV}/{organization_id}/profiles/{profile_id}.zip"
|
||||||
|
)
|
||||||
|
downloaded_zip_bytes = await self.async_client.download_file(profile_uri, log_exception=True)
|
||||||
|
if not downloaded_zip_bytes:
|
||||||
|
return None
|
||||||
|
temp_zip_file = create_named_temporary_file(delete=False)
|
||||||
|
temp_zip_file.write(downloaded_zip_bytes)
|
||||||
|
temp_zip_file_path = temp_zip_file.name
|
||||||
|
|
||||||
|
temp_dir = make_temp_directory(prefix="skyvern_browser_profile_")
|
||||||
|
unzip_files(temp_zip_file_path, temp_dir)
|
||||||
|
temp_zip_file.close()
|
||||||
|
return temp_dir
|
||||||
|
|
||||||
async def list_downloaded_files_in_browser_session(
|
async def list_downloaded_files_in_browser_session(
|
||||||
self, organization_id: str, browser_session_id: str
|
self, organization_id: str, browser_session_id: str
|
||||||
) -> list[str]:
|
) -> list[str]:
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ from sqlalchemy.ext.asyncio import AsyncEngine, async_sessionmaker, create_async
|
|||||||
|
|
||||||
from skyvern.config import settings
|
from skyvern.config import settings
|
||||||
from skyvern.constants import DEFAULT_SCRIPT_RUN_ID
|
from skyvern.constants import DEFAULT_SCRIPT_RUN_ID
|
||||||
from skyvern.exceptions import WorkflowParameterNotFound, WorkflowRunNotFound
|
from skyvern.exceptions import BrowserProfileNotFound, WorkflowParameterNotFound, WorkflowRunNotFound
|
||||||
from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType
|
from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType
|
||||||
from skyvern.forge.sdk.db.enums import OrganizationAuthTokenType, TaskType
|
from skyvern.forge.sdk.db.enums import OrganizationAuthTokenType, TaskType
|
||||||
from skyvern.forge.sdk.db.exceptions import NotFoundError
|
from skyvern.forge.sdk.db.exceptions import NotFoundError
|
||||||
@@ -23,6 +23,7 @@ from skyvern.forge.sdk.db.models import (
|
|||||||
BitwardenLoginCredentialParameterModel,
|
BitwardenLoginCredentialParameterModel,
|
||||||
BitwardenSensitiveInformationParameterModel,
|
BitwardenSensitiveInformationParameterModel,
|
||||||
BlockRunModel,
|
BlockRunModel,
|
||||||
|
BrowserProfileModel,
|
||||||
CredentialModel,
|
CredentialModel,
|
||||||
CredentialParameterModel,
|
CredentialParameterModel,
|
||||||
DebugSessionModel,
|
DebugSessionModel,
|
||||||
@@ -77,6 +78,7 @@ from skyvern.forge.sdk.encrypt.base import EncryptMethod
|
|||||||
from skyvern.forge.sdk.log_artifacts import save_workflow_run_logs
|
from skyvern.forge.sdk.log_artifacts import save_workflow_run_logs
|
||||||
from skyvern.forge.sdk.models import Step, StepStatus
|
from skyvern.forge.sdk.models import Step, StepStatus
|
||||||
from skyvern.forge.sdk.schemas.ai_suggestions import AISuggestion
|
from skyvern.forge.sdk.schemas.ai_suggestions import AISuggestion
|
||||||
|
from skyvern.forge.sdk.schemas.browser_profiles import BrowserProfile
|
||||||
from skyvern.forge.sdk.schemas.credentials import Credential, CredentialType, CredentialVaultType
|
from skyvern.forge.sdk.schemas.credentials import Credential, CredentialType, CredentialVaultType
|
||||||
from skyvern.forge.sdk.schemas.debug_sessions import BlockRun, DebugSession, DebugSessionRun
|
from skyvern.forge.sdk.schemas.debug_sessions import BlockRun, DebugSession, DebugSessionRun
|
||||||
from skyvern.forge.sdk.schemas.organization_bitwarden_collections import OrganizationBitwardenCollection
|
from skyvern.forge.sdk.schemas.organization_bitwarden_collections import OrganizationBitwardenCollection
|
||||||
@@ -3500,6 +3502,89 @@ class AgentDB:
|
|||||||
for workflow_run_block in workflow_run_blocks
|
for workflow_run_block in workflow_run_blocks
|
||||||
]
|
]
|
||||||
|
|
||||||
|
async def create_browser_profile(
|
||||||
|
self,
|
||||||
|
organization_id: str,
|
||||||
|
name: str,
|
||||||
|
description: str | None = None,
|
||||||
|
) -> BrowserProfile:
|
||||||
|
try:
|
||||||
|
async with self.Session() as session:
|
||||||
|
browser_profile = BrowserProfileModel(
|
||||||
|
organization_id=organization_id,
|
||||||
|
name=name,
|
||||||
|
description=description,
|
||||||
|
)
|
||||||
|
session.add(browser_profile)
|
||||||
|
await session.commit()
|
||||||
|
await session.refresh(browser_profile)
|
||||||
|
return BrowserProfile.model_validate(browser_profile)
|
||||||
|
except SQLAlchemyError:
|
||||||
|
LOG.error("SQLAlchemyError in create_browser_profile", exc_info=True)
|
||||||
|
raise
|
||||||
|
|
||||||
|
async def get_browser_profile(
|
||||||
|
self,
|
||||||
|
profile_id: str,
|
||||||
|
organization_id: str,
|
||||||
|
include_deleted: bool = False,
|
||||||
|
) -> BrowserProfile | None:
|
||||||
|
try:
|
||||||
|
async with self.Session() as session:
|
||||||
|
query = (
|
||||||
|
select(BrowserProfileModel)
|
||||||
|
.filter_by(browser_profile_id=profile_id)
|
||||||
|
.filter_by(organization_id=organization_id)
|
||||||
|
)
|
||||||
|
if not include_deleted:
|
||||||
|
query = query.filter(BrowserProfileModel.deleted_at.is_(None))
|
||||||
|
|
||||||
|
browser_profile = (await session.scalars(query)).first()
|
||||||
|
if not browser_profile:
|
||||||
|
return None
|
||||||
|
return BrowserProfile.model_validate(browser_profile)
|
||||||
|
except SQLAlchemyError:
|
||||||
|
LOG.error("SQLAlchemyError in get_browser_profile", exc_info=True)
|
||||||
|
raise
|
||||||
|
|
||||||
|
async def list_browser_profiles(
|
||||||
|
self,
|
||||||
|
organization_id: str,
|
||||||
|
include_deleted: bool = False,
|
||||||
|
) -> list[BrowserProfile]:
|
||||||
|
try:
|
||||||
|
async with self.Session() as session:
|
||||||
|
query = select(BrowserProfileModel).filter_by(organization_id=organization_id)
|
||||||
|
if not include_deleted:
|
||||||
|
query = query.filter(BrowserProfileModel.deleted_at.is_(None))
|
||||||
|
browser_profiles = await session.scalars(query.order_by(asc(BrowserProfileModel.created_at)))
|
||||||
|
return [BrowserProfile.model_validate(profile) for profile in browser_profiles.all()]
|
||||||
|
except SQLAlchemyError:
|
||||||
|
LOG.error("SQLAlchemyError in list_browser_profiles", exc_info=True)
|
||||||
|
raise
|
||||||
|
|
||||||
|
async def delete_browser_profile(
|
||||||
|
self,
|
||||||
|
profile_id: str,
|
||||||
|
organization_id: str,
|
||||||
|
) -> None:
|
||||||
|
try:
|
||||||
|
async with self.Session() as session:
|
||||||
|
query = (
|
||||||
|
select(BrowserProfileModel)
|
||||||
|
.filter_by(browser_profile_id=profile_id)
|
||||||
|
.filter_by(organization_id=organization_id)
|
||||||
|
.filter(BrowserProfileModel.deleted_at.is_(None))
|
||||||
|
)
|
||||||
|
browser_profile = (await session.scalars(query)).first()
|
||||||
|
if not browser_profile:
|
||||||
|
raise BrowserProfileNotFound(profile_id=profile_id, organization_id=organization_id)
|
||||||
|
browser_profile.deleted_at = datetime.utcnow()
|
||||||
|
await session.commit()
|
||||||
|
except SQLAlchemyError:
|
||||||
|
LOG.error("SQLAlchemyError in delete_browser_profile", exc_info=True)
|
||||||
|
raise
|
||||||
|
|
||||||
async def get_active_persistent_browser_sessions(
|
async def get_active_persistent_browser_sessions(
|
||||||
self,
|
self,
|
||||||
organization_id: str,
|
organization_id: str,
|
||||||
|
|||||||
@@ -39,6 +39,7 @@ CREDENTIAL_AZURE_VAULT_PARAMETER_PREFIX = "azcp"
|
|||||||
CREDENTIAL_PARAMETER_PREFIX = "cp"
|
CREDENTIAL_PARAMETER_PREFIX = "cp"
|
||||||
CREDENTIAL_PREFIX = "cred"
|
CREDENTIAL_PREFIX = "cred"
|
||||||
DEBUG_SESSION_PREFIX = "ds"
|
DEBUG_SESSION_PREFIX = "ds"
|
||||||
|
BROWSER_PROFILE_PREFIX = "bp"
|
||||||
ORGANIZATION_BITWARDEN_COLLECTION_PREFIX = "obc"
|
ORGANIZATION_BITWARDEN_COLLECTION_PREFIX = "obc"
|
||||||
TASK_V2_ID = "tsk_v2"
|
TASK_V2_ID = "tsk_v2"
|
||||||
THOUGHT_ID = "ot"
|
THOUGHT_ID = "ot"
|
||||||
@@ -194,6 +195,11 @@ def generate_persistent_browser_session_id() -> str:
|
|||||||
return f"{PERSISTENT_BROWSER_SESSION_ID}_{int_id}"
|
return f"{PERSISTENT_BROWSER_SESSION_ID}_{int_id}"
|
||||||
|
|
||||||
|
|
||||||
|
def generate_browser_profile_id() -> str:
|
||||||
|
int_id = generate_id()
|
||||||
|
return f"{BROWSER_PROFILE_PREFIX}_{int_id}"
|
||||||
|
|
||||||
|
|
||||||
def generate_task_run_id() -> str:
|
def generate_task_run_id() -> str:
|
||||||
int_id = generate_id()
|
int_id = generate_id()
|
||||||
return f"{TASK_RUN_PREFIX}_{int_id}"
|
return f"{TASK_RUN_PREFIX}_{int_id}"
|
||||||
|
|||||||
@@ -28,6 +28,7 @@ from skyvern.forge.sdk.db.id import (
|
|||||||
generate_bitwarden_credit_card_data_parameter_id,
|
generate_bitwarden_credit_card_data_parameter_id,
|
||||||
generate_bitwarden_login_credential_parameter_id,
|
generate_bitwarden_login_credential_parameter_id,
|
||||||
generate_bitwarden_sensitive_information_parameter_id,
|
generate_bitwarden_sensitive_information_parameter_id,
|
||||||
|
generate_browser_profile_id,
|
||||||
generate_credential_id,
|
generate_credential_id,
|
||||||
generate_credential_parameter_id,
|
generate_credential_parameter_id,
|
||||||
generate_debug_session_id,
|
generate_debug_session_id,
|
||||||
@@ -784,6 +785,23 @@ class PersistentBrowserSessionModel(Base):
|
|||||||
deleted_at = Column(DateTime, nullable=True)
|
deleted_at = Column(DateTime, nullable=True)
|
||||||
|
|
||||||
|
|
||||||
|
class BrowserProfileModel(Base):
|
||||||
|
__tablename__ = "browser_profiles"
|
||||||
|
__table_args__ = (
|
||||||
|
Index("idx_browser_profiles_org", "organization_id"),
|
||||||
|
Index("idx_browser_profiles_org_name", "organization_id", "name"),
|
||||||
|
UniqueConstraint("organization_id", "name", name="uc_org_browser_profile_name"),
|
||||||
|
)
|
||||||
|
|
||||||
|
browser_profile_id = Column(String, primary_key=True, default=generate_browser_profile_id)
|
||||||
|
organization_id = Column(String, nullable=False)
|
||||||
|
name = Column(String, nullable=False)
|
||||||
|
description = Column(String, nullable=True)
|
||||||
|
created_at = Column(DateTime, default=datetime.datetime.utcnow, nullable=False)
|
||||||
|
modified_at = Column(DateTime, default=datetime.datetime.utcnow, onupdate=datetime.datetime.utcnow, nullable=False)
|
||||||
|
deleted_at = Column(DateTime, nullable=True)
|
||||||
|
|
||||||
|
|
||||||
class TaskRunModel(Base):
|
class TaskRunModel(Base):
|
||||||
__tablename__ = "task_runs"
|
__tablename__ = "task_runs"
|
||||||
__table_args__ = (
|
__table_args__ = (
|
||||||
|
|||||||
15
skyvern/forge/sdk/schemas/browser_profiles.py
Normal file
15
skyvern/forge/sdk/schemas/browser_profiles.py
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from pydantic import BaseModel, ConfigDict
|
||||||
|
|
||||||
|
|
||||||
|
class BrowserProfile(BaseModel):
|
||||||
|
model_config = ConfigDict(from_attributes=True)
|
||||||
|
|
||||||
|
browser_profile_id: str
|
||||||
|
organization_id: str
|
||||||
|
name: str
|
||||||
|
description: str | None = None
|
||||||
|
created_at: datetime
|
||||||
|
modified_at: datetime
|
||||||
|
deleted_at: datetime | None = None
|
||||||
Reference in New Issue
Block a user