# NOTE(review): reconstructed from a whitespace-mangled git patch
# ("create_project + update/deploy_project", #3075). Code tokens are unchanged;
# only comments/docstrings were added.

# --- skyvern/forge/sdk/artifact/manager.py: new ArtifactManager method ---
async def create_project_file_artifact(
    self,
    *,
    organization_id: str,
    project_id: str,
    project_version: int,
    file_path: str,
    data: bytes,
) -> str:
    """Create an artifact for a project file.

    Args:
        organization_id: The organization ID
        project_id: The project ID
        project_version: The project version
        file_path: The file path relative to project root
        data: The file content as bytes

    Returns:
        The artifact ID
    """
    artifact_id = generate_artifact_id()
    # The configured storage backend (local disk or S3) decides the URI layout;
    # both lay files out under projects/<project_id>/<version>/<file_path>.
    uri = app.STORAGE.build_project_file_uri(
        organization_id=organization_id,
        project_id=project_id,
        project_version=project_version,
        file_path=file_path,
    )
    return await self._create_artifact(
        # One async-task key per project revision, so all file uploads for a
        # given (project, version) pair are grouped under the same task key.
        aio_task_primary_key=f"{project_id}_{project_version}",
        artifact_id=artifact_id,
        artifact_type=ArtifactType.PROJECT_FILE,
        uri=uri,
        organization_id=organization_id,
        data=data,
    )


# --- skyvern/forge/sdk/artifact/models.py: new ArtifactType member ---
class ArtifactType(StrEnum):
    # ... existing members (TRACE, HAR, ...) unchanged ...

    # Project files
    PROJECT_FILE = "project_file"


# --- skyvern/forge/sdk/artifact/storage/base.py: new BaseStorage abstract method ---
@abstractmethod
def build_project_file_uri(
    self, *, organization_id: str, project_id: str, project_version: int, file_path: str
) -> str:
    # Subclasses return a backend-specific URI for a project file; see the
    # local and S3 implementations for the concrete layouts.
    pass
# --- skyvern/forge/sdk/artifact/storage/local.py: LocalStorage implementation ---
def build_project_file_uri(
    self, *, organization_id: str, project_id: str, project_version: int, file_path: str
) -> str:
    """Build the local-filesystem URI for a project file.

    Layout mirrors the S3 backend so the two storages stay interchangeable:
    ``file://<artifact_path>/<env>/<org_id>/projects/<project_id>/<version>/<file_path>``
    """
    return f"file://{self.artifact_path}/{settings.ENV}/{organization_id}/projects/{project_id}/{project_version}/{file_path}"
# --- skyvern/forge/sdk/artifact/storage/s3.py: S3Storage implementation ---
def build_project_file_uri(
    self, *, organization_id: str, project_id: str, project_version: int, file_path: str
) -> str:
    """Build the S3 URI for a project file.

    Args:
        organization_id: The organization ID
        project_id: The project ID
        project_version: The project version
        file_path: The file path relative to project root

    Returns:
        The S3 URI for the project file
    """
    return f"{self._build_base_uri(organization_id)}/projects/{project_id}/{project_version}/{file_path}"


# --- skyvern/forge/sdk/db/client.py: new AgentDB methods ---
async def update_debug_session(
    self,
    *,
    debug_session_id: str,
    browser_session_id: str | None = None,
) -> DebugSession:
    """Attach a browser session to an existing debug session.

    Raises:
        NotFoundError: if no debug session with ``debug_session_id`` exists.
    """
    async with self.Session() as session:
        debug_session = (
            await session.scalars(select(DebugSessionModel).filter_by(debug_session_id=debug_session_id))
        ).first()

        if not debug_session:
            raise NotFoundError(f"Debug session {debug_session_id} not found")

        # Only overwrite when a new browser session id is actually provided.
        if browser_session_id:
            debug_session.browser_session_id = browser_session_id

        await session.commit()
        await session.refresh(debug_session)

        return DebugSession.model_validate(debug_session)


async def create_project(
    self,
    organization_id: str,
    workflow_permanent_id: str | None = None,
    run_id: str | None = None,
    project_id: str | None = None,
    version: int | None = None,
) -> Project:
    """Insert a new project revision row and return it as a Project schema.

    ``project_id`` and ``version`` are only set explicitly when creating a new
    revision of an existing project (see the deploy flow); otherwise the model
    defaults apply.
    """
    try:
        async with self.Session() as session:
            project = ProjectModel(
                organization_id=organization_id,
                workflow_permanent_id=workflow_permanent_id,
                run_id=run_id,
            )
            if project_id:
                project.project_id = project_id
            if version:
                project.version = version
            session.add(project)
            await session.commit()
            await session.refresh(project)
            return convert_to_project(project)
    except SQLAlchemyError:
        LOG.error("SQLAlchemyError", exc_info=True)
        raise
async def update_project(
    self,
    project_id: str,
    organization_id: str,
    artifact_id: str | None = None,
    workflow_permanent_id: str | None = None,
    run_id: str | None = None,
    version: int | None = None,
) -> Project:
    """Partially update a non-deleted project row; only truthy fields are applied.

    Raises:
        NotFoundError: if no matching, non-deleted project exists.
    """
    try:
        async with self.Session() as session:
            get_project_query = (
                select(ProjectModel)
                .filter_by(project_id=project_id)
                .filter_by(organization_id=organization_id)
                .filter(ProjectModel.deleted_at.is_(None))
            )
            if project := (await session.scalars(get_project_query)).first():
                # Falsy values (None, 0, "") are treated as "leave unchanged".
                if artifact_id:
                    project.artifact_id = artifact_id
                if workflow_permanent_id:
                    project.workflow_permanent_id = workflow_permanent_id
                if run_id:
                    project.run_id = run_id
                if version:
                    project.version = version
                await session.commit()
                await session.refresh(project)
                return convert_to_project(project)
            else:
                raise NotFoundError("Project not found")
    except SQLAlchemyError:
        LOG.error("SQLAlchemyError", exc_info=True)
        raise
    except NotFoundError:
        LOG.error("No project found to update", project_id=project_id)
        raise
    except Exception:
        LOG.error("UnexpectedError", exc_info=True)
        raise


async def get_projects(
    self,
    organization_id: str,
    page: int = 1,
    page_size: int = 10,
) -> list[Project]:
    """Return the latest revision of each non-deleted project, newest first, paginated."""
    try:
        async with self.Session() as session:
            # Calculate offset for pagination
            offset = (page - 1) * page_size

            # Subquery to get the latest version of each project
            latest_versions_subquery = (
                select(ProjectModel.project_id, func.max(ProjectModel.version).label("latest_version"))
                .filter_by(organization_id=organization_id)
                .filter(ProjectModel.deleted_at.is_(None))
                .group_by(ProjectModel.project_id)
                .subquery()
            )

            # Main query to get projects with their latest versions
            get_projects_query = (
                select(ProjectModel)
                .join(
                    latest_versions_subquery,
                    and_(
                        ProjectModel.project_id == latest_versions_subquery.c.project_id,
                        ProjectModel.version == latest_versions_subquery.c.latest_version,
                    ),
                )
                .filter_by(organization_id=organization_id)
                .filter(ProjectModel.deleted_at.is_(None))
                .order_by(ProjectModel.created_at.desc())
                .limit(page_size)
                .offset(offset)
            )
            projects = (await session.scalars(get_projects_query)).all()
            return [convert_to_project(project) for project in projects]
    except SQLAlchemyError:
        LOG.error("SQLAlchemyError", exc_info=True)
        raise
async def get_project(
    self,
    project_id: str,
    organization_id: str,
    version: int | None = None,
) -> Project | None:
    """Get a specific project by ID and optionally by version.

    When ``version`` is omitted, the highest (latest) version is returned.
    Returns ``None`` when no matching, non-deleted project exists.
    """
    try:
        async with self.Session() as session:
            get_project_query = (
                select(ProjectModel)
                .filter_by(project_id=project_id)
                .filter_by(organization_id=organization_id)
                .filter(ProjectModel.deleted_at.is_(None))
            )

            if version is not None:
                get_project_query = get_project_query.filter_by(version=version)
            else:
                # Get the latest version
                get_project_query = get_project_query.order_by(desc(ProjectModel.version)).limit(1)

            if project := (await session.scalars(get_project_query)).first():
                return convert_to_project(project)
            return None
    except SQLAlchemyError:
        LOG.error("SQLAlchemyError", exc_info=True)
        raise
    except Exception:
        LOG.error("UnexpectedError", exc_info=True)
        raise


async def create_project_file(
    self,
    project_revision_id: str,
    project_id: str,
    organization_id: str,
    file_path: str,
    file_name: str,
    file_type: str,
    content_hash: str | None = None,
    file_size: int | None = None,
    mime_type: str | None = None,
    encoding: str = "utf-8",
    artifact_id: str | None = None,
) -> None:
    """Create a project file record.

    Pure metadata insert — the file bytes themselves live in the artifact
    referenced by ``artifact_id``. ``encoding`` defaults to "utf-8".
    """
    try:
        async with self.Session() as session:
            project_file = ProjectFileModel(
                project_revision_id=project_revision_id,
                project_id=project_id,
                organization_id=organization_id,
                file_path=file_path,
                file_name=file_name,
                file_type=file_type,
                content_hash=content_hash,
                file_size=file_size,
                mime_type=mime_type,
                encoding=encoding,
                artifact_id=artifact_id,
            )
            session.add(project_file)
            await session.commit()
    except SQLAlchemyError:
        LOG.error("SQLAlchemyError", exc_info=True)
        raise
    except Exception:
        LOG.error("UnexpectedError", exc_info=True)
        raise
async def delete_project_files(
    self,
    project_revision_id: str,
    organization_id: str,
) -> None:
    """Delete all files for a project revision.

    Hard-deletes the ProjectFileModel rows; the underlying artifacts are not
    touched here.
    """
    try:
        async with self.Session() as session:
            delete_files_query = (
                delete(ProjectFileModel)
                .where(ProjectFileModel.project_revision_id == project_revision_id)
                .where(ProjectFileModel.organization_id == organization_id)
            )
            await session.execute(delete_files_query)
            await session.commit()
    except SQLAlchemyError:
        LOG.error("SQLAlchemyError", exc_info=True)
        raise
    except Exception:
        LOG.error("UnexpectedError", exc_info=True)
        raise


# --- skyvern/forge/sdk/db/utils.py: new converter ---
def convert_to_project(project_model: ProjectModel) -> Project:
    """Map a ProjectModel ORM row to the public Project schema.

    Note: the model's ``workflow_permanent_id`` column is exposed as
    ``workflow_id`` on the schema.
    """
    return Project(
        project_revision_id=project_model.project_revision_id,
        project_id=project_model.project_id,
        organization_id=project_model.organization_id,
        workflow_id=project_model.workflow_permanent_id,
        run_id=project_model.run_id,
        version=project_model.version,
        created_at=project_model.created_at,
        modified_at=project_model.modified_at,
        deleted_at=project_model.deleted_at,
    )
# --- skyvern/forge/sdk/routes/projects.py: create endpoint ---
@base_router.post(
    "/projects",
    response_model=CreateProjectResponse,
    summary="Create project",
    description="Create a new project with optional files and metadata",
    tags=["Projects"],
    openapi_extra={
        "x-fern-sdk-method-name": "create_project",
    },
)
@base_router.post(
    "/projects/",
    response_model=CreateProjectResponse,
    include_in_schema=False,
)
async def create_project(
    data: CreateProjectRequest,
    current_org: Organization = Depends(org_auth_service.get_current_org),
) -> CreateProjectResponse:
    """Create a new project with optional files and metadata."""
    organization_id = current_org.organization_id
    LOG.info(
        "Creating project",
        organization_id=organization_id,
        file_count=len(data.files) if data.files else 0,
    )
    # validate workflow_id and run_id before creating anything
    if data.workflow_id:
        if not await app.DATABASE.get_workflow_by_permanent_id(
            workflow_permanent_id=data.workflow_id, organization_id=organization_id
        ):
            raise WorkflowNotFound(workflow_permanent_id=data.workflow_id)
    if data.run_id:
        if not await app.DATABASE.get_run(run_id=data.run_id, organization_id=organization_id):
            raise HTTPException(status_code=404, detail=f"Run_id {data.run_id} not found")
    try:
        # Create the project in the database
        project = await app.DATABASE.create_project(
            organization_id=organization_id,
            workflow_permanent_id=data.workflow_id,
            run_id=data.run_id,
        )
        # Process files if provided: uploads artifacts and records one DB row
        # per file (done inside build_file_tree).
        file_tree = {}
        file_count = 0
        if data.files:
            file_tree = await project_service.build_file_tree(
                data.files,
                organization_id=organization_id,
                project_id=project.project_id,
                project_version=project.version,
                project_revision_id=project.project_revision_id,
            )
            file_count = len(data.files)
        return CreateProjectResponse(
            project_id=project.project_id,
            version=project.version,
            workflow_id=project.workflow_id,
            run_id=project.run_id,
            file_count=file_count,
            created_at=project.created_at,
            file_tree=file_tree,
        )
    except Exception as e:
        # NOTE(review): any failure after validation is surfaced as a generic
        # 500; the project row created above is not rolled back here.
        LOG.error("Failed to create project", error=str(e), exc_info=True)
        raise HTTPException(status_code=500, detail="Failed to create project")
@legacy_base_router.get("/projects/{project_id}")
@legacy_base_router.get("/projects/{project_id}/", include_in_schema=False)
@base_router.get(
    "/projects/{project_id}",
    response_model=Project,
    summary="Get project by ID",
    description="Retrieves a specific project by its ID",
    tags=["Projects"],
    openapi_extra={
        "x-fern-sdk-method-name": "get_project",
    },
)
@base_router.get(
    "/projects/{project_id}/",
    response_model=Project,
    include_in_schema=False,
)
async def get_project(
    project_id: str = Path(
        ...,
        description="The unique identifier of the project",
        examples=["proj_abc123"],
    ),
    current_org: Organization = Depends(org_auth_service.get_current_org),
) -> Project:
    """Get a project by its ID.

    Returns the latest version (no version query parameter is exposed here).
    """
    LOG.info(
        "Getting project",
        organization_id=current_org.organization_id,
        project_id=project_id,
    )

    project = await app.DATABASE.get_project(
        project_id=project_id,
        organization_id=current_org.organization_id,
    )

    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    return project


@legacy_base_router.get("/projects")
@legacy_base_router.get("/projects/", include_in_schema=False)
@base_router.get(
    "/projects",
    response_model=list[Project],
    summary="Get all projects",
    description="Retrieves a paginated list of projects for the current organization",
    tags=["Projects"],
    openapi_extra={
        "x-fern-sdk-method-name": "get_projects",
    },
)
@base_router.get(
    "/projects/",
    response_model=list[Project],
    include_in_schema=False,
)
async def get_projects(
    current_org: Organization = Depends(org_auth_service.get_current_org),
    page: int = Query(
        1,
        ge=1,
        description="Page number for pagination",
        examples=[1],
    ),
    page_size: int = Query(
        10,
        ge=1,
        description="Number of items per page",
        examples=[10],
    ),
) -> list[Project]:
    """Get all projects for the current organization (latest version of each)."""
    LOG.info(
        "Getting projects",
        organization_id=current_org.organization_id,
        page=page,
        page_size=page_size,
    )

    projects = await app.DATABASE.get_projects(
        organization_id=current_org.organization_id,
        page=page,
        page_size=page_size,
    )

    return projects
@base_router.post(
    "/projects/{project_id}/deploy",
    response_model=CreateProjectResponse,
    summary="Deploy project",
    description="Deploy a project with updated files, creating a new version",
    tags=["Projects"],
    openapi_extra={
        "x-fern-sdk-method-name": "deploy_project",
    },
)
@base_router.post(
    "/projects/{project_id}/deploy/",
    response_model=CreateProjectResponse,
    include_in_schema=False,
)
async def deploy_project(
    data: DeployProjectRequest,
    project_id: str = Path(
        ...,
        description="The unique identifier of the project",
        examples=["proj_abc123"],
    ),
    current_org: Organization = Depends(org_auth_service.get_current_org),
) -> CreateProjectResponse:
    """Deploy a project with updated files, creating a new version.

    Raises:
        HTTPException: 404 if the project does not exist, 500 on unexpected failure.
    """
    LOG.info(
        "Deploying project",
        organization_id=current_org.organization_id,
        project_id=project_id,
        file_count=len(data.files) if data.files else 0,
    )

    try:
        # Resolve the latest revision so we can bump its version number.
        latest_project = await app.DATABASE.get_project(
            project_id=project_id,
            organization_id=current_org.organization_id,
        )

        if not latest_project:
            raise HTTPException(status_code=404, detail="Project not found")

        # Create a new revision row sharing the user-facing project_id.
        new_version = latest_project.version + 1
        new_project_revision = await app.DATABASE.create_project(
            organization_id=current_org.organization_id,
            workflow_permanent_id=latest_project.workflow_id,
            run_id=latest_project.run_id,
            project_id=project_id,  # Use the same project_id for versioning
            version=new_version,
        )

        # Upload files and persist project-file records.
        # BUGFIX: project_service.build_file_tree already calls
        # app.DATABASE.create_project_file for every file, so the extra
        # per-file loop that re-created the same records here was removed —
        # it inserted a duplicate ProjectFileModel row for each deployed file.
        file_tree = {}
        file_count = 0
        if data.files:
            file_tree = await project_service.build_file_tree(
                data.files,
                organization_id=current_org.organization_id,
                project_id=new_project_revision.project_id,
                project_version=new_project_revision.version,
                project_revision_id=new_project_revision.project_revision_id,
            )
            file_count = len(data.files)

        return CreateProjectResponse(
            project_id=new_project_revision.project_id,
            version=new_project_revision.version,
            workflow_id=new_project_revision.workflow_id,
            run_id=new_project_revision.run_id,
            file_count=file_count,
            created_at=new_project_revision.created_at,
            file_tree=file_tree,
        )

    except HTTPException:
        # Preserve deliberate 4xx responses instead of masking them as 500s.
        raise
    except Exception as e:
        LOG.error("Failed to deploy project", error=str(e), exc_info=True)
        raise HTTPException(status_code=500, detail="Failed to deploy project")


# --- skyvern/schemas/projects.py (replaces misspelled skyvern/schemas/peojects.py) ---
class FileEncoding(StrEnum):
    """Supported file content encodings."""

    BASE64 = "base64"


class ProjectFile(BaseModel):
    """Model representing a file in a project."""

    path: str = Field(..., description="File path relative to project root", examples=["src/main.py"])
    content: str = Field(..., description="Base64 encoded file content")
    encoding: FileEncoding = Field(default=FileEncoding.BASE64, description="Content encoding")
    mime_type: str | None = Field(default=None, description="MIME type (auto-detected if not provided)")
class CreateProjectRequest(BaseModel):
    """Request body for creating a project, optionally with an initial file set."""

    workflow_id: str | None = Field(default=None, description="Associated workflow ID")
    run_id: str | None = Field(default=None, description="Associated run ID")
    files: list[ProjectFile] | None = Field(
        default=None,
        description="Array of files to include in the project",
        examples=[
            {
                "path": "main.py",
                "content": "cHJpbnQoIkhlbGxvLCBXb3JsZCEiKQ==",  # base64 encoded "print('Hello, World!')"
                "encoding": "base64",
                "mime_type": "text/x-python",
            },
            {
                "path": "requirements.txt",
                "content": "cmVxdWVzdHM9PTIuMjguMQ==",  # base64 encoded "requests==2.28.1"
                "encoding": "base64",
                "mime_type": "text/plain",
            },
        ],
    )


class FileNode(BaseModel):
    """Model representing a file or directory in the file tree."""

    type: str = Field(..., description="Type of node: 'file' or 'directory'")
    size: int | None = Field(default=None, description="File size in bytes")
    mime_type: str | None = Field(default=None, description="MIME type of the file")
    content_hash: str | None = Field(default=None, description="SHA256 hash of file content")
    created_at: datetime = Field(..., description="Timestamp when the file was created")
    children: dict[str, FileNode] | None = Field(default=None, description="Child nodes for directories")


class DeployProjectRequest(BaseModel):
    """Request model for deploying a project with updated files."""

    files: list[ProjectFile] = Field(
        ...,
        description="Array of files to include in the project",
        examples=[
            {
                "path": "src/main.py",
                "content": "cHJpbnQoIkhlbGxvLCBXb3JsZCEiKQ==",  # base64 encoded "print('Hello, World!')"
                "encoding": "base64",
                "mime_type": "text/x-python",
            }
        ],
    )


class CreateProjectResponse(BaseModel):
    """Response returned by both the create and deploy endpoints."""

    project_id: str = Field(..., description="Unique project identifier", examples=["proj_abc123"])
    version: int = Field(..., description="Project version number", examples=[1])
    workflow_id: str | None = Field(default=None, description="ID of the workflow this project is associated with")
    run_id: str | None = Field(
        default=None, description="ID of the workflow run or task run that generated this project"
    )
    file_count: int = Field(..., description="Total number of files in the project")
    file_tree: dict[str, FileNode] = Field(..., description="Hierarchical file tree structure")
    created_at: datetime = Field(..., description="Timestamp when the project was created")


class Project(BaseModel):
    """Public schema for a project revision (one row per version)."""

    model_config = ConfigDict(from_attributes=True)

    project_revision_id: str = Field(description="Unique identifier for this specific project revision")
    project_id: str = Field(description="User-facing project identifier, consistent across versions")
    organization_id: str = Field(description="ID of the organization that owns this project")
    workflow_id: str | None = Field(default=None, description="ID of the workflow this project is associated with")
    run_id: str | None = Field(
        default=None, description="ID of the workflow run or task run that generated this project"
    )
    version: int = Field(description="Version number of the project")
    created_at: datetime = Field(description="Timestamp when the project was created")
    modified_at: datetime = Field(description="Timestamp when the project was last modified")
    deleted_at: datetime | None = Field(default=None, description="Timestamp when the project was soft deleted")
async def build_file_tree(
    files: list[ProjectFile],
    organization_id: str,
    project_id: str,
    project_version: int,
    project_revision_id: str,
) -> dict[str, FileNode]:
    """Build a hierarchical file tree from a list of files and upload the files
    to storage with the same tree structure.

    For every file this:
      1. decodes the base64 content to compute size and sha256 hash,
      2. uploads the bytes as a PROJECT_FILE artifact,
      3. records a project-file row in the database,
      4. inserts a FileNode into the returned tree.

    Args:
        files: Files to upload (base64-encoded content).
        organization_id: Owning organization.
        project_id: User-facing project identifier.
        project_version: Version being built.
        project_revision_id: Revision row the file records belong to.

    Returns:
        Nested mapping of path segment -> FileNode.

    Raises:
        Exception: re-raised if artifact upload or DB insert fails.
    """
    file_tree: dict[str, FileNode] = {}

    for file in files:
        # Decode content to calculate size and hash
        content_bytes = base64.b64decode(file.content)
        content_hash = hashlib.sha256(content_bytes).hexdigest()
        file_size = len(content_bytes)

        # Create artifact and upload to storage
        try:
            artifact_id = await app.ARTIFACT_MANAGER.create_project_file_artifact(
                organization_id=organization_id,
                project_id=project_id,
                project_version=project_version,
                file_path=file.path,
                data=content_bytes,
            )
            LOG.debug(
                "Created project file artifact",
                artifact_id=artifact_id,
                file_path=file.path,
                project_id=project_id,
                project_version=project_version,
            )
            # create a project file record
            await app.DATABASE.create_project_file(
                project_revision_id=project_revision_id,
                project_id=project_id,
                organization_id=organization_id,
                file_path=file.path,
                file_name=file.path.split("/")[-1],
                file_type="file",
                content_hash=f"sha256:{content_hash}",
                file_size=file_size,
                mime_type=file.mime_type,
                artifact_id=artifact_id,
            )
        except Exception:
            LOG.exception(
                "Failed to create project file artifact",
                file_path=file.path,
                project_id=project_id,
                project_version=project_version,
                project_revision_id=project_revision_id,
            )
            raise

        # Walk/create directory nodes for the file's parent path.
        path_parts = file.path.split("/")
        current_level = file_tree

        for part in path_parts[:-1]:
            node = current_level.get(part)
            if node is None:
                node = FileNode(type="directory", created_at=datetime.utcnow(), children={})
                current_level[part] = node
            elif node.type == "file":
                # A file already occupies this path segment; promote it to a
                # directory (keeping its original creation time) so the nested
                # file can be placed beneath it.
                node = FileNode(type="directory", created_at=node.created_at, children={})
                current_level[part] = node
            if node.children is None:
                node.children = {}
            # BUGFIX: descend into the node's ACTUAL children dict. The previous
            # `current_level[part].children or {}` evaluated to a brand-new dict
            # whenever children was empty (an empty dict is falsy — and freshly
            # created directories always start with children={}), so every
            # nested file was inserted into a detached dict and silently
            # dropped from the returned tree.
            current_level = node.children

        # Add the file leaf node.
        filename = path_parts[-1]
        current_level[filename] = FileNode(
            type="file",
            size=file_size,
            mime_type=file.mime_type,
            content_hash=f"sha256:{content_hash}",
            created_at=datetime.utcnow(),
        )

    return file_tree