diff --git a/skyvern/exceptions.py b/skyvern/exceptions.py
index 6a0634d8..96e7f800 100644
--- a/skyvern/exceptions.py
+++ b/skyvern/exceptions.py
@@ -504,11 +504,9 @@ class CachedActionPlanError(SkyvernException):
         super().__init__(message)
 
 
-class InvalidUrl(SkyvernHTTPException):
+class InvalidUrl(SkyvernException):
     def __init__(self, url: str) -> None:
-        super().__init__(
-            f"Invalid URL: {url}. Skyvern supports HTTP and HTTPS urls.", status_code=status.HTTP_400_BAD_REQUEST
-        )
+        super().__init__(f"Invalid URL: {url}. Skyvern supports HTTP and HTTPS urls with max 2083 character length.")
 
 
 class BlockedHost(SkyvernHTTPException):
diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py
index 89cb9465..2da5b0a6 100644
--- a/skyvern/forge/agent.py
+++ b/skyvern/forge/agent.py
@@ -48,7 +48,7 @@ from skyvern.forge.sdk.api.files import get_path_for_workflow_download_directory
 from skyvern.forge.sdk.artifact.models import ArtifactType
 from skyvern.forge.sdk.core import skyvern_context
 from skyvern.forge.sdk.core.security import generate_skyvern_signature
-from skyvern.forge.sdk.core.validators import validate_url
+from skyvern.forge.sdk.core.validators import prepend_scheme_and_validate_url
 from skyvern.forge.sdk.db.enums import TaskType
 from skyvern.forge.sdk.models import Organization, Step, StepStatus
 from skyvern.forge.sdk.schemas.tasks import Task, TaskRequest, TaskResponse, TaskStatus
@@ -139,7 +139,11 @@ class ForgeAgent:
 
             task_url = working_page.url
 
-        task_url = validate_url(task_url)
+        task_url = prepend_scheme_and_validate_url(task_url)
+        totp_verification_url = task_block.totp_verification_url
+        if totp_verification_url:
+            totp_verification_url = prepend_scheme_and_validate_url(totp_verification_url)
+
         task = await app.DATABASE.create_task(
             url=task_url,
             task_type=task_block.task_type,
@@ -147,7 +151,7 @@ class ForgeAgent:
             terminate_criterion=task_block.terminate_criterion,
             title=task_block.title or task_block.label,
             webhook_callback_url=None,
-            totp_verification_url=task_block.totp_verification_url,
+            totp_verification_url=totp_verification_url,
             totp_identifier=task_block.totp_identifier,
             navigation_goal=task_block.navigation_goal,
             data_extraction_goal=task_block.data_extraction_goal,
diff --git a/skyvern/forge/sdk/core/validators.py b/skyvern/forge/sdk/core/validators.py
index 54f47210..7da7c990 100644
--- a/skyvern/forge/sdk/core/validators.py
+++ b/skyvern/forge/sdk/core/validators.py
@@ -1,4 +1,5 @@
 import ipaddress
+from urllib.parse import urlparse
 
 from pydantic import HttpUrl, ValidationError, parse_obj_as
 
@@ -6,6 +7,48 @@ from skyvern.config import settings
 from skyvern.exceptions import InvalidUrl
 
 
+def prepend_scheme_and_validate_url(url: str) -> str:
+    """Return ``url`` as a validated HTTP(S) URL, prepending ``https://`` when it has no scheme.
+
+    An empty ``url`` is returned unchanged. Input of the form ``host:port`` (for example
+    ``localhost:8080/path``) is treated as scheme-less.
+
+    Raises:
+        InvalidUrl: if the URL cannot be parsed, uses a scheme other than http/https,
+            or is rejected by ``HttpUrl``.
+    """
+    if not url:
+        return url
+
+    try:
+        scheme = urlparse(url=url).scheme
+    except ValueError as e:
+        # urlparse() raises ValueError for e.g. unbalanced brackets in the netloc
+        raise InvalidUrl(url=url) from e
+
+    if scheme not in ("", "http", "https"):
+        # urlparse() reports "localhost" as the scheme of "localhost:8080/x". When only
+        # digits follow the first colon, read the input as "<host>:<port>" instead of
+        # rejecting it (this also applies to ambiguous input such as "tel:911").
+        port = url.partition(":")[2]
+        for delimiter in "/?#":
+            port = port.partition(delimiter)[0]
+        if not (port.isascii() and port.isdigit()):
+            raise InvalidUrl(url=url)
+        scheme = ""
+
+    # if url doesn't contain any scheme, we prepend `https` to it by default
+    if not scheme:
+        url = f"https://{url}"
+
+    try:
+        HttpUrl(url)
+    except ValidationError as e:
+        raise InvalidUrl(url=url) from e
+
+    return url
+
+
 def validate_url(url: str) -> str:
     try:
         if url:
diff --git a/skyvern/forge/sdk/schemas/tasks.py b/skyvern/forge/sdk/schemas/tasks.py
index 17654658..a71e2169 100644
--- a/skyvern/forge/sdk/schemas/tasks.py
+++ b/skyvern/forge/sdk/schemas/tasks.py
@@ -4,10 +4,11 @@ from datetime import datetime
 from enum import StrEnum
 from typing import Any
 
+from fastapi import status
 from pydantic import BaseModel, Field, HttpUrl, field_validator
 
-from skyvern.exceptions import BlockedHost, InvalidTaskStatusTransition, TaskAlreadyCanceled
-from skyvern.forge.sdk.core.validators import is_blocked_host
+from skyvern.exceptions import BlockedHost, InvalidTaskStatusTransition, SkyvernHTTPException, TaskAlreadyCanceled
+from skyvern.forge.sdk.core.validators import is_blocked_host, prepend_scheme_and_validate_url
 from skyvern.forge.sdk.db.enums import TaskType
 
 
@@ -99,28 +100,38 @@ class TaskBase(BaseModel):
 
 
 class TaskRequest(TaskBase):
-    url: HttpUrl = Field(
+    url: str = Field(
         ...,
         description="Starting URL for the task.",
         examples=["https://www.geico.com"],
     )
-    webhook_callback_url: HttpUrl | None = Field(
+    webhook_callback_url: str | None = Field(
         default=None,
         description="The URL to call when the task is completed.",
         examples=["https://my-webhook.com"],
     )
-    totp_verification_url: HttpUrl | None = None
+    totp_verification_url: str | None = None
 
     @field_validator("url", "webhook_callback_url", "totp_verification_url")
     @classmethod
-    def validate_urls(cls, v: HttpUrl | None) -> HttpUrl | None:
-        if not v or not v.host:
+    def validate_urls(cls, url: str | None) -> str | None:
+        """Normalize a URL field to an HTTP(S) string and reject blocked hosts."""
+        if url is None:
+            return None
+
+        try:
+            url = prepend_scheme_and_validate_url(url=url)
+            v = HttpUrl(url=url)
+        except Exception as e:
+            raise SkyvernHTTPException(message=str(e), status_code=status.HTTP_400_BAD_REQUEST) from e
+
+        if not v.host:
             return None
         host = v.host
         blocked = is_blocked_host(host)
         if blocked:
             raise BlockedHost(host=host)
-        return v
+        return str(v)
 
 
 class TaskStatus(StrEnum):
diff --git a/skyvern/forge/sdk/workflow/models/workflow.py b/skyvern/forge/sdk/workflow/models/workflow.py
index d8d1dc0a..bf563390 100644
--- a/skyvern/forge/sdk/workflow/models/workflow.py
+++ b/skyvern/forge/sdk/workflow/models/workflow.py
@@ -2,8 +2,11 @@ from datetime import datetime
 from enum import StrEnum
 from typing import Any, List
 
-from pydantic import BaseModel
+from fastapi import status
+from pydantic import BaseModel, HttpUrl, field_validator
 
+from skyvern.exceptions import BlockedHost, SkyvernHTTPException
+from skyvern.forge.sdk.core.validators import is_blocked_host, prepend_scheme_and_validate_url
 from skyvern.forge.sdk.schemas.tasks import ProxyLocation
 from skyvern.forge.sdk.workflow.exceptions import WorkflowDefinitionHasDuplicateBlockLabels
 from skyvern.forge.sdk.workflow.models.block import BlockTypeVar
@@ -17,6 +20,27 @@ class WorkflowRequestBody(BaseModel):
     totp_verification_url: str | None = None
     totp_identifier: str | None = None
 
+    @field_validator("webhook_callback_url", "totp_verification_url")
+    @classmethod
+    def validate_urls(cls, url: str | None) -> str | None:
+        """Normalize a URL field to an HTTP(S) string and reject blocked hosts."""
+        if url is None:
+            return None
+
+        try:
+            url = prepend_scheme_and_validate_url(url=url)
+            v = HttpUrl(url=url)
+        except Exception as e:
+            raise SkyvernHTTPException(message=str(e), status_code=status.HTTP_400_BAD_REQUEST) from e
+
+        if not v.host:
+            return None
+        host = v.host
+        blocked = is_blocked_host(host)
+        if blocked:
+            raise BlockedHost(host=host)
+        return str(v)
+
 
 class RunWorkflowResponse(BaseModel):
     workflow_id: str