import re
from urllib.parse import quote, urlsplit, urlunsplit

# A "%" that does NOT introduce a valid two-hex-digit escape (e.g. "100%.pdf").
_STRAY_PERCENT = re.compile(r"%(?![0-9A-Fa-f]{2})")

# Characters kept verbatim in the path: the segment separator and the escape
# introducer (so existing "%XX" sequences are not double-encoded).
_PATH_SAFE = "/%"

# Characters kept verbatim in the query: the escape introducer plus the
# delimiters commonly used to structure query strings, so that an already
# well-formed (e.g. signed) query is returned byte-for-byte unchanged.
_QUERY_SAFE = "%/?:@!$&'()*+,;=[]"


def encode_url(url: str) -> str:
    """Return *url* with its path and query percent-encoded, without double-encoding.

    The result is meant to be handed to an HTTP client that is told the URL is
    already encoded (for example ``yarl.URL(encoded, encoded=True)``), so every
    character that is illegal on the wire must be escaped here.

    * Existing valid ``%XX`` escapes are preserved as-is.
    * A literal ``%`` that is not followed by two hex digits is escaped to
      ``%25`` so the output never contains a malformed escape sequence.
    * Spaces, non-ASCII characters and other illegal characters in the path
      and in the query are percent-encoded (UTF-8).
    * Scheme, network location and fragment are passed through unchanged.

    Args:
        url: The URL to encode; may be partially or fully encoded already.

    Returns:
        The re-assembled URL string.
    """
    scheme, netloc, path, query, fragment = urlsplit(url)

    # Escape stray percent signs first; afterwards every "%" starts a valid
    # escape, so it is safe to keep "%" in the ``safe`` set for ``quote``.
    path = quote(_STRAY_PERCENT.sub("%25", path), safe=_PATH_SAFE)
    query = quote(_STRAY_PERCENT.sub("%25", query), safe=_QUERY_SAFE)

    return urlunsplit((scheme, netloc, path, query, fragment))