From 80cdc1e0e22e68ec5ab0048352c2169ed91349d6 Mon Sep 17 00:00:00 2001 From: Shuchang Zheng Date: Sat, 12 Apr 2025 15:18:15 -0700 Subject: [PATCH] Retry url encode fix (#2136) --- skyvern/forge/sdk/api/files.py | 5 ++++- skyvern/utils/url_validators.py | 9 ++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/skyvern/forge/sdk/api/files.py b/skyvern/forge/sdk/api/files.py index ea5c61de..91b5abbc 100644 --- a/skyvern/forge/sdk/api/files.py +++ b/skyvern/forge/sdk/api/files.py @@ -12,11 +12,13 @@ from urllib.parse import unquote, urlparse import aiohttp import structlog from multidict import CIMultiDictProxy +from yarl import URL from skyvern.config import settings from skyvern.constants import BROWSER_DOWNLOAD_TIMEOUT, BROWSER_DOWNLOADING_SUFFIX, REPO_ROOT_DIR from skyvern.exceptions import DownloadFileMaxSizeExceeded, DownloadFileMaxWaitingTime from skyvern.forge.sdk.api.aws import AsyncAWSClient +from skyvern.utils.url_validators import encode_url LOG = structlog.get_logger() @@ -83,7 +85,8 @@ async def download_file(url: str, max_size_mb: int | None = None) -> str: async with aiohttp.ClientSession(raise_for_status=True) as session: LOG.info("Starting to download file", url=url) - async with session.get(url) as response: + encoded_url = encode_url(url) + async with session.get(URL(encoded_url, encoded=True)) as response: # Check the content length if available if max_size_mb and response.content_length and response.content_length > max_size_mb * 1024 * 1024: # todo: move to root exception.py diff --git a/skyvern/utils/url_validators.py b/skyvern/utils/url_validators.py index 2882ddb9..8100ac05 100644 --- a/skyvern/utils/url_validators.py +++ b/skyvern/utils/url_validators.py @@ -1,5 +1,5 @@ import ipaddress -from urllib.parse import urlparse +from urllib.parse import quote, urlparse, urlsplit, urlunsplit from fastapi import status from pydantic import HttpUrl, ValidationError @@ -59,3 +59,10 @@ def validate_url(url: str) -> str | None: if 
def encode_url(url: str) -> str:
    """Return *url* with its path component percent-encoded.

    Only the path is rewritten; scheme, netloc, query and fragment are
    passed back through ``urlunsplit`` without being re-encoded.

    Existing ``%XX`` escapes in the path are preserved so that an already
    encoded URL is not double-encoded.  A ``%`` that does not start a valid
    two-hex-digit escape is treated as a literal percent sign and encoded
    as ``%25``; leaving it bare would produce a malformed URL.

    Args:
        url: The URL to encode.

    Returns:
        The URL with unsafe path characters percent-encoded.
    """
    # Local import: keeps this block self-contained without touching the
    # module-level import list.
    import re

    parts = list(urlsplit(url))
    # Keep "/" (segment separator) and "%" (so existing escapes survive).
    quoted_path = quote(parts[2], safe="/%")
    # Every "%" emitted by quote() is followed by two hex digits, so the
    # only matches here are stray literal percent signs from the input.
    parts[2] = re.sub(r"%(?![0-9A-Fa-f]{2})", "%25", quoted_path)
    return urlunsplit(parts)