Retry url encode fix (#2136)

This commit is contained in:
Shuchang Zheng
2025-04-12 15:18:15 -07:00
committed by GitHub
parent 6cf39446eb
commit 80cdc1e0e2
2 changed files with 12 additions and 2 deletions

View File

@@ -12,11 +12,13 @@ from urllib.parse import unquote, urlparse
import aiohttp
import structlog
from multidict import CIMultiDictProxy
from yarl import URL
from skyvern.config import settings
from skyvern.constants import BROWSER_DOWNLOAD_TIMEOUT, BROWSER_DOWNLOADING_SUFFIX, REPO_ROOT_DIR
from skyvern.exceptions import DownloadFileMaxSizeExceeded, DownloadFileMaxWaitingTime
from skyvern.forge.sdk.api.aws import AsyncAWSClient
from skyvern.utils.url_validators import encode_url
LOG = structlog.get_logger()
@@ -83,7 +85,8 @@ async def download_file(url: str, max_size_mb: int | None = None) -> str:
async with aiohttp.ClientSession(raise_for_status=True) as session:
LOG.info("Starting to download file", url=url)
async with session.get(url) as response:
encoded_url = encode_url(url)
async with session.get(URL(encoded_url, encoded=True)) as response:
# Check the content length if available
if max_size_mb and response.content_length and response.content_length > max_size_mb * 1024 * 1024:
# todo: move to root exception.py

View File

@@ -1,5 +1,5 @@
import ipaddress
from urllib.parse import urlparse
from urllib.parse import quote, urlparse, urlsplit, urlunsplit
from fastapi import status
from pydantic import HttpUrl, ValidationError
@@ -59,3 +59,10 @@ def validate_url(url: str) -> str | None:
if blocked:
raise BlockedHost(host=host)
return str(v)
def encode_url(url: str) -> str:
    """Return *url* with its path component percent-encoded.

    Only the path is rewritten; scheme, netloc, query and fragment are passed
    through exactly as ``urlsplit`` parsed them. Within the path:

    * ``/`` is preserved so the segment structure is unchanged.
    * Existing ``%XX`` escapes are preserved so an already-encoded URL is not
      double-encoded.
    * A literal ``%`` that is *not* followed by two hex digits is encoded as
      ``%25``; left alone it would form a malformed percent-escape.
    * Every other character outside ``quote``'s always-safe set is
      percent-encoded (non-ASCII text as UTF-8 bytes).

    Args:
        url: The URL to encode. May already be partially or fully encoded.

    Returns:
        The URL with an encoded path.
    """
    # Function-scope import keeps this block self-contained.
    import re

    parts = urlsplit(url)
    # Escape stray "%" first, so every "%" that reaches quote() starts a valid
    # escape sequence and can safely be listed in `safe`.
    path = re.sub(r"%(?![0-9A-Fa-f]{2})", "%25", parts.path)
    # Encode the path while preserving "/" and valid "%XX" escapes.
    return urlunsplit(parts._replace(path=quote(path, safe="/%")))