Retry url encode fix (#2136)

This commit is contained in:
Shuchang Zheng
2025-04-12 15:18:15 -07:00
committed by GitHub
parent 6cf39446eb
commit 80cdc1e0e2
2 changed files with 12 additions and 2 deletions

View File

@@ -12,11 +12,13 @@ from urllib.parse import unquote, urlparse
import aiohttp import aiohttp
import structlog import structlog
from multidict import CIMultiDictProxy from multidict import CIMultiDictProxy
from yarl import URL
from skyvern.config import settings from skyvern.config import settings
from skyvern.constants import BROWSER_DOWNLOAD_TIMEOUT, BROWSER_DOWNLOADING_SUFFIX, REPO_ROOT_DIR from skyvern.constants import BROWSER_DOWNLOAD_TIMEOUT, BROWSER_DOWNLOADING_SUFFIX, REPO_ROOT_DIR
from skyvern.exceptions import DownloadFileMaxSizeExceeded, DownloadFileMaxWaitingTime from skyvern.exceptions import DownloadFileMaxSizeExceeded, DownloadFileMaxWaitingTime
from skyvern.forge.sdk.api.aws import AsyncAWSClient from skyvern.forge.sdk.api.aws import AsyncAWSClient
from skyvern.utils.url_validators import encode_url
LOG = structlog.get_logger() LOG = structlog.get_logger()
@@ -83,7 +85,8 @@ async def download_file(url: str, max_size_mb: int | None = None) -> str:
async with aiohttp.ClientSession(raise_for_status=True) as session: async with aiohttp.ClientSession(raise_for_status=True) as session:
LOG.info("Starting to download file", url=url) LOG.info("Starting to download file", url=url)
async with session.get(url) as response: encoded_url = encode_url(url)
async with session.get(URL(encoded_url, encoded=True)) as response:
# Check the content length if available # Check the content length if available
if max_size_mb and response.content_length and response.content_length > max_size_mb * 1024 * 1024: if max_size_mb and response.content_length and response.content_length > max_size_mb * 1024 * 1024:
# todo: move to root exception.py # todo: move to root exception.py

View File

@@ -1,5 +1,5 @@
import ipaddress import ipaddress
from urllib.parse import urlparse from urllib.parse import quote, urlparse, urlsplit, urlunsplit
from fastapi import status from fastapi import status
from pydantic import HttpUrl, ValidationError from pydantic import HttpUrl, ValidationError
@@ -59,3 +59,10 @@ def validate_url(url: str) -> str | None:
if blocked: if blocked:
raise BlockedHost(host=host) raise BlockedHost(host=host)
return str(v) return str(v)
def encode_url(url: str) -> str:
    """Return *url* with its path component percent-encoded.

    Only the path is rewritten; scheme, netloc, query and fragment are
    passed through exactly as ``urlsplit`` returned them.

    Existing ``%XX`` escapes in the path are kept as-is, so a URL that is
    already encoded is not double-encoded. A ``%`` that does not start a
    valid escape (e.g. ``/100%.pdf``) is a literal percent sign and is
    encoded as ``%25``; leaving it untouched would produce a malformed URL.

    Args:
        url: The URL to encode.

    Returns:
        The URL re-assembled with an encoded path.
    """
    # Function-scope import keeps this block self-contained.
    import re

    scheme, netloc, path, query, fragment = urlsplit(url)
    # Escape stray "%" first: quote() below treats "%" as safe, so it would
    # otherwise pass an invalid sequence such as "%zz" or a trailing "%"
    # straight through.
    path = re.sub(r"%(?![0-9A-Fa-f]{2})", "%25", path)
    # Encode the path while preserving "/" separators and "%XX" escapes.
    path = quote(path, safe="/%")
    return urlunsplit((scheme, netloc, path, query, fragment))