SDK: file uploading (public url only) (#3867)

This commit is contained in:
Stanislav Novosad
2025-11-07 15:21:40 -07:00
committed by GitHub
parent 581d6e5332
commit d1d0c9414b
14 changed files with 379 additions and 12 deletions

View File

@@ -69,6 +69,56 @@ def is_valid_mime_type(file_path: str) -> bool:
return mime_type is not None
def validate_download_url(url: str) -> bool:
    """Return whether ``url`` is an accepted source for a file download.

    Security validation for URL downloads to prevent:

    - File system access outside allowed directories
    - Access to the local file system in non-local environments
    - Unsupported or dangerous URL schemes

    Accepted URLs:

    - ``http://`` and ``https://`` URLs.
    - ``s3://`` URIs that start with
      ``s3://{settings.AWS_S3_BUCKET_UPLOADS}/{settings.ENV}/o_``.
    - ``file://`` URIs, only when ``settings.ENV`` is ``"local"`` and the
      resolved path is ``{REPO_ROOT_DIR}/downloads`` or lies beneath it.

    Args:
        url: The URL to validate.

    Returns:
        True if valid, False otherwise. Any exception raised while
        validating is treated as a rejection rather than propagated.
    """
    # Imported locally so the block does not depend on the module's import list.
    import os

    try:
        parsed_url = urlparse(url)
        scheme = parsed_url.scheme.lower()

        # Allow http/https URLs (includes Google Drive which uses https)
        if scheme in ("http", "https"):
            return True

        # Allow S3 URIs only under the uploads bucket / environment prefix
        if scheme == "s3":
            return url.startswith(f"s3://{settings.AWS_S3_BUCKET_UPLOADS}/{settings.ENV}/o_")

        # Allow file:// URLs only in local environment
        if scheme == "file":
            if settings.ENV != "local":
                return False

            # Validate the file path is within the allowed directory
            try:
                file_path = parse_uri_to_path(url)
                # Resolve ".." segments and symlinks on both sides before
                # comparing: a raw string-prefix test accepts
                # ".../downloads/../secret" and ".../downloads_other/x".
                allowed_root = os.path.realpath(f"{REPO_ROOT_DIR}/downloads")
                resolved_path = os.path.realpath(file_path)
                # commonpath compares whole path components, so a sibling
                # directory that merely shares the prefix is rejected.
                return os.path.commonpath([allowed_root, resolved_path]) == allowed_root
            except ValueError:
                # Raised for an unparsable URI, or by commonpath when the two
                # paths cannot be compared (e.g. different drives).
                return False

        # Reject unsupported schemes
        return False
    except Exception:
        # Deliberate best-effort: a validator must answer False, not crash.
        return False
async def download_file(url: str, max_size_mb: int | None = None) -> str:
try:
# Check if URL is a Google Drive link