Files
Dorod-Sky/skyvern/utils/url_validators.py

70 lines
2.0 KiB
Python
Raw Normal View History

import ipaddress
2025-04-12 15:18:15 -07:00
from urllib.parse import quote, urlparse, urlsplit, urlunsplit
2024-12-11 00:57:55 -08:00
from fastapi import status
from pydantic import HttpUrl, ValidationError
2024-10-17 23:47:59 -07:00
from skyvern.config import settings
2024-12-11 00:57:55 -08:00
from skyvern.exceptions import BlockedHost, InvalidUrl, SkyvernHTTPException
2024-10-17 23:47:59 -07:00
2024-12-06 11:56:12 +08:00
def prepend_scheme_and_validate_url(url: str) -> str:
    """Return ``url`` with a default ``https://`` scheme, validated as an HTTP(S) URL.

    A falsy ``url`` is returned unchanged without any validation.

    Args:
        url: The URL to normalize; it may omit the scheme.

    Returns:
        The input URL, prefixed with ``https://`` when it carried no scheme.

    Raises:
        InvalidUrl: If the URL has a scheme other than ``http``/``https``, or
            if the (possibly prefixed) URL is rejected by ``HttpUrl``.
    """
    if not url:
        return url

    scheme = urlparse(url=url).scheme
    if scheme:
        # Only plain web schemes are accepted.
        if scheme not in ("http", "https"):
            raise InvalidUrl(url=url)
    else:
        # No scheme at all: assume https by default.
        url = f"https://{url}"

    try:
        HttpUrl(url)
    except ValidationError:
        raise InvalidUrl(url=url)
    return url
def is_blocked_host(host: str) -> bool:
    """Return True when ``host`` must not be contacted.

    The decision is made in this order:

    1. A host listed in ``settings.ALLOWED_HOSTS`` (case-insensitive) is never
       blocked.
    2. A host that is an IP literal is blocked when the address is private,
       link-local, loopback, or reserved.
    3. Any other host is blocked only when it appears in
       ``settings.BLOCKED_HOSTS`` (case-insensitive, since host names are
       case-insensitive).

    Args:
        host: Host component of a URL: a domain name or an IP literal. An IPv6
            literal may be given bare (``::1``) or in URL bracket form
            (``[::1]``).

    Returns:
        True if the host is blocked, False otherwise.
    """
    lowered = host.lower()
    # URL parsers can hand back IPv6 literals wrapped in brackets, which
    # ``ipaddress.ip_address`` rejects. Strip them so that addresses such as
    # "[::1]" are still classified as IPs instead of falling through to the
    # host-name list.
    if lowered.startswith("[") and lowered.endswith("]"):
        bare = lowered[1:-1]
    else:
        bare = lowered
    candidates = {lowered, bare}

    allowed = {h.lower() for h in settings.ALLOWED_HOSTS}
    if candidates & allowed:
        return False

    try:
        ip = ipaddress.ip_address(bare)
    except ValueError:
        # Not an IP literal (e.g. a domain name such as "localhost"):
        # fall back to the configured block list.
        return any(blocked.lower() in candidates for blocked in settings.BLOCKED_HOSTS)

    # Check if the IP is private, link-local, loopback, or reserved.
    return ip.is_private or ip.is_link_local or ip.is_loopback or ip.is_reserved
2024-12-11 00:57:55 -08:00
def validate_url(url: str) -> str | None:
    """Normalize ``url``, parse it, and reject URLs that point at blocked hosts.

    Args:
        url: The URL to validate; a missing scheme is filled in by
            ``prepend_scheme_and_validate_url``.

    Returns:
        The string form of the parsed URL, or ``None`` when the parsed URL has
        no host.

    Raises:
        SkyvernHTTPException: With status 400 when normalization or parsing
            fails for any reason; the message is the underlying error text.
        BlockedHost: When ``is_blocked_host`` reports the URL's host as blocked.
    """
    try:
        normalized = prepend_scheme_and_validate_url(url=url)
        parsed = HttpUrl(url=normalized)
    except Exception as e:
        raise SkyvernHTTPException(message=str(e), status_code=status.HTTP_400_BAD_REQUEST)

    hostname = parsed.host
    if not hostname:
        return None
    if is_blocked_host(hostname):
        raise BlockedHost(host=hostname)
    return str(parsed)
2025-04-12 15:18:15 -07:00
def encode_url(url: str) -> str:
    """Percent-encode the path and query of ``url``, leaving the rest untouched.

    Existing percent-escapes are preserved because ``%`` is treated as a safe
    character in both components.

    Args:
        url: The URL whose path and query should be encoded.

    Returns:
        The URL reassembled with the encoded path and query.
    """
    scheme, netloc, path, query, fragment = urlsplit(url)
    # Path: keep "/" separators and existing "%" escapes.
    encoded_path = quote(path, safe="/%")
    # Query: additionally keep the "=" and "&" delimiters.
    encoded_query = quote(query, safe="=&/%")
    return urlunsplit((scheme, netloc, encoded_path, encoded_query, fragment))