Feature/self hosted proxy support (#2115)

This commit is contained in:
Piyush
2025-04-19 08:54:42 +05:30
committed by GitHub
parent 89f1e8d4dd
commit cebb04c736
2 changed files with 69 additions and 0 deletions

View File

@@ -44,6 +44,10 @@ class Settings(BaseSettings):
BLOCKED_HOSTS: list[str] = ["localhost"] BLOCKED_HOSTS: list[str] = ["localhost"]
ALLOWED_HOSTS: list[str] = [] ALLOWED_HOSTS: list[str] = []
# Comma-separated list of proxy URLs. Format: "http://<username>:<password>@host:port, http://<username>:<password>@host:port, ..."
HOSTED_PROXY_POOL: str = ""
ENABLE_PROXY: bool = False
# Secret key for JWT. Please generate your own secret key in production # Secret key for JWT. Please generate your own secret key in production
SECRET_KEY: str = "PLACEHOLDER" SECRET_KEY: str = "PLACEHOLDER"
# Algorithm used to sign the JWT # Algorithm used to sign the JWT

View File

@@ -2,6 +2,8 @@ from __future__ import annotations
import asyncio import asyncio
import os import os
import random
import re
import socket import socket
import subprocess import subprocess
import time import time
@@ -9,6 +11,7 @@ import uuid
from datetime import datetime from datetime import datetime
from pathlib import Path from pathlib import Path
from typing import Any, Awaitable, Callable, Protocol from typing import Any, Awaitable, Callable, Protocol
from urllib.parse import urlparse
import aiofiles import aiofiles
import psutil import psutil
@@ -210,6 +213,11 @@ class BrowserContextFactory:
}, },
} }
if settings.ENABLE_PROXY:
proxy_config = setup_proxy()
if proxy_config:
args["proxy"] = proxy_config
if proxy_location: if proxy_location:
if tz_info := get_tzinfo_from_proxy(proxy_location=proxy_location): if tz_info := get_tzinfo_from_proxy(proxy_location=proxy_location):
args["timezone_id"] = tz_info.key args["timezone_id"] = tz_info.key
@@ -311,6 +319,63 @@ class BrowserArtifacts(BaseModel):
return await f.read() return await f.read()
def _redact_proxy_credentials(proxy: str) -> str:
    """Return *proxy* with any userinfo (text before the last ``@``) masked for logging."""
    prefix, sep, rest = proxy.partition("://")
    if not sep:
        # No scheme separator: treat the whole string as the authority part.
        prefix, rest = "", proxy
    _, at, host = rest.rpartition("@")
    return f"{prefix}{sep}***@{host}" if at else proxy


def setup_proxy() -> dict | None:
    """Pick one proxy at random from ``settings.HOSTED_PROXY_POOL``.

    The pool is a comma-separated string of proxy URLs. Entries are stripped,
    empty entries are dropped, and entries rejected by ``_is_valid_proxy_url``
    are skipped with a warning.

    Returns:
        A dict with ``server`` (the chosen URL exactly as configured),
        ``username`` and ``password`` (empty strings when the URL carries no
        credentials), or ``None`` when no usable proxy is configured or the
        selection fails.
    """
    pool = settings.HOSTED_PROXY_POOL
    if not pool or pool.strip() == "":
        LOG.warning("No proxy server value found. Continuing without using proxy...")
        return None

    proxy_servers = [server.strip() for server in pool.split(",") if server.strip()]
    if not proxy_servers:
        # Reached for values made only of separators, e.g. ", ,".
        LOG.warning("Proxy pool contains only empty values. Continuing without proxy...")
        return None

    valid_proxies = []
    for proxy in proxy_servers:
        if _is_valid_proxy_url(proxy):
            valid_proxies.append(proxy)
        else:
            # The entry may embed user:password, so never log it verbatim.
            LOG.warning(f"Invalid proxy URL format: {_redact_proxy_credentials(proxy)}")

    if not valid_proxies:
        LOG.warning("No valid proxy URLs found. Continuing without proxy...")
        return None

    # Best effort: any failure while selecting/parsing falls back to "no proxy".
    try:
        proxy_server = random.choice(valid_proxies)
        proxy_creds = _get_proxy_server_creds(proxy_server)
        if proxy_creds:
            # Only claim credentials were found when the helper returned some.
            LOG.info("Found proxy server creds, using them...")
        return {
            "server": proxy_server,
            "username": proxy_creds.get("username", ""),
            "password": proxy_creds.get("password", ""),
        }
    except Exception as e:
        LOG.warning(f"Error setting up proxy: {e}. Continuing without proxy...")
        return None
def _is_valid_proxy_url(url: str) -> bool:
PROXY_PATTERN = re.compile(r"^(http|https|socks5):\/\/([^:@]+(:[^@]*)?@)?[^\s:\/]+(:\d+)?$")
try:
parsed = urlparse(url)
if not parsed.scheme or not parsed.netloc:
return False
return bool(PROXY_PATTERN.match(url))
except Exception:
return False
def _get_proxy_server_creds(proxy: str) -> dict:
parsed_url = urlparse(proxy)
if parsed_url.username and parsed_url.password:
return {"username": parsed_url.username, "password": parsed_url.password}
LOG.warning("No credentials found in the proxy URL.")
return {}
def _get_cdp_port(kwargs: dict) -> int | None: def _get_cdp_port(kwargs: dict) -> int | None:
raw_cdp_port = kwargs.get("cdp_port") raw_cdp_port = kwargs.get("cdp_port")
if isinstance(raw_cdp_port, (int, str)): if isinstance(raw_cdp_port, (int, str)):