add azure blob storage (#4338)

Signed-off-by: Benji Visser <benji@093b.org>
Co-authored-by: Benji Visser <benji@093b.org>
Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>
This commit is contained in:
Shuchang Zheng
2025-12-20 00:16:16 +08:00
committed by GitHub
parent 71db86cbf0
commit 55f366ba93
17 changed files with 1641 additions and 136 deletions

View File

@@ -17,7 +17,8 @@ from yarl import URL
from skyvern.config import settings
from skyvern.constants import BROWSER_DOWNLOAD_TIMEOUT, BROWSER_DOWNLOADING_SUFFIX, REPO_ROOT_DIR
from skyvern.exceptions import DownloadFileMaxSizeExceeded, DownloadFileMaxWaitingTime
from skyvern.forge.sdk.api.aws import AsyncAWSClient, aws_client
from skyvern.forge import app
from skyvern.forge.sdk.api.aws import AsyncAWSClient
from skyvern.utils.url_validators import encode_url
LOG = structlog.get_logger()
@@ -97,6 +98,12 @@ def validate_download_url(url: str) -> bool:
return True
return False
# Allow Azure URIs for Skyvern uploads container
if scheme == "azure":
if url.startswith(f"azure://{settings.AZURE_STORAGE_CONTAINER_UPLOADS}/{settings.ENV}/o_"):
return True
return False
# Allow file:// URLs only in local environment
if scheme == "file":
if settings.ENV != "local":
@@ -129,20 +136,41 @@ async def download_file(url: str, max_size_mb: int | None = None) -> str:
url = f"https://drive.google.com/uc?export=download&id={file_id}"
LOG.info("Converting Google Drive link to direct download", url=url)
# Check if URL is an S3 URI
if url.startswith(f"s3://{settings.AWS_S3_BUCKET_UPLOADS}/{settings.ENV}/o_"):
LOG.info("Downloading Skyvern file from S3", url=url)
client = AsyncAWSClient()
return await download_from_s3(client, url)
# Check if URL is a cloud storage URI (S3 or Azure)
parsed = urlparse(url)
if parsed.scheme == "s3":
uploads_prefix = f"s3://{settings.AWS_S3_BUCKET_UPLOADS}/{settings.ENV}/o_"
if url.startswith(uploads_prefix):
LOG.info("Downloading Skyvern file from S3", url=url)
data = await app.STORAGE.download_uploaded_file(url)
if data is None:
raise Exception(f"Failed to download file from S3: {url}")
filename = url.split("/")[-1]
temp_file = create_named_temporary_file(delete=False, file_name=filename)
LOG.info(f"Downloaded file to {temp_file.name}")
temp_file.write(data)
return temp_file.name
elif parsed.scheme == "azure":
uploads_prefix = f"azure://{settings.AZURE_STORAGE_CONTAINER_UPLOADS}/{settings.ENV}/o_"
if url.startswith(uploads_prefix):
LOG.info("Downloading Skyvern file from Azure Blob Storage", url=url)
data = await app.STORAGE.download_uploaded_file(url)
if data is None:
raise Exception(f"Failed to download file from Azure Blob Storage: {url}")
filename = url.split("/")[-1]
temp_file = create_named_temporary_file(delete=False, file_name=filename)
LOG.info(f"Downloaded file to {temp_file.name}")
temp_file.write(data)
return temp_file.name
# Check if URL is a file:// URI
# we only support downloading local files when the environment is local
# and the file is in the skyvern downloads directory
if url.startswith("file://") and settings.ENV == "local":
file_path = parse_uri_to_path(url)
if file_path.startswith(f"{REPO_ROOT_DIR}/downloads"):
local_path = parse_uri_to_path(url)
if local_path.startswith(f"{REPO_ROOT_DIR}/downloads"):
LOG.info("Downloading file from local file system", url=url)
return file_path
return local_path
async with aiohttp.ClientSession(raise_for_status=True) as session:
LOG.info("Starting to download file", url=url)
@@ -262,12 +290,13 @@ async def wait_for_download_finished(downloading_files: list[str], timeout: floa
while len(cur_downloading_files) > 0:
new_downloading_files: list[str] = []
for path in cur_downloading_files:
if path.startswith("s3://"):
try:
await aws_client.get_object_info(path)
except Exception:
# Check for cloud storage URIs (S3 or Azure)
parsed = urlparse(path)
if parsed.scheme in ("s3", "azure"):
if not await app.STORAGE.file_exists(path):
LOG.debug(
"downloading file is not found in s3, means the file finished downloading", path=path
"downloading file is not found in cloud storage, means the file finished downloading",
path=path,
)
continue
else: