add azure blob storage (#4338)
Signed-off-by: Benji Visser <benji@093b.org> Co-authored-by: Benji Visser <benji@093b.org> Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>
This commit is contained in:
@@ -1,15 +1,29 @@
|
||||
"""Real implementations of Azure clients (Vault and Storage) and their factories."""
|
||||
|
||||
from typing import Self
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from mimetypes import add_type, guess_type
|
||||
from typing import IO, Self
|
||||
|
||||
import structlog
|
||||
from azure.core.exceptions import ResourceNotFoundError
|
||||
from azure.identity.aio import ClientSecretCredential, DefaultAzureCredential
|
||||
from azure.keyvault.secrets.aio import SecretClient
|
||||
from azure.storage.blob import BlobSasPermissions, ContentSettings, StandardBlobTier, generate_blob_sas
|
||||
from azure.storage.blob.aio import BlobServiceClient
|
||||
|
||||
from skyvern.forge.sdk.api.azure import AsyncAzureStorageClient, AsyncAzureVaultClient, AzureClientFactory
|
||||
from skyvern.config import settings
|
||||
from skyvern.forge.sdk.api.azure import (
|
||||
AsyncAzureStorageClient,
|
||||
AsyncAzureVaultClient,
|
||||
AzureClientFactory,
|
||||
AzureUri,
|
||||
)
|
||||
from skyvern.forge.sdk.schemas.organizations import AzureClientSecretCredential
|
||||
|
||||
# Register custom mime types for mimetypes guessing
|
||||
add_type("application/json", ".har")
|
||||
add_type("text/plain", ".log")
|
||||
|
||||
LOG = structlog.get_logger()
|
||||
|
||||
|
||||
@@ -73,37 +87,256 @@ class RealAsyncAzureVaultClient(AsyncAzureVaultClient):
|
||||
|
||||
|
||||
class RealAsyncAzureStorageClient(AsyncAzureStorageClient):
|
||||
"""Real implementation of Azure Storage client using Azure SDK."""
|
||||
"""Async client for Azure Blob Storage operations. Implements AsyncAzureStorageClient protocol."""
|
||||
|
||||
def __init__(self, storage_account_name: str, storage_account_key: str):
|
||||
self.blob_service_client = BlobServiceClient(
|
||||
account_url=f"https://{storage_account_name}.blob.core.windows.net",
|
||||
credential=storage_account_key,
|
||||
def __init__(
    self,
    account_name: str | None = None,
    account_key: str | None = None,
) -> None:
    """Store the storage account credentials and prepare lazy client state.

    Args:
        account_name: Storage account name; when falsy,
            ``settings.AZURE_STORAGE_ACCOUNT_NAME`` is used instead.
        account_key: Storage account key; when falsy,
            ``settings.AZURE_STORAGE_ACCOUNT_KEY`` is used instead.

    Raises:
        ValueError: If either the resolved name or the resolved key is falsy.
    """
    self.account_name = account_name if account_name else settings.AZURE_STORAGE_ACCOUNT_NAME
    self.account_key = account_key if account_key else settings.AZURE_STORAGE_ACCOUNT_KEY

    have_credentials = bool(self.account_name and self.account_key)
    if not have_credentials:
        raise ValueError("Azure Storage account name and key are required")

    # The service client is built on first use; see _get_blob_service_client.
    self._blob_service_client: BlobServiceClient | None = None
    # Containers already checked by _ensure_container_exists.
    self._verified_containers: set[str] = set()
|
||||
|
||||
def _get_blob_service_client(self) -> BlobServiceClient:
    """Return the cached BlobServiceClient, creating it on first use.

    The client is built from ``self.account_name`` and ``self.account_key``
    and stored on the instance so later calls reuse the same object.
    """
    existing = self._blob_service_client
    if existing is not None:
        return existing

    endpoint = f"https://{self.account_name}.blob.core.windows.net"
    self._blob_service_client = BlobServiceClient(account_url=endpoint, credential=self.account_key)
    return self._blob_service_client
|
||||
|
||||
async def _ensure_container_exists(self, container: str) -> None:
    """Create ``container`` if it is missing, remembering successful checks.

    The check stays best-effort and never raises. A container is only added
    to ``self._verified_containers`` when it was confirmed or created, or
    when creation reported that it already exists. Any other failure is
    logged and left uncached so the next call retries.

    Args:
        container: Name of the blob container to verify.
    """
    if container in self._verified_containers:
        return

    # Local import: the module-level import block only brings in
    # ResourceNotFoundError from this package.
    from azure.core.exceptions import ResourceExistsError

    container_client = self._get_blob_service_client().get_container_client(container)
    try:
        if not await container_client.exists():
            await container_client.create_container()
            LOG.info("Created Azure container", container=container)
    except ResourceExistsError:
        # Another writer created it between exists() and create_container().
        LOG.debug("Container may already exist", container=container)
    except Exception:
        # Do not mark the container as verified: this failure says nothing
        # about whether it exists, so a later call should check again.
        LOG.warning("Failed to verify Azure container", container=container, exc_info=True)
        return

    self._verified_containers.add(container)
|
||||
|
||||
async def upload_file(
    self,
    uri: str,
    data: bytes,
    tier: StandardBlobTier = StandardBlobTier.HOT,
    tags: dict[str, str] | None = None,
) -> None:
    """Upload in-memory bytes to the blob addressed by ``uri``.

    The container is ensured first and an existing blob is overwritten.

    Args:
        uri: Blob location, parsed with ``AzureUri``.
        data: Payload to store.
        tier: Blob access tier passed as ``standard_blob_tier``.
        tags: Optional blob tags.
    """
    target = AzureUri(uri)
    await self._ensure_container_exists(target.container)

    service = self._get_blob_service_client()
    await service.get_container_client(target.container).upload_blob(
        name=target.blob_path,
        data=data,
        overwrite=True,
        standard_blob_tier=tier,
        tags=tags,
    )
|
||||
|
||||
async def upload_file_from_path(self, container_name: str, blob_name: str, file_path: str) -> None:
|
||||
try:
|
||||
container_client = self.blob_service_client.get_container_client(container_name)
|
||||
# Create the container if it doesn't exist
|
||||
try:
|
||||
await container_client.create_container()
|
||||
except Exception as e:
|
||||
LOG.info("Azure container already exists or failed to create", container_name=container_name, error=e)
|
||||
|
||||
with open(file_path, "rb") as data:
|
||||
await container_client.upload_blob(name=blob_name, data=data, overwrite=True)
|
||||
LOG.info("File uploaded to Azure Blob Storage", container_name=container_name, blob_name=blob_name)
|
||||
except Exception as e:
|
||||
LOG.error(
|
||||
"Failed to upload file to Azure Blob Storage",
|
||||
container_name=container_name,
|
||||
blob_name=blob_name,
|
||||
error=e,
|
||||
async def upload_file_from_path(
    self,
    uri: str,
    file_path: str,
    tier: StandardBlobTier = StandardBlobTier.HOT,
    tags: dict[str, str] | None = None,
    metadata: dict[str, str] | None = None,
) -> None:
    """Upload a local file to the blob addressed by ``uri``.

    The container is ensured first and an existing blob is overwritten.
    A content type is attached when ``mimetypes.guess_type`` can infer one
    from ``file_path``.

    Args:
        uri: Blob location, parsed with ``AzureUri``.
        file_path: Path of the local file, opened in binary mode.
        tier: Blob access tier passed as ``standard_blob_tier``.
        tags: Optional blob tags.
        metadata: Optional blob metadata.
    """
    target = AzureUri(uri)
    await self._ensure_container_exists(target.container)

    service = self._get_blob_service_client()
    container_client = service.get_container_client(target.container)

    guessed_type = guess_type(file_path)[0]
    if guessed_type:
        content_settings = ContentSettings(content_type=guessed_type)
    else:
        content_settings = None

    with open(file_path, "rb") as source:
        await container_client.upload_blob(
            name=target.blob_path,
            data=source,
            overwrite=True,
            standard_blob_tier=tier,
            tags=tags,
            metadata=metadata,
            content_settings=content_settings,
        )
|
||||
raise e
|
||||
|
||||
async def upload_file_stream(
    self,
    uri: str,
    file_obj: IO[bytes],
    tier: StandardBlobTier = StandardBlobTier.HOT,
    tags: dict[str, str] | None = None,
    metadata: dict[str, str] | None = None,
) -> str:
    """Upload a binary file object to the blob addressed by ``uri``.

    The container is ensured first and an existing blob is overwritten.

    Args:
        uri: Blob location, parsed with ``AzureUri``.
        file_obj: Binary stream handed to ``upload_blob`` as the data.
        tier: Blob access tier passed as ``standard_blob_tier``.
        tags: Optional blob tags.
        metadata: Optional blob metadata.

    Returns:
        The ``uri`` argument, unchanged.
    """
    target = AzureUri(uri)
    await self._ensure_container_exists(target.container)

    service = self._get_blob_service_client()
    await service.get_container_client(target.container).upload_blob(
        name=target.blob_path,
        data=file_obj,
        overwrite=True,
        standard_blob_tier=tier,
        tags=tags,
        metadata=metadata,
    )
    return uri
|
||||
|
||||
async def download_file(self, uri: str, log_exception: bool = True) -> bytes | None:
    """Download the blob addressed by ``uri`` and return its full content.

    Args:
        uri: Blob location, parsed with ``AzureUri``.
        log_exception: When true, failures are logged (a warning for a
            missing blob, an exception log for anything else).

    Returns:
        The blob bytes, or ``None`` if the download failed for any reason.
    """
    target = AzureUri(uri)
    try:
        service = self._get_blob_service_client()
        blob_client = service.get_container_client(target.container).get_blob_client(target.blob_path)
        stream = await blob_client.download_blob()
        return await stream.readall()
    except Exception as exc:
        if log_exception:
            # A missing blob is an expected outcome; everything else gets a traceback.
            if isinstance(exc, ResourceNotFoundError):
                LOG.warning("Azure blob not found", uri=uri)
            else:
                LOG.exception("Failed to download from Azure", uri=uri)
        return None
|
||||
|
||||
async def get_blob_properties(self, uri: str) -> dict | None:
    """Fetch selected properties of the blob addressed by ``uri``.

    Returns:
        A dict with the keys ``size``, ``content_type``, ``last_modified``,
        ``etag`` and ``metadata``, or ``None`` when the blob is missing or
        the lookup fails (other failures are logged).
    """
    target = AzureUri(uri)
    try:
        service = self._get_blob_service_client()
        blob_client = service.get_container_client(target.container).get_blob_client(target.blob_path)
        props = await blob_client.get_blob_properties()

        summary: dict = {"size": props.size}
        # content_settings may be absent; report no content type in that case.
        summary["content_type"] = props.content_settings.content_type if props.content_settings else None
        summary["last_modified"] = props.last_modified
        summary["etag"] = props.etag
        summary["metadata"] = props.metadata
        return summary
    except ResourceNotFoundError:
        return None
    except Exception:
        LOG.exception("Failed to get blob properties", uri=uri)
        return None
|
||||
|
||||
async def blob_exists(self, uri: str) -> bool:
    """Report whether the blob addressed by ``uri`` exists.

    Args:
        uri: Blob location, parsed with ``AzureUri``.

    Returns:
        The result of the blob client's ``exists()`` call, or ``False`` when
        the check itself fails. Such failures are logged so that an outage
        or credential problem is not mistaken for a missing blob.
    """
    parsed = AzureUri(uri)
    try:
        client = self._get_blob_service_client()
        container_client = client.get_container_client(parsed.container)
        blob_client = container_client.get_blob_client(parsed.blob_path)
        return await blob_client.exists()
    except Exception:
        # Previously swallowed silently; keep the False result but leave a trace.
        LOG.exception("Failed to check Azure blob existence", uri=uri)
        return False
|
||||
|
||||
async def delete_blob(self, uri: str) -> None:
    """Delete the blob addressed by ``uri``.

    A missing blob is logged at debug level and otherwise ignored.

    Raises:
        Exception: Any other failure is logged and re-raised unchanged.
    """
    target = AzureUri(uri)
    try:
        service = self._get_blob_service_client()
        blob_client = service.get_container_client(target.container).get_blob_client(target.blob_path)
        await blob_client.delete_blob()
    except ResourceNotFoundError:
        LOG.debug("Azure blob not found for deletion", uri=uri)
    except Exception:
        LOG.exception("Failed to delete Azure blob", uri=uri)
        raise
|
||||
|
||||
async def list_blobs(self, container: str, prefix: str | None = None) -> list[str]:
|
||||
try:
|
||||
client = self._get_blob_service_client()
|
||||
container_client = client.get_container_client(container)
|
||||
blobs = []
|
||||
async for blob in container_client.list_blobs(name_starts_with=prefix):
|
||||
blobs.append(blob.name)
|
||||
return blobs
|
||||
except ResourceNotFoundError:
|
||||
return []
|
||||
except Exception:
|
||||
LOG.exception("Failed to list Azure blobs", container=container, prefix=prefix)
|
||||
return []
|
||||
|
||||
def create_sas_url(self, uri: str, expiry_hours: int = 24) -> str | None:
    """Build a read-only SAS URL for the blob addressed by ``uri``.

    Args:
        uri: Blob location, parsed with ``AzureUri``.
        expiry_hours: Lifetime of the token, counted from the current UTC time.

    Returns:
        The signed HTTPS URL, or ``None`` if generation fails (the failure
        is logged).
    """
    # Local import keeps this fix independent of the module import block.
    from urllib.parse import quote

    parsed = AzureUri(uri)
    try:
        sas_token = generate_blob_sas(
            account_name=self.account_name,
            container_name=parsed.container,
            blob_name=parsed.blob_path,
            account_key=self.account_key,
            permission=BlobSasPermissions(read=True),
            expiry=datetime.now(timezone.utc) + timedelta(hours=expiry_hours),
        )
        # The token is signed over the raw blob name, but the URL path must be
        # percent-encoded: characters such as spaces, '#', '?' or '%' would
        # otherwise truncate or corrupt the path. '/' stays literal because it
        # separates virtual directories.
        encoded_path = quote(parsed.blob_path, safe="/")
        return f"https://{self.account_name}.blob.core.windows.net/{parsed.container}/{encoded_path}?{sas_token}"
    except Exception:
        LOG.exception("Failed to create SAS URL", uri=uri)
        return None
|
||||
|
||||
async def create_sas_urls(self, uris: list[str], expiry_hours: int = 24) -> list[str] | None:
|
||||
try:
|
||||
sas_urls: list[str] = []
|
||||
for uri in uris:
|
||||
url = self.create_sas_url(uri, expiry_hours)
|
||||
if url is None:
|
||||
LOG.warning("SAS URL generation failed, aborting batch", failed_uri=uri, uris=uris)
|
||||
return None
|
||||
sas_urls.append(url)
|
||||
return sas_urls
|
||||
except Exception:
|
||||
LOG.exception("Failed to create SAS URLs")
|
||||
return None
|
||||
|
||||
async def close(self) -> None:
    """Close the cached blob service client, if one was created.

    Only ``self._blob_service_client`` is touched. The constructor no longer
    sets a public ``blob_service_client`` attribute, so the earlier
    unconditional ``self.blob_service_client.close()`` call would raise
    ``AttributeError`` and has been removed. Calling this repeatedly, or
    before any client was built, does nothing.
    """
    if self._blob_service_client:
        await self._blob_service_client.close()
        # Drop the reference so a later call builds a fresh client.
        self._blob_service_client = None
|
||||
|
||||
async def list_files(self, uri: str) -> list[str]:
    """List blob names under the prefix given by ``uri``.

    The URI is parsed with ``AzureUri``; its container and blob path are
    forwarded to ``list_blobs`` as the container and the name prefix.
    """
    location = AzureUri(uri)
    container, prefix = location.container, location.blob_path
    return await self.list_blobs(container, prefix)
|
||||
|
||||
async def get_object_info(self, uri: str) -> dict | None:
|
||||
"""Get object info including metadata. Returns dict with Metadata and LastModified keys."""
|
||||
props = await self.get_blob_properties(uri)
|
||||
if props is None:
|
||||
return None
|
||||
return {
|
||||
"Metadata": props.get("metadata", {}),
|
||||
"LastModified": props.get("last_modified"),
|
||||
}
|
||||
|
||||
async def delete_file(self, uri: str) -> None:
    """Remove the file stored at ``uri`` by delegating to ``delete_blob``."""
    pending_delete = self.delete_blob(uri)
    await pending_delete
|
||||
|
||||
async def get_file_metadata(self, uri: str, log_exception: bool = True) -> dict[str, str] | None:
    """Return only the metadata of the blob addressed by ``uri``.

    Args:
        uri: Blob location, parsed with ``AzureUri``.
        log_exception: When true, failures are logged (a warning for a
            missing blob, an exception log for anything else).

    Returns:
        The blob metadata (``{}`` when the blob has none), or ``None`` if
        the blob is missing or the lookup fails.
    """
    target = AzureUri(uri)
    try:
        service = self._get_blob_service_client()
        blob_client = service.get_container_client(target.container).get_blob_client(target.blob_path)
        blob_props = await blob_client.get_blob_properties()
        metadata = blob_props.metadata
        return metadata if metadata else {}
    except ResourceNotFoundError:
        if log_exception:
            LOG.warning("Azure blob not found for metadata", uri=uri)
        return None
    except Exception:
        if log_exception:
            LOG.exception("Failed to get blob metadata", uri=uri)
        return None
|
||||
|
||||
|
||||
class RealAzureClientFactory(AzureClientFactory):
|
||||
@@ -124,4 +357,4 @@ class RealAzureClientFactory(AzureClientFactory):
|
||||
|
||||
def create_storage_client(self, storage_account_name: str, storage_account_key: str) -> AsyncAzureStorageClient:
    """Create an Azure Storage client with the provided credentials.

    Args:
        storage_account_name: Passed to the client as ``account_name``.
        storage_account_key: Passed to the client as ``account_key``.

    Returns:
        A new ``RealAsyncAzureStorageClient``.
    """
    # Single return using keyword arguments; the earlier positional return
    # made this line unreachable and depended on the constructor's parameter order.
    return RealAsyncAzureStorageClient(account_name=storage_account_name, account_key=storage_account_key)
|
||||
|
||||
Reference in New Issue
Block a user