upload all downloaded files when using s3 (#1289)

This commit is contained in:
LawyZheng
2024-11-29 16:05:44 +08:00
committed by GitHub
parent d697023994
commit 87061f5bb6
12 changed files with 211 additions and 50 deletions

View File

@@ -104,6 +104,18 @@ class AsyncAWSClient:
LOG.exception("Failed to create presigned url for S3 objects.", uris=uris)
return None
@execute_with_async_client(client_type=AWSClientType.S3)
async def list_files(self, uri: str, client: AioBaseClient = None) -> list[str]:
    """Collect the keys of every S3 object listed under the given URI.

    The URI is parsed with ``S3Uri``; its bucket selects the bucket to list
    and its key is used as the listing prefix. Keys are gathered across all
    pages produced by the ``list_objects_v2`` paginator.

    Args:
        uri: S3 URI whose bucket and key select the objects to list.
        client: S3 client used for the listing. Defaults to ``None``;
            presumably supplied by the ``execute_with_async_client``
            decorator -- confirm against the decorator.

    Returns:
        The object keys in the order the pages yield them. Empty when no
        page carries a "Contents" entry.
    """
    location = S3Uri(uri)
    paginator = client.get_paginator("list_objects_v2")
    keys: list[str] = []
    async for page in paginator.paginate(Bucket=location.bucket, Prefix=location.key):
        # Pages without a "Contents" entry contribute nothing.
        keys.extend(entry["Key"] for entry in page.get("Contents", []))
    return keys
class S3Uri(object):
# From: https://stackoverflow.com/questions/42641315/s3-urls-get-bucket-name-and-path

View File

@@ -114,7 +114,13 @@ def unzip_files(zip_file_path: str, output_dir: str) -> None:
def get_path_for_workflow_download_directory(workflow_run_id: str) -> Path:
    """Return the download directory of a workflow run as a ``Path``.

    Delegates to ``get_download_dir`` so the directory location is built in
    one place; that helper also creates the directory if it does not exist.

    Args:
        workflow_run_id: Identifier of the workflow run.

    Returns:
        ``Path`` wrapping the string returned by ``get_download_dir``.
    """
    # Only the delegating return is kept: the earlier hard-coded
    # f"{REPO_ROOT_DIR}/downloads/{workflow_run_id}/" return was the line this
    # commit replaced, and leaving it in made this statement unreachable.
    return Path(get_download_dir(workflow_run_id=workflow_run_id, task_id=None))
def get_download_dir(workflow_run_id: str | None, task_id: str | None) -> str:
    """Build the download directory path for a run or task and ensure it exists.

    The directory lives under ``REPO_ROOT_DIR``/downloads and is named after
    ``workflow_run_id`` when that is truthy, otherwise after ``task_id``.

    Args:
        workflow_run_id: Workflow run identifier; takes precedence when truthy.
        task_id: Task identifier used when ``workflow_run_id`` is falsy.

    Returns:
        The directory path as a string. The directory is created (including
        missing parents) if it is not already present.
    """
    owner_id = workflow_run_id or task_id
    target_dir = f"{REPO_ROOT_DIR}/downloads/{owner_id}"
    # exist_ok=True: an already existing directory is not an error.
    os.makedirs(target_dir, exist_ok=True)
    return target_dir
def list_files_in_directory(directory: Path, recursive: bool = False) -> list[str]: