Support Google Cloud Workload Identity for vertex models (#3956)

This commit is contained in:
Marc Kelechava
2025-11-10 15:56:57 -08:00
committed by GitHub
parent 4ee3a52522
commit ab162397cd

View File

@@ -1049,7 +1049,8 @@ if settings.ENABLE_NOVITA:
 # NOTE: If you want to specify a location, make sure the model is available in the target location.
 # If you want to use the global location, you must set the VERTEX_PROJECT_ID environment variable.
 # See documentation: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations#united-states
-if settings.ENABLE_VERTEX_AI and settings.VERTEX_CREDENTIALS:
+# Support both explicit service account credentials and Google Cloud Workload Identity (metadata server fallback)
+if settings.ENABLE_VERTEX_AI:
     api_base: str | None = None
     if settings.VERTEX_LOCATION == "global" and settings.VERTEX_PROJECT_ID:
         api_base = f"https://aiplatform.googleapis.com/v1/projects/{settings.VERTEX_PROJECT_ID}/locations/global/publishers/google/models"
@@ -1058,18 +1059,18 @@ if settings.ENABLE_VERTEX_AI and settings.VERTEX_CREDENTIALS:
         "VERTEX_GEMINI_2.5_PRO",
         LLMConfig(
             "vertex_ai/gemini-2.5-pro",
-            ["VERTEX_CREDENTIALS"],
+            [],
             supports_vision=True,
             add_assistant_prefix=False,
             max_completion_tokens=65535,
             litellm_params=LiteLLMParams(
-                vertex_credentials=settings.VERTEX_CREDENTIALS,
                 api_base=f"{api_base}/gemini-2.5-pro" if api_base else None,
                 vertex_location=settings.VERTEX_LOCATION,
                 thinking={
                     "budget_tokens": settings.GEMINI_THINKING_BUDGET,
                     "type": "enabled" if settings.GEMINI_INCLUDE_THOUGHT else None,
                 },
+                vertex_credentials=settings.VERTEX_CREDENTIALS,
             ),
         ),
     )
@@ -1077,18 +1078,18 @@ if settings.ENABLE_VERTEX_AI and settings.VERTEX_CREDENTIALS:
         "VERTEX_GEMINI_2.5_PRO_PREVIEW",
         LLMConfig(
             "vertex_ai/gemini-2.5-pro-preview-05-06",
-            ["VERTEX_CREDENTIALS"],
+            [],
             supports_vision=True,
             add_assistant_prefix=False,
             max_completion_tokens=65535,
             litellm_params=LiteLLMParams(
-                vertex_credentials=settings.VERTEX_CREDENTIALS,
                 api_base=f"{api_base}/gemini-2.5-pro-preview-05-06" if api_base else None,
                 vertex_location=settings.VERTEX_LOCATION,
                 thinking={
                     "budget_tokens": settings.GEMINI_THINKING_BUDGET,
                     "type": "enabled" if settings.GEMINI_INCLUDE_THOUGHT else None,
                 },
+                vertex_credentials=settings.VERTEX_CREDENTIALS,
             ),
         ),
     )
@@ -1096,18 +1097,18 @@ if settings.ENABLE_VERTEX_AI and settings.VERTEX_CREDENTIALS:
         "VERTEX_GEMINI_2.5_FLASH_DEPRECATED",
         LLMConfig(
             "vertex_ai/gemini-2.5-flash",
-            ["VERTEX_CREDENTIALS"],
+            [],
             supports_vision=True,
             add_assistant_prefix=False,
             max_completion_tokens=65535,
             litellm_params=LiteLLMParams(
-                vertex_credentials=settings.VERTEX_CREDENTIALS,
                 api_base=f"{api_base}/gemini-2.5-flash" if api_base else None,
                 vertex_location=settings.VERTEX_LOCATION,
                 thinking={
                     "budget_tokens": settings.GEMINI_THINKING_BUDGET,
                     "type": "enabled" if settings.GEMINI_INCLUDE_THOUGHT else None,
                 },
+                vertex_credentials=settings.VERTEX_CREDENTIALS,
             ),
         ),
     )
@@ -1115,18 +1116,18 @@ if settings.ENABLE_VERTEX_AI and settings.VERTEX_CREDENTIALS:
         "VERTEX_GEMINI_2.5_FLASH_LITE_DEPRECATED",
         LLMConfig(
             "vertex_ai/gemini-2.5-flash-lite",
-            ["VERTEX_CREDENTIALS"],
+            [],
             supports_vision=True,
             add_assistant_prefix=False,
             max_completion_tokens=65535,
             litellm_params=LiteLLMParams(
-                vertex_credentials=settings.VERTEX_CREDENTIALS,
                 api_base=f"{api_base}/gemini-2.5-flash-lite" if api_base else None,
                 vertex_location=settings.VERTEX_LOCATION,
                 thinking={
                     "budget_tokens": settings.GEMINI_THINKING_BUDGET,
                     "type": "enabled" if settings.GEMINI_INCLUDE_THOUGHT else None,
                 },
+                vertex_credentials=settings.VERTEX_CREDENTIALS,
             ),
         ),
     )
@@ -1134,18 +1135,18 @@ if settings.ENABLE_VERTEX_AI and settings.VERTEX_CREDENTIALS:
         "VERTEX_GEMINI_2.5_FLASH_PREVIEW",
         LLMConfig(
             "vertex_ai/gemini-2.5-flash-preview-05-20",
-            ["VERTEX_CREDENTIALS"],
+            [],
             supports_vision=True,
             add_assistant_prefix=False,
             max_completion_tokens=65535,
             litellm_params=LiteLLMParams(
-                vertex_credentials=settings.VERTEX_CREDENTIALS,
                 api_base=f"{api_base}/gemini-2.5-flash-preview-05-20" if api_base else None,
                 vertex_location=settings.VERTEX_LOCATION,
                 thinking={
                     "budget_tokens": settings.GEMINI_THINKING_BUDGET,
                     "type": "enabled" if settings.GEMINI_INCLUDE_THOUGHT else None,
                 },
+                vertex_credentials=settings.VERTEX_CREDENTIALS,
             ),
         ),
     )
@@ -1153,18 +1154,18 @@ if settings.ENABLE_VERTEX_AI and settings.VERTEX_CREDENTIALS:
         "VERTEX_GEMINI_2.5_FLASH_PREVIEW_04_17",
         LLMConfig(
             "vertex_ai/gemini-2.5-flash-preview-04-17",
-            ["VERTEX_CREDENTIALS"],
+            [],
             supports_vision=True,
             add_assistant_prefix=False,
             max_completion_tokens=65535,
             litellm_params=LiteLLMParams(
-                vertex_credentials=settings.VERTEX_CREDENTIALS,
                 api_base=f"{api_base}/gemini-2.5-flash-preview-04-17" if api_base else None,
                 vertex_location=settings.VERTEX_LOCATION,
                 thinking={
                     "budget_tokens": settings.GEMINI_THINKING_BUDGET,
                     "type": "enabled" if settings.GEMINI_INCLUDE_THOUGHT else None,
                 },
+                vertex_credentials=settings.VERTEX_CREDENTIALS,
             ),
         ),
     )
@@ -1172,18 +1173,18 @@ if settings.ENABLE_VERTEX_AI and settings.VERTEX_CREDENTIALS:
         "VERTEX_GEMINI_2.5_FLASH_PREVIEW_05_20",
         LLMConfig(
             "vertex_ai/gemini-2.5-flash-preview-05-20",
-            ["VERTEX_CREDENTIALS"],
+            [],
             supports_vision=True,
             add_assistant_prefix=False,
             max_completion_tokens=65535,
             litellm_params=LiteLLMParams(
-                vertex_credentials=settings.VERTEX_CREDENTIALS,
                 api_base=f"{api_base}/gemini-2.5-flash-preview-05-20" if api_base else None,
                 vertex_location=settings.VERTEX_LOCATION,
                 thinking={
                     "budget_tokens": settings.GEMINI_THINKING_BUDGET,
                     "type": "enabled" if settings.GEMINI_INCLUDE_THOUGHT else None,
                 },
+                vertex_credentials=settings.VERTEX_CREDENTIALS,
             ),
         ),
     )
@@ -1191,18 +1192,18 @@ if settings.ENABLE_VERTEX_AI and settings.VERTEX_CREDENTIALS:
         "VERTEX_GEMINI_2.5_FLASH",
         LLMConfig(
             "vertex_ai/gemini-2.5-flash",
-            ["VERTEX_CREDENTIALS"],
+            [],
             supports_vision=True,
             add_assistant_prefix=False,
             max_completion_tokens=65535,
             litellm_params=LiteLLMParams(
-                vertex_credentials=settings.VERTEX_CREDENTIALS,
                 api_base=f"{api_base}/gemini-2.5-flash" if api_base else None,
                 vertex_location=settings.VERTEX_LOCATION,
                 thinking={
                     "budget_tokens": settings.GEMINI_THINKING_BUDGET,
                     "type": "enabled" if settings.GEMINI_INCLUDE_THOUGHT else None,
                 },
+                vertex_credentials=settings.VERTEX_CREDENTIALS,
             ),
         ),
     )
@@ -1210,18 +1211,18 @@ if settings.ENABLE_VERTEX_AI and settings.VERTEX_CREDENTIALS:
         "VERTEX_GEMINI_2.5_FLASH_LITE",
         LLMConfig(
             "vertex_ai/gemini-2.5-flash-lite",
-            ["VERTEX_CREDENTIALS"],
+            [],
             supports_vision=True,
             add_assistant_prefix=False,
             max_completion_tokens=65535,
             litellm_params=LiteLLMParams(
-                vertex_credentials=settings.VERTEX_CREDENTIALS,
                 api_base=f"{api_base}/gemini-2.5-flash-lite" if api_base else None,
                 vertex_location=settings.VERTEX_LOCATION,
                 thinking={
                     "budget_tokens": settings.GEMINI_THINKING_BUDGET,
                     "type": "enabled" if settings.GEMINI_INCLUDE_THOUGHT else None,
                 },
+                vertex_credentials=settings.VERTEX_CREDENTIALS,
             ),
         ),
     )
@@ -1230,18 +1231,18 @@ if settings.ENABLE_VERTEX_AI and settings.VERTEX_CREDENTIALS:
         "VERTEX_GEMINI_2.5_FLASH_PREVIEW_09_2025",
         LLMConfig(
             "vertex_ai/gemini-2.5-flash-preview-09-2025",
-            ["VERTEX_CREDENTIALS"],
+            [],
             supports_vision=True,
             add_assistant_prefix=False,
             max_completion_tokens=65535,
             litellm_params=LiteLLMParams(
-                vertex_credentials=settings.VERTEX_CREDENTIALS,
                 api_base=f"{api_base}/gemini-2.5-flash-preview-09-2025" if api_base else None,
                 vertex_location=settings.VERTEX_LOCATION,
                 thinking={
                     "budget_tokens": settings.GEMINI_THINKING_BUDGET,
                     "type": "enabled" if settings.GEMINI_INCLUDE_THOUGHT else None,
                 },
+                vertex_credentials=settings.VERTEX_CREDENTIALS,
             ),
         ),
     )
@@ -1249,18 +1250,18 @@ if settings.ENABLE_VERTEX_AI and settings.VERTEX_CREDENTIALS:
         "VERTEX_GEMINI_2.5_FLASH_LITE_PREVIEW_09_2025",
         LLMConfig(
             "vertex_ai/gemini-2.5-flash-lite-preview-09-2025",
-            ["VERTEX_CREDENTIALS"],
+            [],
             supports_vision=True,
             add_assistant_prefix=False,
             max_completion_tokens=65535,
             litellm_params=LiteLLMParams(
-                vertex_credentials=settings.VERTEX_CREDENTIALS,
                 api_base=f"{api_base}/gemini-2.5-flash-lite-preview-09-2025" if api_base else None,
                 vertex_location=settings.VERTEX_LOCATION,
                 thinking={
                     "budget_tokens": settings.GEMINI_THINKING_BUDGET,
                     "type": "enabled" if settings.GEMINI_INCLUDE_THOUGHT else None,
                 },
+                vertex_credentials=settings.VERTEX_CREDENTIALS,
             ),
         ),
     )
@@ -1268,14 +1269,14 @@ if settings.ENABLE_VERTEX_AI and settings.VERTEX_CREDENTIALS:
         "VERTEX_GEMINI_FLASH_2_0",
         LLMConfig(
             "vertex_ai/gemini-2.0-flash-001",
-            ["VERTEX_CREDENTIALS"],
+            [],
             supports_vision=True,
             add_assistant_prefix=False,
             max_completion_tokens=8192,
             litellm_params=LiteLLMParams(
                 api_base=f"{api_base}/gemini-2.0-flash-001" if api_base else None,
-                vertex_credentials=settings.VERTEX_CREDENTIALS,
                 vertex_location=settings.VERTEX_LOCATION,
+                vertex_credentials=settings.VERTEX_CREDENTIALS,
             ),
         ),
     )
@@ -1283,13 +1284,13 @@ if settings.ENABLE_VERTEX_AI and settings.VERTEX_CREDENTIALS:
         "VERTEX_GEMINI_PRO",
         LLMConfig(
             "vertex_ai/gemini-1.5-pro",
-            ["VERTEX_CREDENTIALS"],
+            [],
             supports_vision=True,
             add_assistant_prefix=False,
             max_completion_tokens=8192,
             litellm_params=LiteLLMParams(
-                vertex_credentials=settings.VERTEX_CREDENTIALS,
                 vertex_location=settings.VERTEX_LOCATION,  # WARN: this model don't support global
+                vertex_credentials=settings.VERTEX_CREDENTIALS,
             ),
         ),
     )
@@ -1297,13 +1298,13 @@ if settings.ENABLE_VERTEX_AI and settings.VERTEX_CREDENTIALS:
         "VERTEX_GEMINI_FLASH",
         LLMConfig(
             "vertex_ai/gemini-1.5-flash",
-            ["VERTEX_CREDENTIALS"],
+            [],
             supports_vision=True,
             add_assistant_prefix=False,
             max_completion_tokens=8192,
             litellm_params=LiteLLMParams(
-                vertex_credentials=settings.VERTEX_CREDENTIALS,
                 vertex_location=settings.VERTEX_LOCATION,  # WARN: this model don't support global
+                vertex_credentials=settings.VERTEX_CREDENTIALS,
             ),
         ),
     )