use vertex global region (#2326)
This commit is contained in:
@@ -545,9 +545,9 @@ if settings.ENABLE_GEMINI:
|
||||
),
|
||||
)
|
||||
LLMConfigRegistry.register_config(
|
||||
"GEMINI_2.5_PRO_PREVIEW_03_25",
|
||||
"GEMINI_2.5_PRO_PREVIEW",
|
||||
LLMConfig(
|
||||
"gemini/gemini-2.5-pro-preview-03-25",
|
||||
"gemini/gemini-2.5-pro-preview-05-06",
|
||||
["GEMINI_API_KEY"],
|
||||
supports_vision=True,
|
||||
add_assistant_prefix=False,
|
||||
@@ -737,9 +737,36 @@ if settings.ENABLE_NOVITA:
|
||||
# Get the credentials json file. See documentation: https://support.google.com/a/answer/7378726?hl=en
|
||||
# my_vertex_credentials = json.dumps(json.load(open("my_credentials_file.json")))
|
||||
# Set the value of my_vertex_credentials as the environment variable VERTEX_CREDENTIALS
|
||||
if settings.ENABLE_VERTEX_AI:
|
||||
# NOTE: If you want to specify a location, make sure the model is available in the target location.
|
||||
# See documentation: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations#united-states
|
||||
if settings.ENABLE_VERTEX_AI and settings.VERTEX_CREDENTIALS:
|
||||
if not settings.VERTEX_PROJECT_ID:
|
||||
import json
|
||||
|
||||
credentials = json.loads(settings.VERTEX_CREDENTIALS)
|
||||
settings.VERTEX_PROJECT_ID = credentials.get("project_id")
|
||||
|
||||
api_base: str | None = None
|
||||
if settings.VERTEX_LOCATION == "global":
|
||||
api_base = f"https://aiplatform.googleapis.com/v1/projects/{settings.VERTEX_PROJECT_ID}/locations/global/publishers/google/models"
|
||||
|
||||
LLMConfigRegistry.register_config(
|
||||
"VERTEX_GEMINI_2.5_FLASH_PREVIEW_04_17",
|
||||
"VERTEX_GEMINI_2.5_PRO_PREVIEW",
|
||||
LLMConfig(
|
||||
"vertex_ai/gemini-2.5-pro-preview-05-06",
|
||||
["VERTEX_CREDENTIALS"],
|
||||
supports_vision=True,
|
||||
add_assistant_prefix=False,
|
||||
max_completion_tokens=65535,
|
||||
litellm_params=LiteLLMParams(
|
||||
vertex_credentials=settings.VERTEX_CREDENTIALS,
|
||||
api_base=f"{api_base}/gemini-2.5-pro-preview-05-06" if api_base else None,
|
||||
vertex_location=settings.VERTEX_LOCATION,
|
||||
),
|
||||
),
|
||||
)
|
||||
LLMConfigRegistry.register_config(
|
||||
"VERTEX_GEMINI_2.5_FLASH_PREVIEW",
|
||||
LLMConfig(
|
||||
"vertex_ai/gemini-2.5-flash-preview-04-17",
|
||||
["VERTEX_CREDENTIALS"],
|
||||
@@ -748,19 +775,8 @@ if settings.ENABLE_VERTEX_AI:
|
||||
max_completion_tokens=65535,
|
||||
litellm_params=LiteLLMParams(
|
||||
vertex_credentials=settings.VERTEX_CREDENTIALS,
|
||||
),
|
||||
),
|
||||
)
|
||||
LLMConfigRegistry.register_config(
|
||||
"VERTEX_GEMINI_2.5_PRO_PREVIEW_03_25",
|
||||
LLMConfig(
|
||||
"vertex_ai/gemini-2.5-pro-preview-03-25",
|
||||
["VERTEX_CREDENTIALS"],
|
||||
supports_vision=True,
|
||||
add_assistant_prefix=False,
|
||||
max_completion_tokens=65535,
|
||||
litellm_params=LiteLLMParams(
|
||||
vertex_credentials=settings.VERTEX_CREDENTIALS,
|
||||
api_base=f"{api_base}/gemini-2.5-flash-preview-04-17" if api_base else None,
|
||||
vertex_location=settings.VERTEX_LOCATION,
|
||||
),
|
||||
),
|
||||
)
|
||||
@@ -773,7 +789,9 @@ if settings.ENABLE_VERTEX_AI:
|
||||
add_assistant_prefix=False,
|
||||
max_completion_tokens=8192,
|
||||
litellm_params=LiteLLMParams(
|
||||
vertex_credentials=settings.VERTEX_CREDENTIALS, # type: ignore
|
||||
api_base=f"{api_base}/gemini-2.0-flash-001" if api_base else None,
|
||||
vertex_credentials=settings.VERTEX_CREDENTIALS,
|
||||
vertex_location=settings.VERTEX_LOCATION,
|
||||
),
|
||||
),
|
||||
)
|
||||
@@ -786,7 +804,8 @@ if settings.ENABLE_VERTEX_AI:
|
||||
add_assistant_prefix=False,
|
||||
max_completion_tokens=8192,
|
||||
litellm_params=LiteLLMParams(
|
||||
vertex_credentials=settings.VERTEX_CREDENTIALS, # type: ignore
|
||||
vertex_credentials=settings.VERTEX_CREDENTIALS,
|
||||
vertex_location=settings.VERTEX_LOCATION,  # WARN: this model doesn't support the global location
|
||||
),
|
||||
),
|
||||
)
|
||||
@@ -799,7 +818,8 @@ if settings.ENABLE_VERTEX_AI:
|
||||
add_assistant_prefix=False,
|
||||
max_completion_tokens=8192,
|
||||
litellm_params=LiteLLMParams(
|
||||
vertex_credentials=settings.VERTEX_CREDENTIALS, # type: ignore
|
||||
vertex_credentials=settings.VERTEX_CREDENTIALS,
|
||||
vertex_location=settings.VERTEX_LOCATION,  # WARN: this model doesn't support the global location
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
@@ -15,6 +15,7 @@ class LiteLLMParams(TypedDict, total=False):
|
||||
api_base: str | None
|
||||
model_info: dict[str, Any] | None
|
||||
vertex_credentials: str | None
|
||||
vertex_location: str | None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
|
||||
Reference in New Issue
Block a user