use vertex global region (#2326)
This commit is contained in:
18
poetry.lock
generated
18
poetry.lock
generated
@@ -3249,14 +3249,14 @@ files = [
|
||||
|
||||
[[package]]
|
||||
name = "litellm"
|
||||
version = "1.67.5"
|
||||
version = "1.68.2"
|
||||
description = "Library to easily interface with LLM API providers"
|
||||
optional = false
|
||||
python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "litellm-1.67.5-py3-none-any.whl", hash = "sha256:bd3329731a36200539293521d312adf4f05fc4a6312a84baff2ce5a8b1507a43"},
|
||||
{file = "litellm-1.67.5.tar.gz", hash = "sha256:a9c73feed05aba33b3f2879658f57bb3480b43404ae693ebc827f1c157affde5"},
|
||||
{file = "litellm-1.68.2-py3-none-any.whl", hash = "sha256:49b63e0bdea0a84ac95ae1032f25b5730dc480d11c00b3afe21f1532496d6570"},
|
||||
{file = "litellm-1.68.2.tar.gz", hash = "sha256:03c9ecb8955239f37f52e8e91f69ec02e75eb6290ed881b0597d7e16aa88d5e0"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -3266,7 +3266,7 @@ httpx = ">=0.23.0"
|
||||
importlib-metadata = ">=6.8.0"
|
||||
jinja2 = ">=3.1.2,<4.0.0"
|
||||
jsonschema = ">=4.22.0,<5.0.0"
|
||||
openai = ">=1.68.2"
|
||||
openai = ">=1.68.2,<1.76.0"
|
||||
pydantic = ">=2.0.0,<3.0.0"
|
||||
python-dotenv = ">=0.2.0"
|
||||
tiktoken = ">=0.7.0"
|
||||
@@ -3274,7 +3274,7 @@ tokenizers = "*"
|
||||
|
||||
[package.extras]
|
||||
extra-proxy = ["azure-identity (>=1.15.0,<2.0.0)", "azure-keyvault-secrets (>=4.8.0,<5.0.0)", "google-cloud-kms (>=2.21.3,<3.0.0)", "prisma (==0.11.0)", "redisvl (>=0.4.1,<0.5.0) ; python_version >= \"3.9\" and python_version < \"3.14\"", "resend (>=0.8.0,<0.9.0)"]
|
||||
proxy = ["PyJWT (>=2.8.0,<3.0.0)", "apscheduler (>=3.10.4,<4.0.0)", "backoff", "boto3 (==1.34.34)", "cryptography (>=43.0.1,<44.0.0)", "fastapi (>=0.115.5,<0.116.0)", "fastapi-sso (>=0.16.0,<0.17.0)", "gunicorn (>=23.0.0,<24.0.0)", "litellm-proxy-extras (==0.1.13)", "mcp (==1.5.0) ; python_version >= \"3.10\"", "orjson (>=3.9.7,<4.0.0)", "pynacl (>=1.5.0,<2.0.0)", "python-multipart (>=0.0.18,<0.0.19)", "pyyaml (>=6.0.1,<7.0.0)", "rq", "uvicorn (>=0.29.0,<0.30.0)", "uvloop (>=0.21.0,<0.22.0)", "websockets (>=13.1.0,<14.0.0)"]
|
||||
proxy = ["PyJWT (>=2.8.0,<3.0.0)", "apscheduler (>=3.10.4,<4.0.0)", "backoff", "boto3 (==1.34.34)", "cryptography (>=43.0.1,<44.0.0)", "fastapi (>=0.115.5,<0.116.0)", "fastapi-sso (>=0.16.0,<0.17.0)", "gunicorn (>=23.0.0,<24.0.0)", "litellm-proxy-extras (==0.1.17)", "mcp (==1.5.0) ; python_version >= \"3.10\"", "orjson (>=3.9.7,<4.0.0)", "pynacl (>=1.5.0,<2.0.0)", "python-multipart (>=0.0.18,<0.0.19)", "pyyaml (>=6.0.1,<7.0.0)", "rich (==13.7.1)", "rq", "uvicorn (>=0.29.0,<0.30.0)", "uvloop (>=0.21.0,<0.22.0) ; sys_platform != \"win32\"", "websockets (>=13.1.0,<14.0.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "mako"
|
||||
@@ -4000,14 +4000,14 @@ sympy = "*"
|
||||
|
||||
[[package]]
|
||||
name = "openai"
|
||||
version = "1.76.2"
|
||||
version = "1.75.0"
|
||||
description = "The official Python library for the openai API"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "openai-1.76.2-py3-none-any.whl", hash = "sha256:9c1d9ad59e6e3bea7205eedc9ca66eeebae18d47b527e505a2b0d2fb1538e26e"},
|
||||
{file = "openai-1.76.2.tar.gz", hash = "sha256:f430c8b848775907405c6eff54621254c96f6444c593c097e0cc3a9f8fdda96f"},
|
||||
{file = "openai-1.75.0-py3-none-any.whl", hash = "sha256:fe6f932d2ded3b429ff67cc9ad118c71327db32eb9d32dd723de3acfca337125"},
|
||||
{file = "openai-1.75.0.tar.gz", hash = "sha256:fb3ea907efbdb1bcfd0c44507ad9c961afd7dce3147292b54505ecfd17be8fd1"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -7388,4 +7388,4 @@ type = ["pytest-mypy"]
|
||||
[metadata]
|
||||
lock-version = "2.1"
|
||||
python-versions = "^3.11,<3.12"
|
||||
content-hash = "e8f6be169015304c4e64b7ce15ce3d0d01c95def129da95de7f66e1b41a46665"
|
||||
content-hash = "3a661fe8b4d1e4897db9b3825880e3c8125fc0250063ece94f6cca293e4aae0d"
|
||||
|
||||
@@ -16,7 +16,7 @@ python-multipart = "^0.0.6"
|
||||
toml = "^0.10.2"
|
||||
jinja2 = "^3.1.2"
|
||||
uvicorn = {extras = ["standard"], version = "^0.24.0.post1"}
|
||||
litellm = ">=1.67.4"
|
||||
litellm = ">=1.68.2"
|
||||
playwright = "1.46.0"
|
||||
pillow = "^10.1.0"
|
||||
starlette-context = "^0.3.6"
|
||||
|
||||
@@ -208,6 +208,8 @@ class Settings(BaseSettings):
|
||||
|
||||
# VERTEX_AI
|
||||
VERTEX_CREDENTIALS: str | None = None
|
||||
VERTEX_PROJECT_ID: str | None = None
|
||||
VERTEX_LOCATION: str | None = None
|
||||
|
||||
# NOVITA AI
|
||||
ENABLE_NOVITA: bool = False
|
||||
|
||||
@@ -545,9 +545,9 @@ if settings.ENABLE_GEMINI:
|
||||
),
|
||||
)
|
||||
LLMConfigRegistry.register_config(
|
||||
"GEMINI_2.5_PRO_PREVIEW_03_25",
|
||||
"GEMINI_2.5_PRO_PREVIEW",
|
||||
LLMConfig(
|
||||
"gemini/gemini-2.5-pro-preview-03-25",
|
||||
"gemini/gemini-2.5-pro-preview-05-06",
|
||||
["GEMINI_API_KEY"],
|
||||
supports_vision=True,
|
||||
add_assistant_prefix=False,
|
||||
@@ -737,9 +737,36 @@ if settings.ENABLE_NOVITA:
|
||||
# Get the credentials json file. See documentation: https://support.google.com/a/answer/7378726?hl=en
|
||||
# my_vertex_credentials = json.dumps(json.load(open("my_credentials_file.json")))
|
||||
# Set the value of my_vertex_credentials as the environment variable VERTEX_CREDENTIALS
|
||||
if settings.ENABLE_VERTEX_AI:
|
||||
# NOTE: If you want to specify a location, make sure the model is available in the target location.
|
||||
# See documentation: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations#united-states
|
||||
if settings.ENABLE_VERTEX_AI and settings.VERTEX_CREDENTIALS:
|
||||
if not settings.VERTEX_PROJECT_ID:
|
||||
import json
|
||||
|
||||
credentials = json.loads(settings.VERTEX_CREDENTIALS)
|
||||
settings.VERTEX_PROJECT_ID = credentials.get("project_id")
|
||||
|
||||
api_base: str | None = None
|
||||
if settings.VERTEX_LOCATION == "global":
|
||||
api_base = f"https://aiplatform.googleapis.com/v1/projects/{settings.VERTEX_PROJECT_ID}/locations/global/publishers/google/models"
|
||||
|
||||
LLMConfigRegistry.register_config(
|
||||
"VERTEX_GEMINI_2.5_FLASH_PREVIEW_04_17",
|
||||
"VERTEX_GEMINI_2.5_PRO_PREVIEW",
|
||||
LLMConfig(
|
||||
"vertex_ai/gemini-2.5-pro-preview-05-06",
|
||||
["VERTEX_CREDENTIALS"],
|
||||
supports_vision=True,
|
||||
add_assistant_prefix=False,
|
||||
max_completion_tokens=65535,
|
||||
litellm_params=LiteLLMParams(
|
||||
vertex_credentials=settings.VERTEX_CREDENTIALS,
|
||||
api_base=f"{api_base}/gemini-2.5-pro-preview-05-06" if api_base else None,
|
||||
vertex_location=settings.VERTEX_LOCATION,
|
||||
),
|
||||
),
|
||||
)
|
||||
LLMConfigRegistry.register_config(
|
||||
"VERTEX_GEMINI_2.5_FLASH_PREVIEW",
|
||||
LLMConfig(
|
||||
"vertex_ai/gemini-2.5-flash-preview-04-17",
|
||||
["VERTEX_CREDENTIALS"],
|
||||
@@ -748,19 +775,8 @@ if settings.ENABLE_VERTEX_AI:
|
||||
max_completion_tokens=65535,
|
||||
litellm_params=LiteLLMParams(
|
||||
vertex_credentials=settings.VERTEX_CREDENTIALS,
|
||||
),
|
||||
),
|
||||
)
|
||||
LLMConfigRegistry.register_config(
|
||||
"VERTEX_GEMINI_2.5_PRO_PREVIEW_03_25",
|
||||
LLMConfig(
|
||||
"vertex_ai/gemini-2.5-pro-preview-03-25",
|
||||
["VERTEX_CREDENTIALS"],
|
||||
supports_vision=True,
|
||||
add_assistant_prefix=False,
|
||||
max_completion_tokens=65535,
|
||||
litellm_params=LiteLLMParams(
|
||||
vertex_credentials=settings.VERTEX_CREDENTIALS,
|
||||
api_base=f"{api_base}/gemini-2.5-flash-preview-04-17" if api_base else None,
|
||||
vertex_location=settings.VERTEX_LOCATION,
|
||||
),
|
||||
),
|
||||
)
|
||||
@@ -773,7 +789,9 @@ if settings.ENABLE_VERTEX_AI:
|
||||
add_assistant_prefix=False,
|
||||
max_completion_tokens=8192,
|
||||
litellm_params=LiteLLMParams(
|
||||
vertex_credentials=settings.VERTEX_CREDENTIALS, # type: ignore
|
||||
api_base=f"{api_base}/gemini-2.0-flash-001" if api_base else None,
|
||||
vertex_credentials=settings.VERTEX_CREDENTIALS,
|
||||
vertex_location=settings.VERTEX_LOCATION,
|
||||
),
|
||||
),
|
||||
)
|
||||
@@ -786,7 +804,8 @@ if settings.ENABLE_VERTEX_AI:
|
||||
add_assistant_prefix=False,
|
||||
max_completion_tokens=8192,
|
||||
litellm_params=LiteLLMParams(
|
||||
vertex_credentials=settings.VERTEX_CREDENTIALS, # type: ignore
|
||||
vertex_credentials=settings.VERTEX_CREDENTIALS,
|
||||
vertex_location=settings.VERTEX_LOCATION, # WARN: this model doesn't support the global location
|
||||
),
|
||||
),
|
||||
)
|
||||
@@ -799,7 +818,8 @@ if settings.ENABLE_VERTEX_AI:
|
||||
add_assistant_prefix=False,
|
||||
max_completion_tokens=8192,
|
||||
litellm_params=LiteLLMParams(
|
||||
vertex_credentials=settings.VERTEX_CREDENTIALS, # type: ignore
|
||||
vertex_credentials=settings.VERTEX_CREDENTIALS,
|
||||
vertex_location=settings.VERTEX_LOCATION, # WARN: this model doesn't support the global location
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
@@ -15,6 +15,7 @@ class LiteLLMParams(TypedDict, total=False):
|
||||
api_base: str | None
|
||||
model_info: dict[str, Any] | None
|
||||
vertex_credentials: str | None
|
||||
vertex_location: str | None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
|
||||
Reference in New Issue
Block a user