Files
Dorod-Sky/skyvern/forge/sdk/api/llm/models.py

93 lines
3.1 KiB
Python
Raw Normal View History

from dataclasses import dataclass, field
from typing import Any, Literal, Optional, TypedDict
2024-03-16 23:13:18 -07:00
from skyvern.forge.sdk.settings_manager import SettingsManager
2025-05-04 01:02:01 -07:00
class LiteLLMParams(TypedDict, total=False):
2024-08-02 19:35:52 +08:00
api_key: str | None
api_version: str | None
api_base: str | None
2024-10-10 23:24:27 -07:00
model_info: dict[str, Any] | None
2025-05-04 01:02:01 -07:00
vertex_credentials: str | None
2025-05-11 19:28:10 -07:00
vertex_location: str | None
thinking: dict[str, Any] | None
2025-12-17 20:52:29 -08:00
thinking_level: str | None
service_tier: str | None
timeout: float | None
2024-08-02 19:35:52 +08:00
2024-03-16 23:13:18 -07:00
@dataclass(frozen=True)
class LLMConfigBase:
    """Base configuration shared by single-model and router LLM configs."""

    model_name: str  # litellm model identifier
    required_env_vars: list[str]  # settings attribute names that must be set for this model
    supports_vision: bool
    add_assistant_prefix: bool

    def get_missing_env_vars(self) -> list[str]:
        """Return the entries of ``required_env_vars`` that are unset in settings.

        A variable counts as missing when the settings attribute is absent or
        falsy (None or empty string), not only when the attribute is undefined.
        """
        # Fetch settings once instead of once per variable inside the loop.
        settings = SettingsManager.get_settings()
        return [env_var for env_var in self.required_env_vars if not getattr(settings, env_var, None)]
2024-08-02 19:35:52 +08:00
@dataclass(frozen=True)
class LLMConfig(LLMConfigBase):
    """Configuration for a single (non-routed) LLM."""

    # ``X | None = None`` matches the union style used throughout this file;
    # the previous ``Optional[...] = field(default=None)`` was inconsistent and
    # ``field(default=None)`` is redundant for an immutable default.
    litellm_params: LiteLLMParams | None = None
    # NOTE: these defaults are read from settings once, at class-definition time.
    max_tokens: int | None = SettingsManager.get_settings().LLM_CONFIG_MAX_TOKENS
    max_completion_tokens: int | None = None
    temperature: float | None = SettingsManager.get_settings().LLM_CONFIG_TEMPERATURE
    reasoning_effort: str | None = None
2024-08-02 19:35:52 +08:00
@dataclass(frozen=True)
class LLMAllowedFailsPolicy:
    """Per-error-type failure allowances for the LLM router.

    Each field caps how many failures of that error type are tolerated;
    None leaves the consumer's default in place. Presumably mirrors
    litellm's AllowedFailsPolicy — confirm against the litellm routing docs.
    """

    bad_request_error_allowed_fails: int | None = None
    authentication_error_allowed_fails: int | None = None
    timeout_error_allowed_fails: int | None = None
    rate_limit_error_allowed_fails: int | None = None
    content_policy_violation_error_allowed_fails: int | None = None
    internal_server_error_allowed_fails: int | None = None
2024-03-27 14:44:25 -07:00
@dataclass(frozen=True)
class LLMRouterModelConfig:
    """One model entry in a router's ``model_list``."""

    model_name: str
    # https://litellm.vercel.app/docs/routing
    litellm_params: dict[str, Any]  # passed through to litellm for this deployment
    model_info: dict[str, Any] = field(default_factory=dict)
    # Rate limits for this deployment; None means not constrained here.
    tpm: int | None = None  # presumably tokens per minute — confirm against litellm routing docs
    rpm: int | None = None  # presumably requests per minute — confirm against litellm routing docs
@dataclass(frozen=True)
class LLMRouterConfig(LLMConfigBase):
    """Configuration for a routed LLM spanning multiple model deployments.

    Extends LLMConfigBase with a deployment list, routing strategy,
    retry/cooldown tuning, and optional Redis connection details.
    """

    model_list: list[LLMRouterModelConfig]
    # All three redis parameters are required. Even if there isn't a password, it should be an empty string.
    # NOTE(review): despite the comment above, the redis_* fields default to
    # None — presumably they are only required when Redis is actually used by
    # the router; confirm against the router construction code.
    main_model_group: str  # model group the router targets by default
    redis_host: str | None = None
    redis_port: int | None = None
    redis_password: str | None = None
    fallback_model_group: str | None = None  # group to fall back to — confirm semantics with router setup
    # Allowed routing strategies; see https://litellm.vercel.app/docs/routing
    routing_strategy: Literal[
        "simple-shuffle",
        "least-busy",
        "usage-based-routing",
        "usage-based-routing-v2",
        "latency-based-routing",
    ] = "usage-based-routing"
    num_retries: int = 1
    retry_delay_seconds: int = 15
    set_verbose: bool = False  # presumably enables verbose litellm logging — confirm
    disable_cooldowns: bool | None = None
    allowed_fails: int | None = None
    allowed_fails_policy: LLMAllowedFailsPolicy | None = None
    cooldown_time: float | None = None
    # Defaults below are read from settings once, at class-definition time.
    max_tokens: int | None = SettingsManager.get_settings().LLM_CONFIG_MAX_TOKENS
    max_completion_tokens: int | None = None
    reasoning_effort: str | None = None
    temperature: float | None = SettingsManager.get_settings().LLM_CONFIG_TEMPERATURE