From 179c12db8ca286d26421d15dd97e53933a23bdbb Mon Sep 17 00:00:00 2001
From: Shuchang Zheng
Date: Sun, 2 Mar 2025 00:16:00 -0500
Subject: [PATCH] add support back for old gpt versions by supporting
 `max_tokens` (#1860)

---
 skyvern/forge/sdk/api/llm/api_handler_factory.py | 6 +++++-
 skyvern/forge/sdk/api/llm/models.py              | 6 ++++--
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/skyvern/forge/sdk/api/llm/api_handler_factory.py b/skyvern/forge/sdk/api/llm/api_handler_factory.py
index eb3781be..435a753a 100644
--- a/skyvern/forge/sdk/api/llm/api_handler_factory.py
+++ b/skyvern/forge/sdk/api/llm/api_handler_factory.py
@@ -389,7 +389,11 @@ class LLMAPIHandlerFactory:
 
     @staticmethod
     def get_api_parameters(llm_config: LLMConfig | LLMRouterConfig) -> dict[str, Any]:
-        params: dict[str, Any] = {"max_completion_tokens": llm_config.max_completion_tokens}
+        params: dict[str, Any] = {}
+        if llm_config.max_completion_tokens is not None:
+            params["max_completion_tokens"] = llm_config.max_completion_tokens
+        elif llm_config.max_tokens is not None:
+            params["max_tokens"] = llm_config.max_tokens
 
         if llm_config.temperature is not None:
             params["temperature"] = llm_config.temperature
diff --git a/skyvern/forge/sdk/api/llm/models.py b/skyvern/forge/sdk/api/llm/models.py
index cf4b7d1f..043a8583 100644
--- a/skyvern/forge/sdk/api/llm/models.py
+++ b/skyvern/forge/sdk/api/llm/models.py
@@ -36,7 +36,8 @@ class LLMConfigBase:
 @dataclass(frozen=True)
 class LLMConfig(LLMConfigBase):
     litellm_params: Optional[LiteLLMParams] = field(default=None)
-    max_completion_tokens: int = SettingsManager.get_settings().LLM_CONFIG_MAX_TOKENS
+    max_tokens: int | None = SettingsManager.get_settings().LLM_CONFIG_MAX_TOKENS
+    max_completion_tokens: int | None = None
     temperature: float | None = SettingsManager.get_settings().LLM_CONFIG_TEMPERATURE
     reasoning_effort: str | None = None
 
@@ -74,7 +75,8 @@ class LLMRouterConfig(LLMConfigBase):
     allowed_fails: int | None = None
     allowed_fails_policy: AllowedFailsPolicy | None = None
     cooldown_time: float | None = None
-    max_completion_tokens: int = SettingsManager.get_settings().LLM_CONFIG_MAX_TOKENS
+    max_tokens: int | None = SettingsManager.get_settings().LLM_CONFIG_MAX_TOKENS
+    max_completion_tokens: int | None = None
     reasoning_effort: str | None = None
     temperature: float | None = SettingsManager.get_settings().LLM_CONFIG_TEMPERATURE
 
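
As a quick illustration of the behavior this patch introduces: get_api_parameters now prefers max_completion_tokens when it is set and falls back to max_tokens otherwise, which is the parameter older GPT completions models expect. The sketch below mirrors that fallback in isolation; FakeLLMConfig is a hypothetical, simplified stand-in for skyvern's LLMConfig/LLMRouterConfig (it omits fields such as reasoning_effort and the settings-based defaults), not the real dataclass.

# Minimal sketch of the max_completion_tokens -> max_tokens fallback.
# FakeLLMConfig is an illustrative stand-in, not skyvern's actual config class.
from dataclasses import dataclass
from typing import Any, Optional


@dataclass(frozen=True)
class FakeLLMConfig:
    max_tokens: Optional[int] = None
    max_completion_tokens: Optional[int] = None
    temperature: Optional[float] = None


def get_api_parameters(llm_config: FakeLLMConfig) -> dict[str, Any]:
    params: dict[str, Any] = {}
    # Newer models take max_completion_tokens; older GPT versions only accept
    # max_tokens, so fall back to it when the newer field is unset.
    if llm_config.max_completion_tokens is not None:
        params["max_completion_tokens"] = llm_config.max_completion_tokens
    elif llm_config.max_tokens is not None:
        params["max_tokens"] = llm_config.max_tokens

    if llm_config.temperature is not None:
        params["temperature"] = llm_config.temperature
    return params


# Old-style config: only max_tokens is set, so that key is forwarded.
print(get_api_parameters(FakeLLMConfig(max_tokens=4096)))
# -> {'max_tokens': 4096}

# New-style config: max_completion_tokens wins even if max_tokens is also set.
print(get_api_parameters(FakeLLMConfig(max_tokens=4096, max_completion_tokens=8192)))
# -> {'max_completion_tokens': 8192}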