Add support back for old GPT versions by supporting max_tokens (#1860)
This commit is contained in:
@@ -389,7 +389,11 @@ class LLMAPIHandlerFactory:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_api_parameters(llm_config: LLMConfig | LLMRouterConfig) -> dict[str, Any]:
|
def get_api_parameters(llm_config: LLMConfig | LLMRouterConfig) -> dict[str, Any]:
|
||||||
params: dict[str, Any] = {"max_completion_tokens": llm_config.max_completion_tokens}
|
params: dict[str, Any] = {}
|
||||||
|
if llm_config.max_completion_tokens is not None:
|
||||||
|
params["max_completion_tokens"] = llm_config.max_completion_tokens
|
||||||
|
elif llm_config.max_tokens is not None:
|
||||||
|
params["max_tokens"] = llm_config.max_tokens
|
||||||
|
|
||||||
if llm_config.temperature is not None:
|
if llm_config.temperature is not None:
|
||||||
params["temperature"] = llm_config.temperature
|
params["temperature"] = llm_config.temperature
|
||||||
|
|||||||
@@ -36,7 +36,8 @@ class LLMConfigBase:
|
|||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class LLMConfig(LLMConfigBase):
|
class LLMConfig(LLMConfigBase):
|
||||||
litellm_params: Optional[LiteLLMParams] = field(default=None)
|
litellm_params: Optional[LiteLLMParams] = field(default=None)
|
||||||
max_completion_tokens: int = SettingsManager.get_settings().LLM_CONFIG_MAX_TOKENS
|
max_tokens: int | None = SettingsManager.get_settings().LLM_CONFIG_MAX_TOKENS
|
||||||
|
max_completion_tokens: int | None = None
|
||||||
temperature: float | None = SettingsManager.get_settings().LLM_CONFIG_TEMPERATURE
|
temperature: float | None = SettingsManager.get_settings().LLM_CONFIG_TEMPERATURE
|
||||||
reasoning_effort: str | None = None
|
reasoning_effort: str | None = None
|
||||||
|
|
||||||
@@ -74,7 +75,8 @@ class LLMRouterConfig(LLMConfigBase):
|
|||||||
allowed_fails: int | None = None
|
allowed_fails: int | None = None
|
||||||
allowed_fails_policy: AllowedFailsPolicy | None = None
|
allowed_fails_policy: AllowedFailsPolicy | None = None
|
||||||
cooldown_time: float | None = None
|
cooldown_time: float | None = None
|
||||||
max_completion_tokens: int = SettingsManager.get_settings().LLM_CONFIG_MAX_TOKENS
|
max_tokens: int | None = SettingsManager.get_settings().LLM_CONFIG_MAX_TOKENS
|
||||||
|
max_completion_tokens: int | None = None
|
||||||
reasoning_effort: str | None = None
|
reasoning_effort: str | None = None
|
||||||
temperature: float | None = SettingsManager.get_settings().LLM_CONFIG_TEMPERATURE
|
temperature: float | None = SettingsManager.get_settings().LLM_CONFIG_TEMPERATURE
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user