shu/remove max completion tokens for ollama (#2300)

This commit is contained in:
Shuchang Zheng
2025-05-06 12:21:42 -07:00
committed by GitHub
parent 5971108237
commit 349e506dc1
2 changed files with 6 additions and 5 deletions

View File

@@ -430,10 +430,12 @@ class LLMAPIHandlerFactory:
@staticmethod
def get_api_parameters(llm_config: LLMConfig | LLMRouterConfig) -> dict[str, Any]:
params: dict[str, Any] = {}
if llm_config.max_completion_tokens is not None:
params["max_completion_tokens"] = llm_config.max_completion_tokens
elif llm_config.max_tokens is not None:
params["max_tokens"] = llm_config.max_tokens
if not llm_config.model_name.startswith("ollama/"):
# OLLAMA does not support max_completion_tokens
if llm_config.max_completion_tokens is not None:
params["max_completion_tokens"] = llm_config.max_completion_tokens
elif llm_config.max_tokens is not None:
params["max_tokens"] = llm_config.max_tokens
if llm_config.temperature is not None:
params["temperature"] = llm_config.temperature

View File

@@ -815,7 +815,6 @@ if settings.ENABLE_OLLAMA:
["OLLAMA_SERVER_URL", "OLLAMA_MODEL"],
supports_vision=False, # Ollama does not support vision yet
add_assistant_prefix=False,
max_completion_tokens=settings.LLM_CONFIG_MAX_TOKENS,
litellm_params=LiteLLMParams(
api_base=settings.OLLAMA_SERVER_URL,
api_key=None,