shu/remove max completion tokens for ollama (#2300)
This commit is contained in:
@@ -430,10 +430,12 @@ class LLMAPIHandlerFactory:
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def get_api_parameters(llm_config: LLMConfig | LLMRouterConfig) -> dict[str, Any]:
|
def get_api_parameters(llm_config: LLMConfig | LLMRouterConfig) -> dict[str, Any]:
|
||||||
params: dict[str, Any] = {}
|
params: dict[str, Any] = {}
|
||||||
if llm_config.max_completion_tokens is not None:
|
if not llm_config.model_name.startswith("ollama/"):
|
||||||
params["max_completion_tokens"] = llm_config.max_completion_tokens
|
# OLLAMA does not support max_completion_tokens
|
||||||
elif llm_config.max_tokens is not None:
|
if llm_config.max_completion_tokens is not None:
|
||||||
params["max_tokens"] = llm_config.max_tokens
|
params["max_completion_tokens"] = llm_config.max_completion_tokens
|
||||||
|
elif llm_config.max_tokens is not None:
|
||||||
|
params["max_tokens"] = llm_config.max_tokens
|
||||||
|
|
||||||
if llm_config.temperature is not None:
|
if llm_config.temperature is not None:
|
||||||
params["temperature"] = llm_config.temperature
|
params["temperature"] = llm_config.temperature
|
||||||
|
|||||||
@@ -815,7 +815,6 @@ if settings.ENABLE_OLLAMA:
|
|||||||
["OLLAMA_SERVER_URL", "OLLAMA_MODEL"],
|
["OLLAMA_SERVER_URL", "OLLAMA_MODEL"],
|
||||||
supports_vision=False, # Ollama does not support vision yet
|
supports_vision=False, # Ollama does not support vision yet
|
||||||
add_assistant_prefix=False,
|
add_assistant_prefix=False,
|
||||||
max_completion_tokens=settings.LLM_CONFIG_MAX_TOKENS,
|
|
||||||
litellm_params=LiteLLMParams(
|
litellm_params=LiteLLMParams(
|
||||||
api_base=settings.OLLAMA_SERVER_URL,
|
api_base=settings.OLLAMA_SERVER_URL,
|
||||||
api_key=None,
|
api_key=None,
|
||||||
|
|||||||
Reference in New Issue
Block a user