From 349e506dc13c0b6433cf64db9a274afdf1a06888 Mon Sep 17 00:00:00 2001 From: Shuchang Zheng Date: Tue, 6 May 2025 12:21:42 -0700 Subject: [PATCH] shu/remove max completion tokens for ollama (#2300) --- skyvern/forge/sdk/api/llm/api_handler_factory.py | 10 ++++++---- skyvern/forge/sdk/api/llm/config_registry.py | 1 - 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/skyvern/forge/sdk/api/llm/api_handler_factory.py b/skyvern/forge/sdk/api/llm/api_handler_factory.py index f546b062..6ef25dfb 100644 --- a/skyvern/forge/sdk/api/llm/api_handler_factory.py +++ b/skyvern/forge/sdk/api/llm/api_handler_factory.py @@ -430,10 +430,12 @@ class LLMAPIHandlerFactory: @staticmethod def get_api_parameters(llm_config: LLMConfig | LLMRouterConfig) -> dict[str, Any]: params: dict[str, Any] = {} - if llm_config.max_completion_tokens is not None: - params["max_completion_tokens"] = llm_config.max_completion_tokens - elif llm_config.max_tokens is not None: - params["max_tokens"] = llm_config.max_tokens + if not llm_config.model_name.startswith("ollama/"): + # OLLAMA does not support max_completion_tokens + if llm_config.max_completion_tokens is not None: + params["max_completion_tokens"] = llm_config.max_completion_tokens + elif llm_config.max_tokens is not None: + params["max_tokens"] = llm_config.max_tokens if llm_config.temperature is not None: params["temperature"] = llm_config.temperature diff --git a/skyvern/forge/sdk/api/llm/config_registry.py b/skyvern/forge/sdk/api/llm/config_registry.py index 3fcfec7e..2eb4b6e0 100644 --- a/skyvern/forge/sdk/api/llm/config_registry.py +++ b/skyvern/forge/sdk/api/llm/config_registry.py @@ -815,7 +815,6 @@ if settings.ENABLE_OLLAMA: ["OLLAMA_SERVER_URL", "OLLAMA_MODEL"], supports_vision=False, # Ollama does not support vision yet add_assistant_prefix=False, - max_completion_tokens=settings.LLM_CONFIG_MAX_TOKENS, litellm_params=LiteLLMParams( api_base=settings.OLLAMA_SERVER_URL, api_key=None,