diff --git a/skyvern/forge/sdk/api/llm/api_handler_factory.py b/skyvern/forge/sdk/api/llm/api_handler_factory.py
index b9fc40b4..12e4325f 100644
--- a/skyvern/forge/sdk/api/llm/api_handler_factory.py
+++ b/skyvern/forge/sdk/api/llm/api_handler_factory.py
@@ -155,7 +155,12 @@ class LLMAPIHandlerFactory:
                 LOG.exception("Failed to calculate LLM cost", error=str(e))
                 llm_cost = 0
             prompt_tokens = response.get("usage", {}).get("prompt_tokens", 0)
-            completion_tokens = response.get("usage", {}).get("completion_tokens", 0)
+
+            # TODO (suchintan): Properly support reasoning tokens
+            reasoning_tokens = response.get("usage", {}).get("reasoning_tokens", 0)
+            LOG.info("Reasoning tokens", reasoning_tokens=reasoning_tokens)
+
+            completion_tokens = response.get("usage", {}).get("completion_tokens", 0) + reasoning_tokens
 
             if step:
                 await app.DATABASE.update_step(
@@ -388,10 +393,15 @@ class LLMAPIHandlerFactory:
 
     @staticmethod
     def get_api_parameters(llm_config: LLMConfig | LLMRouterConfig) -> dict[str, Any]:
-        return {
-            "max_tokens": llm_config.max_output_tokens,
-            "temperature": settings.LLM_CONFIG_TEMPERATURE,
-        }
+        params: dict[str, Any] = {"max_completion_tokens": llm_config.max_completion_tokens}
+
+        if llm_config.temperature is not None:
+            params["temperature"] = llm_config.temperature
+
+        if llm_config.reasoning_effort is not None:
+            params["reasoning_effort"] = llm_config.reasoning_effort
+
+        return params
 
     @classmethod
     def register_custom_handler(cls, llm_key: str, handler: LLMAPIHandler) -> None:
diff --git a/skyvern/forge/sdk/api/llm/config_registry.py b/skyvern/forge/sdk/api/llm/config_registry.py
index ed7dc97e..7811f3a9 100644
--- a/skyvern/forge/sdk/api/llm/config_registry.py
+++ b/skyvern/forge/sdk/api/llm/config_registry.py
@@ -80,7 +80,19 @@ if settings.ENABLE_OPENAI:
     LLMConfigRegistry.register_config(
         "OPENAI_GPT4O",
         LLMConfig(
-            "gpt-4o", ["OPENAI_API_KEY"], supports_vision=True, add_assistant_prefix=False, max_output_tokens=16384
+            "gpt-4o", ["OPENAI_API_KEY"], supports_vision=True, add_assistant_prefix=False, max_completion_tokens=16384
+        ),
+    )
+    LLMConfigRegistry.register_config(
+        "OPENAI_O3_MINI",
+        LLMConfig(
+            "o3-mini",
+            ["OPENAI_API_KEY"],
+            supports_vision=False,
+            add_assistant_prefix=False,
+            max_completion_tokens=16384,
+            temperature=None,  # Temperature isn't supported in the O-model series
+            reasoning_effort="high",
         ),
     )
     LLMConfigRegistry.register_config(
@@ -90,7 +102,7 @@ if settings.ENABLE_OPENAI:
             ["OPENAI_API_KEY"],
             supports_vision=True,
             add_assistant_prefix=False,
-            max_output_tokens=16384,
+            max_completion_tokens=16384,
         ),
     )
     LLMConfigRegistry.register_config(
@@ -100,7 +112,7 @@ if settings.ENABLE_OPENAI:
             ["OPENAI_API_KEY"],
             supports_vision=True,
             add_assistant_prefix=False,
-            max_output_tokens=16384,
+            max_completion_tokens=16384,
         ),
     )
 
@@ -149,7 +161,7 @@ if settings.ENABLE_ANTHROPIC:
             ["ANTHROPIC_API_KEY"],
             supports_vision=True,
             add_assistant_prefix=True,
-            max_output_tokens=8192,
+            max_completion_tokens=8192,
         ),
     )
 
@@ -275,7 +287,7 @@ if settings.ENABLE_GEMINI:
             ["GEMINI_API_KEY"],
             supports_vision=True,
             add_assistant_prefix=False,
-            max_output_tokens=8192,
+            max_completion_tokens=8192,
         ),
     )
     LLMConfigRegistry.register_config(
@@ -285,7 +297,7 @@ if settings.ENABLE_GEMINI:
             ["GEMINI_API_KEY"],
             supports_vision=True,
             add_assistant_prefix=False,
-            max_output_tokens=8192,
+            max_completion_tokens=8192,
         ),
     )
 
diff --git a/skyvern/forge/sdk/api/llm/models.py b/skyvern/forge/sdk/api/llm/models.py
index 197999a5..19b2abf7 100644
--- a/skyvern/forge/sdk/api/llm/models.py
+++ b/skyvern/forge/sdk/api/llm/models.py
@@ -36,7 +36,9 @@ class LLMConfigBase:
 @dataclass(frozen=True)
 class LLMConfig(LLMConfigBase):
     litellm_params: Optional[LiteLLMParams] = field(default=None)
-    max_output_tokens: int = SettingsManager.get_settings().LLM_CONFIG_MAX_TOKENS
+    max_completion_tokens: int = SettingsManager.get_settings().LLM_CONFIG_MAX_TOKENS
+    temperature: float | None = SettingsManager.get_settings().LLM_CONFIG_TEMPERATURE
+    reasoning_effort: str | None = None
 
 
 @dataclass(frozen=True)
@@ -72,7 +74,9 @@ class LLMRouterConfig(LLMConfigBase):
     allowed_fails: int | None = None
     allowed_fails_policy: AllowedFailsPolicy | None = None
     cooldown_time: float | None = None
-    max_output_tokens: int = SettingsManager.get_settings().LLM_CONFIG_MAX_TOKENS
+    max_completion_tokens: int = SettingsManager.get_settings().LLM_CONFIG_MAX_TOKENS
+    reasoning_effort: str | None = None
+    temperature: float | None = SettingsManager.get_settings().LLM_CONFIG_TEMPERATURE
 
 
 class LLMAPIHandler(Protocol):