O3 Mini support (#1709)
This commit is contained in:
@@ -155,7 +155,12 @@ class LLMAPIHandlerFactory:
|
|||||||
LOG.exception("Failed to calculate LLM cost", error=str(e))
|
LOG.exception("Failed to calculate LLM cost", error=str(e))
|
||||||
llm_cost = 0
|
llm_cost = 0
|
||||||
prompt_tokens = response.get("usage", {}).get("prompt_tokens", 0)
|
prompt_tokens = response.get("usage", {}).get("prompt_tokens", 0)
|
||||||
completion_tokens = response.get("usage", {}).get("completion_tokens", 0)
|
|
||||||
|
# TODO (suchintan): Properly support reasoning tokens
|
||||||
|
reasoning_tokens = response.get("usage", {}).get("reasoning_tokens", 0)
|
||||||
|
LOG.info("Reasoning tokens", reasoning_tokens=reasoning_tokens)
|
||||||
|
|
||||||
|
completion_tokens = response.get("usage", {}).get("completion_tokens", 0) + reasoning_tokens
|
||||||
|
|
||||||
if step:
|
if step:
|
||||||
await app.DATABASE.update_step(
|
await app.DATABASE.update_step(
|
||||||
@@ -388,10 +393,15 @@ class LLMAPIHandlerFactory:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_api_parameters(llm_config: LLMConfig | LLMRouterConfig) -> dict[str, Any]:
|
def get_api_parameters(llm_config: LLMConfig | LLMRouterConfig) -> dict[str, Any]:
|
||||||
return {
|
params: dict[str, Any] = {"max_completion_tokens": llm_config.max_completion_tokens}
|
||||||
"max_tokens": llm_config.max_output_tokens,
|
|
||||||
"temperature": settings.LLM_CONFIG_TEMPERATURE,
|
if llm_config.temperature is not None:
|
||||||
}
|
params["temperature"] = llm_config.temperature
|
||||||
|
|
||||||
|
if llm_config.reasoning_effort is not None:
|
||||||
|
params["reasoning_effort"] = llm_config.reasoning_effort
|
||||||
|
|
||||||
|
return params
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def register_custom_handler(cls, llm_key: str, handler: LLMAPIHandler) -> None:
|
def register_custom_handler(cls, llm_key: str, handler: LLMAPIHandler) -> None:
|
||||||
|
|||||||
@@ -80,7 +80,19 @@ if settings.ENABLE_OPENAI:
|
|||||||
LLMConfigRegistry.register_config(
|
LLMConfigRegistry.register_config(
|
||||||
"OPENAI_GPT4O",
|
"OPENAI_GPT4O",
|
||||||
LLMConfig(
|
LLMConfig(
|
||||||
"gpt-4o", ["OPENAI_API_KEY"], supports_vision=True, add_assistant_prefix=False, max_output_tokens=16384
|
"gpt-4o", ["OPENAI_API_KEY"], supports_vision=True, add_assistant_prefix=False, max_completion_tokens=16384
|
||||||
|
),
|
||||||
|
)
|
||||||
|
LLMConfigRegistry.register_config(
|
||||||
|
"OPENAI_O3_MINI",
|
||||||
|
LLMConfig(
|
||||||
|
"o3-mini",
|
||||||
|
["OPENAI_API_KEY"],
|
||||||
|
supports_vision=False,
|
||||||
|
add_assistant_prefix=False,
|
||||||
|
max_completion_tokens=16384,
|
||||||
|
temperature=None, # Temperature isn't supported in the O-model series
|
||||||
|
reasoning_effort="high",
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
LLMConfigRegistry.register_config(
|
LLMConfigRegistry.register_config(
|
||||||
@@ -90,7 +102,7 @@ if settings.ENABLE_OPENAI:
|
|||||||
["OPENAI_API_KEY"],
|
["OPENAI_API_KEY"],
|
||||||
supports_vision=True,
|
supports_vision=True,
|
||||||
add_assistant_prefix=False,
|
add_assistant_prefix=False,
|
||||||
max_output_tokens=16384,
|
max_completion_tokens=16384,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
LLMConfigRegistry.register_config(
|
LLMConfigRegistry.register_config(
|
||||||
@@ -100,7 +112,7 @@ if settings.ENABLE_OPENAI:
|
|||||||
["OPENAI_API_KEY"],
|
["OPENAI_API_KEY"],
|
||||||
supports_vision=True,
|
supports_vision=True,
|
||||||
add_assistant_prefix=False,
|
add_assistant_prefix=False,
|
||||||
max_output_tokens=16384,
|
max_completion_tokens=16384,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -149,7 +161,7 @@ if settings.ENABLE_ANTHROPIC:
|
|||||||
["ANTHROPIC_API_KEY"],
|
["ANTHROPIC_API_KEY"],
|
||||||
supports_vision=True,
|
supports_vision=True,
|
||||||
add_assistant_prefix=True,
|
add_assistant_prefix=True,
|
||||||
max_output_tokens=8192,
|
max_completion_tokens=8192,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -275,7 +287,7 @@ if settings.ENABLE_GEMINI:
|
|||||||
["GEMINI_API_KEY"],
|
["GEMINI_API_KEY"],
|
||||||
supports_vision=True,
|
supports_vision=True,
|
||||||
add_assistant_prefix=False,
|
add_assistant_prefix=False,
|
||||||
max_output_tokens=8192,
|
max_completion_tokens=8192,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
LLMConfigRegistry.register_config(
|
LLMConfigRegistry.register_config(
|
||||||
@@ -285,7 +297,7 @@ if settings.ENABLE_GEMINI:
|
|||||||
["GEMINI_API_KEY"],
|
["GEMINI_API_KEY"],
|
||||||
supports_vision=True,
|
supports_vision=True,
|
||||||
add_assistant_prefix=False,
|
add_assistant_prefix=False,
|
||||||
max_output_tokens=8192,
|
max_completion_tokens=8192,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -36,7 +36,9 @@ class LLMConfigBase:
|
|||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class LLMConfig(LLMConfigBase):
|
class LLMConfig(LLMConfigBase):
|
||||||
litellm_params: Optional[LiteLLMParams] = field(default=None)
|
litellm_params: Optional[LiteLLMParams] = field(default=None)
|
||||||
max_output_tokens: int = SettingsManager.get_settings().LLM_CONFIG_MAX_TOKENS
|
max_completion_tokens: int = SettingsManager.get_settings().LLM_CONFIG_MAX_TOKENS
|
||||||
|
temperature: float | None = SettingsManager.get_settings().LLM_CONFIG_TEMPERATURE
|
||||||
|
reasoning_effort: str | None = None
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
@@ -72,7 +74,9 @@ class LLMRouterConfig(LLMConfigBase):
|
|||||||
allowed_fails: int | None = None
|
allowed_fails: int | None = None
|
||||||
allowed_fails_policy: AllowedFailsPolicy | None = None
|
allowed_fails_policy: AllowedFailsPolicy | None = None
|
||||||
cooldown_time: float | None = None
|
cooldown_time: float | None = None
|
||||||
max_output_tokens: int = SettingsManager.get_settings().LLM_CONFIG_MAX_TOKENS
|
max_completion_tokens: int = SettingsManager.get_settings().LLM_CONFIG_MAX_TOKENS
|
||||||
|
reasoning_effort: str | None = None
|
||||||
|
temperature: float | None = SettingsManager.get_settings().LLM_CONFIG_TEMPERATURE
|
||||||
|
|
||||||
|
|
||||||
class LLMAPIHandler(Protocol):
|
class LLMAPIHandler(Protocol):
|
||||||
|
|||||||
Reference in New Issue
Block a user