Fix openai flex usage (#4141)

Co-authored-by: Suchintan Singh <suchintan@skyvern.com>
Author: Suchintan
Date:   2025-11-28 21:15:38 -08:00 (committed by GitHub)
Parent: 0ad149d905
Commit: d6aed0d0be

2 changed files with 4 additions and 2 deletions


@@ -653,6 +653,9 @@ class LLMAPIHandlerFactory:
         if llm_config.litellm_params: # type: ignore
             active_parameters.update(llm_config.litellm_params) # type: ignore
+        if "timeout" not in active_parameters:
+            active_parameters["timeout"] = settings.LLM_CONFIG_TIMEOUT
+
         # Apply thinking budget optimization if settings are available
         if (
             LLMAPIHandlerFactory._thinking_budget_settings
@@ -773,13 +776,11 @@ class LLMAPIHandlerFactory:
         t_llm_request = time.perf_counter()
         try:
-            # TODO (kerem): add a timeout to this call
             # TODO (kerem): add a retry mechanism to this call (acompletion_with_retries)
             # TODO (kerem): use litellm fallbacks? https://litellm.vercel.app/docs/tutorials/fallbacks#how-does-completion_with_fallbacks-work
             response = await litellm.acompletion(
                 model=model_name,
                 messages=messages,
-                timeout=settings.LLM_CONFIG_TIMEOUT,
                 drop_params=True, # Drop unsupported parameters gracefully
                 **active_parameters,
             )
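Taken together, the two hunks above move the request timeout out of a hard-coded keyword argument on litellm.acompletion and into active_parameters, where settings.LLM_CONFIG_TIMEOUT is used only as a fallback when the per-model config did not supply its own value. Below is a minimal, hypothetical sketch of that flow; only the "timeout" fallback logic is taken from the diff, while the helper name build_active_parameters and the stub settings object are illustrative and not part of the actual handler.

# Hypothetical sketch of the timeout flow after this change. Only the
# "timeout" fallback to settings.LLM_CONFIG_TIMEOUT mirrors the diff above;
# the helper name and the stub settings object are illustrative.
from dataclasses import dataclass
from typing import Any


@dataclass
class _Settings:
    LLM_CONFIG_TIMEOUT: float = 300.0  # illustrative global default


settings = _Settings()


def build_active_parameters(litellm_params: dict[str, Any] | None) -> dict[str, Any]:
    active_parameters: dict[str, Any] = {}
    if litellm_params:
        active_parameters.update(litellm_params)
    # Fall back to the global timeout only when the config did not set one,
    # so a per-config timeout (e.g. a longer one for OpenAI's flex tier) wins.
    if "timeout" not in active_parameters:
        active_parameters["timeout"] = settings.LLM_CONFIG_TIMEOUT
    return active_parameters


# A flex-tier config that carries its own timeout keeps it:
flex = build_active_parameters({"service_tier": "flex", "timeout": 900.0})
assert flex["timeout"] == 900.0

# A config without an explicit timeout still gets the global default:
plain = build_active_parameters(None)
assert plain["timeout"] == settings.LLM_CONFIG_TIMEOUT

The acompletion call then receives the timeout via **active_parameters rather than a separate keyword argument, so a timeout supplied through litellm_params no longer conflicts with a hard-coded value.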


@@ -19,6 +19,7 @@ class LiteLLMParams(TypedDict, total=False):
     vertex_location: str | None
     thinking: dict[str, Any] | None
     service_tier: str | None
+    timeout: float | None


 @dataclass(frozen=True)
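In the second file, the new key makes a per-config timeout a typed member of LiteLLMParams. A minimal sketch of how a config might use it, assuming only the keys visible in the diff and that the handler forwards these params as described above; the flex example values are illustrative.

# Minimal sketch, assuming only the LiteLLMParams keys visible in the diff;
# the flex-tier example values are illustrative.
from typing import Any, TypedDict


class LiteLLMParams(TypedDict, total=False):
    vertex_location: str | None
    thinking: dict[str, Any] | None
    service_tier: str | None
    timeout: float | None  # new: per-config request timeout


# An OpenAI flex-tier config can now declare the longer timeout it needs,
# and the handler forwards it instead of overriding it:
flex_params: LiteLLMParams = {
    "service_tier": "flex",
    "timeout": 900.0,
}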