Fix OpenAI flex usage (#4141)
Co-authored-by: Suchintan Singh <suchintan@skyvern.com>
This commit is contained in:
@@ -653,6 +653,9 @@ class LLMAPIHandlerFactory:
|
|||||||
if llm_config.litellm_params: # type: ignore
|
if llm_config.litellm_params: # type: ignore
|
||||||
active_parameters.update(llm_config.litellm_params) # type: ignore
|
active_parameters.update(llm_config.litellm_params) # type: ignore
|
||||||
|
|
||||||
|
if "timeout" not in active_parameters:
|
||||||
|
active_parameters["timeout"] = settings.LLM_CONFIG_TIMEOUT
|
||||||
|
|
||||||
# Apply thinking budget optimization if settings are available
|
# Apply thinking budget optimization if settings are available
|
||||||
if (
|
if (
|
||||||
LLMAPIHandlerFactory._thinking_budget_settings
|
LLMAPIHandlerFactory._thinking_budget_settings
|
||||||
@@ -773,13 +776,11 @@ class LLMAPIHandlerFactory:
|
|||||||
|
|
||||||
t_llm_request = time.perf_counter()
|
t_llm_request = time.perf_counter()
|
||||||
try:
|
try:
|
||||||
# TODO (kerem): add a timeout to this call
|
|
||||||
# TODO (kerem): add a retry mechanism to this call (acompletion_with_retries)
|
# TODO (kerem): add a retry mechanism to this call (acompletion_with_retries)
|
||||||
# TODO (kerem): use litellm fallbacks? https://litellm.vercel.app/docs/tutorials/fallbacks#how-does-completion_with_fallbacks-work
|
# TODO (kerem): use litellm fallbacks? https://litellm.vercel.app/docs/tutorials/fallbacks#how-does-completion_with_fallbacks-work
|
||||||
response = await litellm.acompletion(
|
response = await litellm.acompletion(
|
||||||
model=model_name,
|
model=model_name,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
timeout=settings.LLM_CONFIG_TIMEOUT,
|
|
||||||
drop_params=True, # Drop unsupported parameters gracefully
|
drop_params=True, # Drop unsupported parameters gracefully
|
||||||
**active_parameters,
|
**active_parameters,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ class LiteLLMParams(TypedDict, total=False):
|
|||||||
vertex_location: str | None
|
vertex_location: str | None
|
||||||
thinking: dict[str, Any] | None
|
thinking: dict[str, Any] | None
|
||||||
service_tier: str | None
|
service_tier: str | None
|
||||||
|
timeout: float | None
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
|
|||||||
Reference in New Issue
Block a user