diff --git a/skyvern/forge/sdk/api/llm/api_handler_factory.py b/skyvern/forge/sdk/api/llm/api_handler_factory.py index 46655c2d..042e6e53 100644 --- a/skyvern/forge/sdk/api/llm/api_handler_factory.py +++ b/skyvern/forge/sdk/api/llm/api_handler_factory.py @@ -653,6 +653,9 @@ class LLMAPIHandlerFactory: if llm_config.litellm_params: # type: ignore active_parameters.update(llm_config.litellm_params) # type: ignore + if "timeout" not in active_parameters: + active_parameters["timeout"] = settings.LLM_CONFIG_TIMEOUT + # Apply thinking budget optimization if settings are available if ( LLMAPIHandlerFactory._thinking_budget_settings @@ -773,13 +776,11 @@ class LLMAPIHandlerFactory: t_llm_request = time.perf_counter() try: - # TODO (kerem): add a timeout to this call # TODO (kerem): add a retry mechanism to this call (acompletion_with_retries) # TODO (kerem): use litellm fallbacks? https://litellm.vercel.app/docs/tutorials/fallbacks#how-does-completion_with_fallbacks-work response = await litellm.acompletion( model=model_name, messages=messages, - timeout=settings.LLM_CONFIG_TIMEOUT, drop_params=True, # Drop unsupported parameters gracefully **active_parameters, ) diff --git a/skyvern/forge/sdk/api/llm/models.py b/skyvern/forge/sdk/api/llm/models.py index aff891ce..45274c57 100644 --- a/skyvern/forge/sdk/api/llm/models.py +++ b/skyvern/forge/sdk/api/llm/models.py @@ -19,6 +19,7 @@ class LiteLLMParams(TypedDict, total=False): vertex_location: str | None thinking: dict[str, Any] | None service_tier: str | None + timeout: float | None @dataclass(frozen=True)