Fix openai flex usage (#4141)

Co-authored-by: Suchintan Singh <suchintan@skyvern.com>
Author: Suchintan
Date:   2025-11-28 21:15:38 -08:00 (committed by GitHub)
Parent: 0ad149d905
Commit: d6aed0d0be

2 changed files with 4 additions and 2 deletions


@@ -653,6 +653,9 @@ class LLMAPIHandlerFactory:
         if llm_config.litellm_params: # type: ignore
             active_parameters.update(llm_config.litellm_params) # type: ignore
+        if "timeout" not in active_parameters:
+            active_parameters["timeout"] = settings.LLM_CONFIG_TIMEOUT
+
         # Apply thinking budget optimization if settings are available
         if (
             LLMAPIHandlerFactory._thinking_budget_settings
@@ -773,13 +776,11 @@ class LLMAPIHandlerFactory:
         t_llm_request = time.perf_counter()
         try:
-            # TODO (kerem): add a timeout to this call
             # TODO (kerem): add a retry mechanism to this call (acompletion_with_retries)
             # TODO (kerem): use litellm fallbacks? https://litellm.vercel.app/docs/tutorials/fallbacks#how-does-completion_with_fallbacks-work
             response = await litellm.acompletion(
                 model=model_name,
                 messages=messages,
-                timeout=settings.LLM_CONFIG_TIMEOUT,
                 drop_params=True, # Drop unsupported parameters gracefully
                 **active_parameters,
             )
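Taken together, the two hunks above move the request timeout out of a hard-coded keyword argument on litellm.acompletion and into active_parameters, where settings.LLM_CONFIG_TIMEOUT is used only as a fallback when the per-model config did not supply its own value. Below is a minimal, hypothetical sketch of that flow; only the "timeout" fallback logic is taken from the diff, while the helper name build_active_parameters and the stub settings object are illustrative and not part of the actual handler.

# Hypothetical sketch of the timeout flow after this change. Only the
# "timeout" fallback to settings.LLM_CONFIG_TIMEOUT mirrors the diff above;
# the helper name and the stub settings object are illustrative.
from dataclasses import dataclass
from typing import Any


@dataclass
class _Settings:
    LLM_CONFIG_TIMEOUT: float = 300.0  # illustrative global default


settings = _Settings()


def build_active_parameters(litellm_params: dict[str, Any] | None) -> dict[str, Any]:
    active_parameters: dict[str, Any] = {}
    if litellm_params:
        active_parameters.update(litellm_params)
    # Fall back to the global timeout only when the config did not set one,
    # so a per-config timeout (e.g. a longer one for OpenAI's flex tier) wins.
    if "timeout" not in active_parameters:
        active_parameters["timeout"] = settings.LLM_CONFIG_TIMEOUT
    return active_parameters


# A flex-tier config that carries its own timeout keeps it:
flex = build_active_parameters({"service_tier": "flex", "timeout": 900.0})
assert flex["timeout"] == 900.0

# A config without an explicit timeout still gets the global default:
plain = build_active_parameters(None)
assert plain["timeout"] == settings.LLM_CONFIG_TIMEOUT

The acompletion call then receives the timeout via **active_parameters rather than a separate keyword argument, so a timeout supplied through litellm_params no longer conflicts with a hard-coded value.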


@@ -19,6 +19,7 @@ class LiteLLMParams(TypedDict, total=False):
     vertex_location: str | None
     thinking: dict[str, Any] | None
     service_tier: str | None
+    timeout: float | None


 @dataclass(frozen=True)
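In the second file, the new key makes a per-config timeout a typed member of LiteLLMParams. A minimal sketch of how a config might use it, assuming only the keys visible in the diff and that the handler forwards these params as described above; the flex example values are illustrative.

# Minimal sketch, assuming only the LiteLLMParams keys visible in the diff;
# the flex-tier example values are illustrative.
from typing import Any, TypedDict


class LiteLLMParams(TypedDict, total=False):
    vertex_location: str | None
    thinking: dict[str, Any] | None
    service_tier: str | None
    timeout: float | None  # new: per-config request timeout


# An OpenAI flex-tier config can now declare the longer timeout it needs,
# and the handler forwards it instead of overriding it:
flex_params: LiteLLMParams = {
    "service_tier": "flex",
    "timeout": 900.0,
}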