diff --git a/skyvern/config.py b/skyvern/config.py
index 0640204e..9de3d1e6 100644
--- a/skyvern/config.py
+++ b/skyvern/config.py
@@ -312,6 +312,8 @@ class Settings(BaseSettings):
     GEMINI_API_KEY: str | None = None
     GEMINI_INCLUDE_THOUGHT: bool = False
     GEMINI_THINKING_BUDGET: int | None = None
+    DEFAULT_THINKING_BUDGET: int = 1024
+    EXTRACT_ACTION_THINKING_BUDGET: int = 512
 
     # VERTEX_AI
     VERTEX_CREDENTIALS: str | None = None
diff --git a/skyvern/forge/sdk/api/llm/api_handler_factory.py b/skyvern/forge/sdk/api/llm/api_handler_factory.py
index ca13af83..b2ae5544 100644
--- a/skyvern/forge/sdk/api/llm/api_handler_factory.py
+++ b/skyvern/forge/sdk/api/llm/api_handler_factory.py
@@ -51,8 +51,9 @@ LOG = structlog.get_logger()
 EXTRACT_ACTION_PROMPT_NAME = "extract-actions"
 CHECK_USER_GOAL_PROMPT_NAMES = {"check-user-goal", "check-user-goal-with-termination"}
 
-# Default thinking budget for extract-actions prompt (can be overridden by THINKING_BUDGET_OPTIMIZATION experiment)
-EXTRACT_ACTION_DEFAULT_THINKING_BUDGET = 512
+# Default thinking budgets (configurable via env vars, can be overridden by THINKING_BUDGET_OPTIMIZATION experiment)
+EXTRACT_ACTION_DEFAULT_THINKING_BUDGET = settings.EXTRACT_ACTION_THINKING_BUDGET
+DEFAULT_THINKING_BUDGET = settings.DEFAULT_THINKING_BUDGET
 
 
 def _safe_model_dump_json(response: ModelResponse, indent: int = 2) -> str:
@@ -348,6 +349,14 @@ class LLMAPIHandlerFactory:
         parameters: dict[str, Any], new_budget: int, llm_config: LLMConfig | LLMRouterConfig, prompt_name: str
     ) -> None:
         """Apply thinking optimization for Gemini models using exact integer budget value."""
+        # Get model label for logging — prefer main_model_group for router configs
+        model_label = llm_config.main_model_group if isinstance(llm_config, LLMRouterConfig) else llm_config.model_name
+
+        # Models that use thinking_level (e.g. Gemini 3 Pro/Flash) don't support budget_tokens.
+        # Their reasoning is already bounded by the thinking_level set in their config, so skip.
+        if "thinking_level" in parameters:
+            return
+
         if "thinking" in parameters and isinstance(parameters["thinking"], dict):
             parameters["thinking"]["budget_tokens"] = new_budget
         else:
@@ -355,10 +364,6 @@ class LLMAPIHandlerFactory:
             if settings.GEMINI_INCLUDE_THOUGHT:
                 thinking_payload["type"] = "enabled"
             parameters["thinking"] = thinking_payload
-        # Get safe model label for logging
-        model_label = getattr(llm_config, "model_name", None)
-        if model_label is None and isinstance(llm_config, LLMRouterConfig):
-            model_label = getattr(llm_config, "main_model_group", "router")
 
         LOG.info(
             "Applied thinking budget optimization (budget_tokens)",
@@ -458,6 +463,11 @@ class LLMAPIHandlerFactory:
             LLMAPIHandlerFactory._apply_thinking_budget_optimization(
                 parameters, EXTRACT_ACTION_DEFAULT_THINKING_BUDGET, llm_config, prompt_name
             )
+        else:
+            # Apply default thinking budget for all other prompts to prevent unbounded reasoning
+            LLMAPIHandlerFactory._apply_thinking_budget_optimization(
+                parameters, DEFAULT_THINKING_BUDGET, llm_config, prompt_name
+            )
 
         context = skyvern_context.current()
         is_speculative_step = step.is_speculative if step else False
@@ -886,6 +896,11 @@ class LLMAPIHandlerFactory:
             LLMAPIHandlerFactory._apply_thinking_budget_optimization(
                 active_parameters, EXTRACT_ACTION_DEFAULT_THINKING_BUDGET, llm_config, prompt_name
             )
+        else:
+            # Apply default thinking budget for all other prompts to prevent unbounded reasoning
+            LLMAPIHandlerFactory._apply_thinking_budget_optimization(
+                active_parameters, DEFAULT_THINKING_BUDGET, llm_config, prompt_name
+            )
 
         context = skyvern_context.current()
         is_speculative_step = step.is_speculative if step else False