diff --git a/skyvern/config.py b/skyvern/config.py
index 0640204e..9de3d1e6 100644
--- a/skyvern/config.py
+++ b/skyvern/config.py
@@ -312,6 +312,8 @@ class Settings(BaseSettings):
     GEMINI_API_KEY: str | None = None
     GEMINI_INCLUDE_THOUGHT: bool = False
     GEMINI_THINKING_BUDGET: int | None = None
+    DEFAULT_THINKING_BUDGET: int = 1024
+    EXTRACT_ACTION_THINKING_BUDGET: int = 512
 
     # VERTEX_AI
     VERTEX_CREDENTIALS: str | None = None
diff --git a/skyvern/forge/sdk/api/llm/api_handler_factory.py b/skyvern/forge/sdk/api/llm/api_handler_factory.py
index ca13af83..b2ae5544 100644
--- a/skyvern/forge/sdk/api/llm/api_handler_factory.py
+++ b/skyvern/forge/sdk/api/llm/api_handler_factory.py
@@ -51,8 +51,9 @@ LOG = structlog.get_logger()
 EXTRACT_ACTION_PROMPT_NAME = "extract-actions"
 CHECK_USER_GOAL_PROMPT_NAMES = {"check-user-goal", "check-user-goal-with-termination"}
 
-# Default thinking budget for extract-actions prompt (can be overridden by THINKING_BUDGET_OPTIMIZATION experiment)
-EXTRACT_ACTION_DEFAULT_THINKING_BUDGET = 512
+# Default thinking budgets (configurable via env vars, can be overridden by THINKING_BUDGET_OPTIMIZATION experiment)
+EXTRACT_ACTION_DEFAULT_THINKING_BUDGET = settings.EXTRACT_ACTION_THINKING_BUDGET
+DEFAULT_THINKING_BUDGET = settings.DEFAULT_THINKING_BUDGET
 
 
 def _safe_model_dump_json(response: ModelResponse, indent: int = 2) -> str:
@@ -348,6 +349,14 @@ class LLMAPIHandlerFactory:
         parameters: dict[str, Any], new_budget: int, llm_config: LLMConfig | LLMRouterConfig, prompt_name: str
     ) -> None:
         """Apply thinking optimization for Gemini models using exact integer budget value."""
+        # Get model label for logging — prefer main_model_group for router configs
+        model_label = llm_config.main_model_group if isinstance(llm_config, LLMRouterConfig) else llm_config.model_name
+
+        # Models that use thinking_level (e.g. Gemini 3 Pro/Flash) don't support budget_tokens.
+        # Their reasoning is already bounded by the thinking_level set in their config, so skip.
+        if "thinking_level" in parameters:
+            return
+
         if "thinking" in parameters and isinstance(parameters["thinking"], dict):
             parameters["thinking"]["budget_tokens"] = new_budget
         else:
@@ -355,10 +364,6 @@ class LLMAPIHandlerFactory:
             if settings.GEMINI_INCLUDE_THOUGHT:
                 thinking_payload["type"] = "enabled"
             parameters["thinking"] = thinking_payload
-        # Get safe model label for logging
-        model_label = getattr(llm_config, "model_name", None)
-        if model_label is None and isinstance(llm_config, LLMRouterConfig):
-            model_label = getattr(llm_config, "main_model_group", "router")
 
         LOG.info(
             "Applied thinking budget optimization (budget_tokens)",
@@ -458,6 +463,11 @@ class LLMAPIHandlerFactory:
             LLMAPIHandlerFactory._apply_thinking_budget_optimization(
                 parameters, EXTRACT_ACTION_DEFAULT_THINKING_BUDGET, llm_config, prompt_name
             )
+        else:
+            # Apply default thinking budget for all other prompts to prevent unbounded reasoning
+            LLMAPIHandlerFactory._apply_thinking_budget_optimization(
+                parameters, DEFAULT_THINKING_BUDGET, llm_config, prompt_name
+            )
 
         context = skyvern_context.current()
         is_speculative_step = step.is_speculative if step else False
@@ -886,6 +896,11 @@ class LLMAPIHandlerFactory:
             LLMAPIHandlerFactory._apply_thinking_budget_optimization(
                 active_parameters, EXTRACT_ACTION_DEFAULT_THINKING_BUDGET, llm_config, prompt_name
             )
+        else:
+            # Apply default thinking budget for all other prompts to prevent unbounded reasoning
+            LLMAPIHandlerFactory._apply_thinking_budget_optimization(
+                active_parameters, DEFAULT_THINKING_BUDGET, llm_config, prompt_name
+            )
 
         context = skyvern_context.current()
         is_speculative_step = step.is_speculative if step else False