diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py
index c78351a8..3185f4d3 100644
--- a/skyvern/forge/agent.py
+++ b/skyvern/forge/agent.py
@@ -3020,7 +3020,7 @@ class ForgeAgent:
             local_datetime=datetime.now(context.tz_info).isoformat(),
         )
 
-        data_extraction_summary_resp = await app.SECONDARY_LLM_API_HANDLER(
+        data_extraction_summary_resp = await app.EXTRACTION_LLM_API_HANDLER(
             prompt=prompt, step=step, prompt_name="data-extraction-summary"
         )
         return ExtractAction(
diff --git a/skyvern/forge/sdk/api/llm/config_registry.py b/skyvern/forge/sdk/api/llm/config_registry.py
index 44d155f6..cbf8bb97 100644
--- a/skyvern/forge/sdk/api/llm/config_registry.py
+++ b/skyvern/forge/sdk/api/llm/config_registry.py
@@ -1097,6 +1097,25 @@ if settings.ENABLE_VERTEX_AI and settings.VERTEX_CREDENTIALS:
             ),
         ),
     )
+    LLMConfigRegistry.register_config(
+        "VERTEX_GEMINI_2.5_FLASH_LITE",
+        LLMConfig(
+            "vertex_ai/gemini-2.5-flash-lite",
+            ["VERTEX_CREDENTIALS"],
+            supports_vision=True,
+            add_assistant_prefix=False,
+            max_completion_tokens=65535,
+            litellm_params=LiteLLMParams(
+                vertex_credentials=settings.VERTEX_CREDENTIALS,
+                api_base=f"{api_base}/gemini-2.5-flash-lite" if api_base else None,
+                vertex_location=settings.VERTEX_LOCATION,
+                thinking={
+                    "budget_tokens": settings.GEMINI_THINKING_BUDGET,
+                    "type": "enabled" if settings.GEMINI_INCLUDE_THOUGHT else None,
+                },
+            ),
+        ),
+    )
     LLMConfigRegistry.register_config(
         "VERTEX_GEMINI_2.5_FLASH_PREVIEW",
         LLMConfig(
diff --git a/skyvern/webeye/actions/handler.py b/skyvern/webeye/actions/handler.py
index 4cdea6fc..017680ea 100644
--- a/skyvern/webeye/actions/handler.py
+++ b/skyvern/webeye/actions/handler.py
@@ -3603,7 +3603,10 @@ async def extract_information_for_navigation_goal(
         # CUA tasks should use the default data extraction llm key
         llm_key_override = None
 
-    llm_api_handler = LLMAPIHandlerFactory.get_override_llm_api_handler(llm_key_override, default=app.LLM_API_HANDLER)
+    # Use the appropriate LLM handler based on the feature flag
+    llm_api_handler = LLMAPIHandlerFactory.get_override_llm_api_handler(
+        llm_key_override, default=app.EXTRACTION_LLM_API_HANDLER
+    )
     json_response = await llm_api_handler(
         prompt=extract_information_prompt,
         step=step,
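
Not part of the diff: a minimal sketch of how app.EXTRACTION_LLM_API_HANDLER could be wired from a settings-driven key, so that a registered key such as the new VERTEX_GEMINI_2.5_FLASH_LITE can drive data extraction. The settings field name EXTRACTION_LLM_KEY and the import path for settings are assumptions, not taken from this diff; the PR may wire this differently (for example by reusing SECONDARY_LLM_KEY).

    # Hypothetical wiring in skyvern/forge/app.py; EXTRACTION_LLM_KEY is an assumed
    # settings field, falling back to the primary LLM_KEY when it is unset.
    from skyvern.config import settings  # assumed import path
    from skyvern.forge.sdk.api.llm.api_handler_factory import LLMAPIHandlerFactory

    EXTRACTION_LLM_API_HANDLER = LLMAPIHandlerFactory.get_llm_api_handler(
        settings.EXTRACTION_LLM_KEY or settings.LLM_KEY
    )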