diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py index fd35a557..dd510bec 100644 --- a/skyvern/forge/agent.py +++ b/skyvern/forge/agent.py @@ -2066,6 +2066,27 @@ class ForgeAgent: # If we don't have pre-scraped data, scrape normally if scraped_page is None: + # Check PostHog for speed optimizations BEFORE scraping + # This decision will be used in both: + # 1. SVG conversion skip (in agent_functions.py cleanup) + # 2. Tree selection (economy vs regular tree) + # By checking once and storing in context, we ensure perfect coordination + if context: + try: + distinct_id = task.workflow_run_id if task.workflow_run_id else task.task_id + context.enable_speed_optimizations = await app.EXPERIMENTATION_PROVIDER.is_feature_enabled_cached( + "ENABLE_SPEED_OPTIMIZATIONS", + distinct_id, + properties={"organization_id": task.organization_id}, + ) + except Exception: + LOG.warning( + "Failed to check ENABLE_SPEED_OPTIMIZATIONS feature flag", + exc_info=True, + task_id=task.task_id, + ) + context.enable_speed_optimizations = False + # start the async tasks while running scrape_website if engine not in CUA_ENGINES: self.async_operation_pool.run_operation(task.task_id, AgentPhase.scrape) @@ -2113,7 +2134,51 @@ class ForgeAgent: ) # TODO: we only use HTML element for now, introduce a way to switch in the future element_tree_format = ElementTreeFormat.HTML - element_tree_in_prompt: str = scraped_page.build_element_tree(element_tree_format) + + # OPTIMIZATION: Use economy tree (skip SVGs) when ENABLE_SPEED_OPTIMIZATIONS is enabled + # Economy tree removes all SVG elements from the DOM tree sent to LLM + # - SVGs are decorative (icons, logos, graphics) - not needed for action planning + # - Even for charts/graphs: LLM sees them in screenshots, not SVG code + # - Saves ~8s per SVG x ~15 SVGs = ~120s per workflow (30% speedup!) 
+ # + # RETRY STRATEGY: Use economy tree on first attempt only + # - retry_index 0: Use economy tree (fast, no SVGs) + # - retry_index 1+: Use regular tree (SVGs loaded from existing 4-week cache) + # Note: SVG conversions are already cached globally with 4-week TTL, so retries are fast + # + # COORDINATION: The enable_speed_optimizations decision is made ONCE before scraping + # and stored in context. Both SVG conversion skip (agent_functions.py) and tree + # selection (here) use the SAME value, ensuring perfect coordination. + element_tree_in_prompt: str = "" + + # Use the speed optimization decision from context (set before scraping) + enable_speed_optimizations = context.enable_speed_optimizations if context else False + + if not enable_speed_optimizations: + # Optimization disabled - use regular tree always + element_tree_in_prompt = scraped_page.build_element_tree(element_tree_format) + elif step.retry_index == 0: + # First attempt - use economy tree (fast, no SVG conversion) + # Note: SVG conversion was already skipped in cleanup_element_tree_func + # based on the same context.enable_speed_optimizations value + element_tree_in_prompt = scraped_page.build_economy_elements_tree(element_tree_format) + LOG.info( + "Speed optimization: Using economy element tree (skipping SVGs)", + step_order=step.order, + step_retry=step.retry_index, + task_id=task.task_id, + workflow_run_id=task.workflow_run_id, + ) + else: + # Retry 1+ - use regular tree (SVGs will be loaded from existing 4-week cache) + element_tree_in_prompt = scraped_page.build_element_tree(element_tree_format) + LOG.info( + "Speed optimization: Using regular tree on retry (SVGs from global cache)", + step_order=step.order, + step_retry=step.retry_index, + task_id=task.task_id, + workflow_run_id=task.workflow_run_id, + ) extract_action_prompt = "" if engine not in CUA_ENGINES: extract_action_prompt, use_caching = await self._build_extract_action_prompt( diff --git a/skyvern/forge/agent_functions.py 
b/skyvern/forge/agent_functions.py index 767ab6cc..3579f18b 100644 --- a/skyvern/forge/agent_functions.py +++ b/skyvern/forge/agent_functions.py @@ -571,8 +571,30 @@ class AgentFunction: if "children" in queue_ele: queue.extend(queue_ele["children"]) - # Convert all eligible SVGs in parallel - if eligible_svgs: + # SPEED OPTIMIZATION: Skip SVG conversion when using economy tree + # Economy tree removes SVGs, so no point converting them + # + # COORDINATION: Use the same enable_speed_optimizations decision from context + # that was set in agent.py BEFORE scraping. This ensures SVG conversion skip + # is perfectly coordinated with economy tree selection. + skip_svg_conversion = False + if eligible_svgs and task and step: + # Get the optimization decision from context (set before scraping in agent.py) + current_context = skyvern_context.current() + enable_speed_optimizations = current_context.enable_speed_optimizations if current_context else False + + if enable_speed_optimizations and step.retry_index == 0: + skip_svg_conversion = True + LOG.info( + "Speed optimization: Skipping SVG conversion (will use economy tree)", + step_order=step.order, + step_retry=step.retry_index, + workflow_run_id=task.workflow_run_id, + svg_count=len(eligible_svgs), + ) + + # Convert all eligible SVGs in parallel (unless skipped by optimization) + if eligible_svgs and not skip_svg_conversion: await asyncio.gather(*[_convert_svg_to_string(element, task, step) for element, frame in eligible_svgs]) return element_tree diff --git a/skyvern/forge/sdk/core/skyvern_context.py b/skyvern/forge/sdk/core/skyvern_context.py index 637813cf..6a4ec557 100644 --- a/skyvern/forge/sdk/core/skyvern_context.py +++ b/skyvern/forge/sdk/core/skyvern_context.py @@ -37,6 +37,7 @@ class SkyvernContext: use_prompt_caching: bool = False cached_static_prompt: str | None = None vertex_cache_name: str | None = None # Vertex AI cache resource name for explicit caching + enable_speed_optimizations: bool = False # script 
run context script_id: str | None = None diff --git a/skyvern/webeye/actions/handler.py b/skyvern/webeye/actions/handler.py index d780402a..d3427405 100644 --- a/skyvern/webeye/actions/handler.py +++ b/skyvern/webeye/actions/handler.py @@ -69,6 +69,7 @@ from skyvern.forge.sdk.api.files import ( from skyvern.forge.sdk.api.llm.api_handler_factory import LLMAPIHandlerFactory, LLMCallerManager from skyvern.forge.sdk.api.llm.exceptions import LLMProviderError from skyvern.forge.sdk.core import skyvern_context +from skyvern.forge.sdk.core.skyvern_context import current as skyvern_current from skyvern.forge.sdk.core.skyvern_context import ensure_context from skyvern.forge.sdk.models import Step from skyvern.forge.sdk.schemas.tasks import Task @@ -1096,16 +1097,46 @@ async def handle_input_text_action( incremental_element: list[dict] = [] auto_complete_hacky_flag: bool = False - input_or_select_context = await _get_input_or_select_context( - action=action, - element_tree_builder=scraped_page, - skyvern_element=skyvern_element, - step=step, - ) + # OPTIMIZATION: Skip expensive LLM context parsing for TOTP and secret values + # TOTP inputs don't need autocomplete detection - we already have the generated code + # This saves ~4-5s per TOTP digit (6 digits = ~27s saved for 2FA!) 
+ # Gated by ENABLE_SPEED_OPTIMIZATIONS feature flag + skip_context_parsing = False + if ( + is_totp_value + or is_secret_value + or (action.totp_timing_info and action.totp_timing_info.get("is_totp_sequence")) + ): + try: + current_context = skyvern_current() + enable_speed_optimizations = current_context.enable_speed_optimizations if current_context else False + + if enable_speed_optimizations: + skip_context_parsing = True + LOG.info( + "Speed optimization: Skipping input context parsing for TOTP/secret input", + element_id=skyvern_element.get_id(), + is_totp=is_totp_value, + is_secret=is_secret_value, + is_multi_field_totp=bool(action.totp_timing_info), + ) + except Exception: + LOG.warning("Failed to read ENABLE_SPEED_OPTIMIZATIONS from context for TOTP optimization", exc_info=True) + + if skip_context_parsing: + input_or_select_context = None + else: + input_or_select_context = await _get_input_or_select_context( + action=action, + element_tree_builder=scraped_page, + skyvern_element=skyvern_element, + step=step, + ) # check if it's selectable if ( - not input_or_select_context.is_search_bar # no need to to trigger selection logic for search bar + input_or_select_context is not None + and not input_or_select_context.is_search_bar # no need to trigger selection logic for search bar and not is_totp_value and not is_secret_value and skyvern_element.get_tag_name() == InteractiveElement.INPUT @@ -1361,7 +1392,8 @@ async def handle_input_text_action( return [ActionSuccess()] if not await skyvern_element.is_raw_input(): - if await skyvern_element.is_auto_completion_input() or input_or_select_context.is_location_input: + is_location_input = input_or_select_context.is_location_input if input_or_select_context else False + if input_or_select_context and (await skyvern_element.is_auto_completion_input() or is_location_input): if result := await input_or_auto_complete_input( input_or_select_context=input_or_select_context, scraped_page=scraped_page,