From f16367bf963715562510cdb8fe3c7ca7f3221ee6 Mon Sep 17 00:00:00 2001 From: Shuchang Zheng Date: Wed, 12 Feb 2025 17:10:24 +0800 Subject: [PATCH] fix terminate action in caching - do not replay terminate + introduce SINGLE_CLICK_AGENT_LLM_API_HANDLER (#1760) --- skyvern/config.py | 1 + skyvern/forge/app.py | 5 +++++ skyvern/webeye/actions/caching.py | 25 +++++++++++-------------- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/skyvern/config.py b/skyvern/config.py index ebb171c6..2ce3a555 100644 --- a/skyvern/config.py +++ b/skyvern/config.py @@ -95,6 +95,7 @@ class Settings(BaseSettings): LLM_KEY: str = "OPENAI_GPT4O" SECONDARY_LLM_KEY: str | None = None SELECT_AGENT_LLM_KEY: str | None = None + SINGLE_CLICK_AGENT_LLM_KEY: str | None = None # COMMON LLM_CONFIG_TIMEOUT: int = 300 LLM_CONFIG_MAX_TOKENS: int = 4096 diff --git a/skyvern/forge/app.py b/skyvern/forge/app.py index bc6d00cf..9ce12a7c 100644 --- a/skyvern/forge/app.py +++ b/skyvern/forge/app.py @@ -40,6 +40,11 @@ SELECT_AGENT_LLM_API_HANDLER = ( if SETTINGS_MANAGER.SELECT_AGENT_LLM_KEY else SECONDARY_LLM_API_HANDLER ) +SINGLE_CLICK_AGENT_LLM_API_HANDLER = ( + LLMAPIHandlerFactory.get_llm_api_handler(SETTINGS_MANAGER.SINGLE_CLICK_AGENT_LLM_KEY) + if SETTINGS_MANAGER.SINGLE_CLICK_AGENT_LLM_KEY + else SECONDARY_LLM_API_HANDLER +) WORKFLOW_CONTEXT_MANAGER = WorkflowContextManager() WORKFLOW_SERVICE = WorkflowService() AGENT_FUNCTION = AgentFunction() diff --git a/skyvern/webeye/actions/caching.py b/skyvern/webeye/actions/caching.py index 43381a52..292c2d3c 100644 --- a/skyvern/webeye/actions/caching.py +++ b/skyvern/webeye/actions/caching.py @@ -108,7 +108,7 @@ async def _retrieve_action_plan(task: Task, step: Step, scraped_page: ScrapedPag LOG.info("Found cached actions to execute", actions=cached_actions_to_execute) - actions_queries: list[Action] = [] + actions: list[Action] = [] for idx, cached_action in enumerate(cached_actions_to_execute): updated_action = cached_action.model_copy() updated_action.status = ActionStatus.pending @@ -135,18 +135,16 @@ async def _retrieve_action_plan(task: Task, step: Step, scraped_page: ScrapedPag "All elements with either no hash or multiple hashes should have been already filtered out" ) - actions_queries.append(updated_action) + actions.append(updated_action) # Check for unsupported actions before personalizing the actions # Classify the supported actions into two groups: # 1. Actions that can be cached with a query # 2. Actions that can be cached without a query # We'll use this classification to determine if we should continue with caching or fallback to no-cache mode - check_for_unsupported_actions(actions_queries) + check_for_unsupported_actions(actions) - personalized_actions = await personalize_actions( - task=task, step=step, scraped_page=scraped_page, actions_queries=actions_queries - ) + personalized_actions = await personalize_actions(task=task, step=step, scraped_page=scraped_page, actions=actions) LOG.info("Personalized cached actions are ready", actions=personalized_actions) return personalized_actions @@ -155,12 +153,10 @@ async def _retrieve_action_plan(task: Task, step: Step, scraped_page: ScrapedPag async def personalize_actions( task: Task, step: Step, - actions_queries: list[Action], + actions: list[Action], scraped_page: ScrapedPage, ) -> list[Action]: - queries_and_answers: dict[str, str | None] = { - action.intention: None for action in actions_queries if action.intention - } + queries_and_answers: dict[str, str | None] = {action.intention: None for action in actions if action.intention} answered_queries: dict[str, str] = {} if queries_and_answers: @@ -170,7 +166,7 @@ async def personalize_actions( ) personalized_actions = [] - for action in actions_queries: + for action in actions: query = action.intention if query and (personalized_answer := answered_queries.get(query)): current_personized_actions = await personalize_action( @@ -230,10 +226,11 @@ async def personalize_action( elif action.action_type in [ ActionType.COMPLETE, ActionType.WAIT, - ActionType.TERMINATE, ActionType.SOLVE_CAPTCHA, ]: return [action] + elif action.action_type == ActionType.TERMINATE: + return [] else: raise CachedActionPlanError( f"Unsupported action type for personalization, fallback to no-cache mode: {action.action_type}" @@ -242,10 +239,10 @@ async def personalize_action( return [action] -def check_for_unsupported_actions(actions_queries: list[Action]) -> None: +def check_for_unsupported_actions(actions: list[Action]) -> None: supported_actions = [ActionType.INPUT_TEXT, ActionType.WAIT, ActionType.CLICK, ActionType.COMPLETE] supported_actions_with_query = [ActionType.INPUT_TEXT] - for action in actions_queries: + for action in actions: query = action.intention if action.action_type not in supported_actions: raise CachedActionPlanError(