add actions db model and caching V0 (#980)

2024-10-15 12:06:50 -07:00
parent e7583ac878
commit 9048cdfa73
19 changed files with 731 additions and 90 deletions
--- a/skyvern/webeye/actions/caching.py
+++ b/skyvern/webeye/actions/caching.py
@@ -0,0 +1,226 @@
+import structlog
+
+from skyvern.exceptions import CachedActionPlanError
+from skyvern.forge import app
+from skyvern.forge.prompts import prompt_engine
+from skyvern.forge.sdk.models import Step
+from skyvern.forge.sdk.schemas.tasks import Task
+from skyvern.webeye.actions.actions import Action, ActionStatus, ActionType
+from skyvern.webeye.scraper.scraper import ScrapedPage
+
+LOG = structlog.get_logger()
+
+
+async def retrieve_action_plan(task: Task, step: Step, scraped_page: ScrapedPage) -> list[Action]:
+    try:
+        return await _retrieve_action_plan(task, step, scraped_page)
+    except Exception as e:
+        LOG.exception("Failed to retrieve action plan", exception=e)
+        return []
+
+
+async def _retrieve_action_plan(task: Task, step: Step, scraped_page: ScrapedPage) -> list[Action]:
+    # V0: use the previous action plan if there is a completed task with the same url and navigation goal
+    # get completed task with the same url and navigation goal
+    # TODO(kerem): don't use step_order, get all the previous actions instead
+    cached_actions = await app.DATABASE.retrieve_action_plan(task=task)
+    if not cached_actions:
+        LOG.info("No cached actions found for the task, fallback to no-cache mode")
+        return []
+
+    # Get the existing actions for this task from the database. Then find the actions that are already executed by looking at
+    # the source_action_id field for this task's actions.
+    previous_actions = await app.DATABASE.get_previous_actions_for_task(task_id=task.task_id)
+
+    executed_cached_actions = []
+    remaining_cached_actions = []
+    action_matching_complete = False
+    if previous_actions:
+        for idx, cached_action in enumerate(cached_actions):
+            if not action_matching_complete:
+                should_be_matching_action = previous_actions[idx]
+                if not should_be_matching_action.source_action_id:
+                    # If there is an action without a source_action_id, it means we already went back to no-cache mode
+                    # and we should not try to reuse the previous action plan since it's not possible to determine which
+                    # action we should execute next
+                    return []
+
+                action_id_to_match = (
+                    cached_action.source_action_id if cached_action.source_action_id else cached_action.action_id
+                )
+                if should_be_matching_action.source_action_id == action_id_to_match:
+                    executed_cached_actions.append(cached_action)
+                    if idx == len(previous_actions) - 1:
+                        # If we've reached the end of the previous actions, we've completed matching.
+                        action_matching_complete = True
+                else:
+                    # If we've reached an action that doesn't match the source_action_id of the previous actions,
+                    # we've completed matching.
+                    action_matching_complete = True
+                    remaining_cached_actions.append(cached_action)
+            else:
+                remaining_cached_actions.append(cached_action)
+    else:
+        remaining_cached_actions = cached_actions
+        action_matching_complete = True
+
+    # For any remaining cached action,
+    # check if the element hash exists in the current scraped page. Add them to a list until we can't find a match. Always keep the
+    # actions without an element hash.
+
+    cached_actions_to_execute: list[Action] = []
+    found_element_with_no_hash = False
+    for cached_action in remaining_cached_actions:
+        # The actions without an element hash: TerminateAction CompleteAction NullAction SolveCaptchaAction WaitAction
+        # For these actions, we can't check if the element hash exists in the current scraped page.
+        # For that reason, we're going to make sure they're executed always as the first action in each step.
+        if not cached_action.skyvern_element_hash:
+            if not found_element_with_no_hash and len(cached_actions_to_execute) > 0:
+                # If we've already added actions with element hashes to the list before we encounter an action without an element hash,
+                # we need to execute the actions we already added first. We want the actions without an element hash
+                # to be executed as the first actions in each step. We're ok with executing multiple actions without an element hash
+                # in a row, but we want them to be executed in a new step after we wait & scrape the page again.
+                break
+            cached_actions_to_execute.append(cached_action)
+            found_element_with_no_hash = True
+            continue
+
+        matching_element_ids = scraped_page.hash_to_element_ids.get(cached_action.skyvern_element_hash)
+        if matching_element_ids and len(matching_element_ids) == 1:
+            cached_actions_to_execute.append(cached_action)
+            continue
+        # After this point, we can't continue adding actions to the plan, so we break and continue with what we have.
+        # Because this action has either no hash-match or multiple hash-matches, we can't continue.
+        elif matching_element_ids and len(matching_element_ids) > 1:
+            LOG.warning(
+                "Found multiple elements with the same hash, stop matching",
+                element_hash=cached_action.skyvern_element_hash,
+                element_ids=matching_element_ids,
+            )
+            break
+        else:
+            LOG.warning("No element found with the hash", element_hash=cached_action.skyvern_element_hash)
+            break
+
+    # If there are no items in the list we just built, we need to revert back to no-cache mode. Return empty list.
+    if not cached_actions_to_execute:
+        return []
+
+    LOG.info("Found cached actions to execute", actions=cached_actions_to_execute)
+
+    actions_queries: list[tuple[Action, str | None]] = []
+    for idx, cached_action in enumerate(cached_actions_to_execute):
+        updated_action = cached_action.model_copy()
+        updated_action.status = ActionStatus.pending
+        updated_action.source_action_id = (
+            cached_action.source_action_id if cached_action.source_action_id else cached_action.action_id
+        )
+        updated_action.workflow_run_id = task.workflow_run_id
+        updated_action.task_id = task.task_id
+        updated_action.step_id = step.step_id
+        updated_action.step_order = step.order
+        updated_action.action_order = idx
+        # Reset the action response to None so we don't use the previous answers
+        updated_action.response = None
+
+        # Update the element id with the element id from the current scraped page, matched by element hash
+        if cached_action.skyvern_element_hash:
+            matching_element_ids = scraped_page.hash_to_element_ids.get(cached_action.skyvern_element_hash)
+            if matching_element_ids and len(matching_element_ids) == 1:
+                matching_element_id = matching_element_ids[0]
+                updated_action.element_id = matching_element_id
+                updated_action.skyvern_element_data = scraped_page.id_to_element_dict.get(matching_element_id)
+            else:
+                raise CachedActionPlanError(
+                    "All elements with either no hash or multiple hashes should have been already filtered out"
+                )
+
+        actions_queries.append((updated_action, updated_action.intention))
+
+    # Check for unsupported actions before personalizing the actions
+    # Classify the supported actions into two groups:
+    # 1. Actions that can be cached with a query
+    # 2. Actions that can be cached without a query
+    # We'll use this classification to determine if we should continue with caching or fallback to no-cache mode
+    check_for_unsupported_actions(actions_queries)
+
+    personalized_actions = await personalize_actions(
+        task=task, step=step, scraped_page=scraped_page, actions_queries=actions_queries
+    )
+
+    LOG.info("Personalized cached actions are ready", actions=personalized_actions)
+    return personalized_actions
+
+
+async def personalize_actions(
+    task: Task,
+    step: Step,
+    actions_queries: list[tuple[Action, str | None]],
+    scraped_page: ScrapedPage,
+) -> list[Action]:
+    queries_and_answers: dict[str, str | None] = {query: None for _, query in actions_queries if query}
+
+    answered_queries: dict[str, str] = {}
+    if queries_and_answers:
+        # Call LLM to get answers for the queries only if there are queries to answer
+        answered_queries = await get_user_detail_answers(
+            task=task, step=step, scraped_page=scraped_page, queries_and_answers=queries_and_answers
+        )
+
+    personalized_actions = []
+    for action, query in actions_queries:
+        if query and (personalized_answer := answered_queries.get(query)):
+            personalized_actions.append(personalize_action(action, query, personalized_answer))
+        else:
+            personalized_actions.append(action)
+
+    return personalized_actions
+
+
+async def get_user_detail_answers(
+    task: Task, step: Step, scraped_page: ScrapedPage, queries_and_answers: dict[str, str | None]
+) -> dict[str, str]:
+    try:
+        question_answering_prompt = prompt_engine.load_prompt(
+            "answer-user-detail-questions",
+            navigation_goal=task.navigation_goal,
+            navigation_payload=task.navigation_payload,
+            queries_and_answers=queries_and_answers,
+        )
+
+        llm_response = await app.SECONDARY_LLM_API_HANDLER(
+            prompt=question_answering_prompt, step=step, screenshots=None
+        )
+        return llm_response
+    except Exception as e:
+        LOG.exception("Failed to get user detail answers", exception=e)
+        # TODO: custom exception so we can fallback to no-cache mode by catching it
+        raise e
+
+
+def personalize_action(action: Action, query: str, answer: str) -> Action:
+    action.intention = query
+    action.response = answer
+
+    if action.action_type == ActionType.INPUT_TEXT:
+        action.text = answer
+    else:
+        raise CachedActionPlanError(
+            f"Unsupported action type for personalization, fallback to no-cache mode: {action.action_type}"
+        )
+
+    return action
+
+
+def check_for_unsupported_actions(actions_queries: list[tuple[Action, str | None]]) -> None:
+    supported_actions = [ActionType.INPUT_TEXT, ActionType.WAIT, ActionType.CLICK, ActionType.COMPLETE]
+    supported_actions_with_query = [ActionType.INPUT_TEXT]
+    for action, query in actions_queries:
+        if action.action_type not in supported_actions:
+            raise CachedActionPlanError(
+                f"This action type does not support caching: {action.action_type}, fallback to no-cache mode"
+            )
+        if query and action.action_type not in supported_actions_with_query:
+            raise CachedActionPlanError(
+                f"This action type does not support caching with a query: {action.action_type}, fallback to no-cache mode"
+            )