258 lines
12 KiB
Python
258 lines
12 KiB
Python
import structlog
|
|
|
|
from skyvern.exceptions import CachedActionPlanError
|
|
from skyvern.forge import app
|
|
from skyvern.forge.prompts import prompt_engine
|
|
from skyvern.forge.sdk.models import Step
|
|
from skyvern.forge.sdk.schemas.tasks import Task
|
|
from skyvern.webeye.actions.actions import Action, ActionStatus, ActionType
|
|
from skyvern.webeye.scraper.scraper import ScrapedPage
|
|
|
|
# Module-level structlog logger used by the functions in this file.
LOG = structlog.get_logger()
|
|
|
|
|
|
async def retrieve_action_plan(task: Task, step: Step, scraped_page: ScrapedPage) -> list[Action]:
    """Best-effort wrapper around ``_retrieve_action_plan``.

    Any ``Exception`` raised while building the cached plan is logged and
    swallowed, and an empty list is returned instead. Elsewhere in this module
    an empty plan is what signals falling back to no-cache mode.

    Returns:
        The actions produced by ``_retrieve_action_plan``, or ``[]`` on failure.
    """
    try:
        plan = await _retrieve_action_plan(task, step, scraped_page)
    except Exception as error:
        LOG.exception("Failed to retrieve action plan", exception=error)
        plan = []
    return plan
|
|
|
|
|
|
async def _retrieve_action_plan(task: Task, step: Step, scraped_page: ScrapedPage) -> list[Action]:
    """Build the list of cached actions that can be replayed in ``step``.

    The plan is assembled in stages:

    1. Load the cached plan for ``task`` from the database.
    2. Skip the cached actions this task has already replayed (matched through ``source_action_id``).
    3. Keep the leading run of remaining actions that can be located unambiguously on ``scraped_page``.
    4. Copy those actions, re-bind them to this task/step/page, validate and personalize them.

    Returns:
        The personalized actions to execute, or an empty list when no cached action can be used
        (no cached plan, replay cannot be resumed, or nothing matches the current page).

    Raises:
        CachedActionPlanError: If a selected action cannot be re-bound to exactly one element, or if
            ``check_for_unsupported_actions`` / ``personalize_actions`` reject an action.
    """
    # V0: use the previous action plan if there is a completed task with the same url and navigation goal
    # get completed task with the same url and navigation goal
    # TODO(kerem): don't use step_order, get all the previous actions instead
    cached_actions = await app.DATABASE.retrieve_action_plan(task=task)
    if not cached_actions:
        LOG.info("No cached actions found for the task, fallback to no-cache mode")
        return []

    # Get the existing actions for this task from the database. Then find the actions that are already
    # executed by looking at the source_action_id field for this task's actions.
    previous_actions = await app.DATABASE.get_previous_actions_for_task(task_id=task.task_id)
    remaining_cached_actions = _skip_executed_cached_actions(cached_actions, previous_actions)

    # If there are no items in the list we build here, we need to revert back to no-cache mode.
    cached_actions_to_execute = _select_replayable_actions(remaining_cached_actions, scraped_page)
    if not cached_actions_to_execute:
        return []

    LOG.info("Found cached actions to execute", actions=cached_actions_to_execute)

    actions: list[Action] = [
        _rebind_cached_action(cached_action, task, step, idx, scraped_page)
        for idx, cached_action in enumerate(cached_actions_to_execute)
    ]

    # Check for unsupported actions before personalizing the actions.
    # The supported actions are classified into two groups:
    # 1. Actions that can be cached with a query
    # 2. Actions that can be cached without a query
    # This classification determines if we continue with caching or fallback to no-cache mode.
    check_for_unsupported_actions(actions)

    personalized_actions = await personalize_actions(task=task, step=step, scraped_page=scraped_page, actions=actions)

    LOG.info("Personalized cached actions are ready", actions=personalized_actions)
    return personalized_actions


def _skip_executed_cached_actions(cached_actions: list[Action], previous_actions: list[Action]) -> list[Action]:
    """Return the cached actions that the current task has not replayed yet.

    Cached and previous actions are compared position by position. A previous action counts as a
    replay of a cached action when its ``source_action_id`` equals the cached action's
    ``source_action_id`` (or its ``action_id`` when it has no source). Matching stops at the first
    mismatch or once every previous action has been matched; everything from that point on is returned.

    An empty list is returned when a compared previous action has no ``source_action_id``.
    """
    if not previous_actions:
        return cached_actions

    remaining: list[Action] = []
    matching_complete = False
    last_previous_idx = len(previous_actions) - 1
    for idx, cached_action in enumerate(cached_actions):
        if matching_complete:
            remaining.append(cached_action)
            continue

        previous_action = previous_actions[idx]
        if not previous_action.source_action_id:
            # If there is an action without a source_action_id, it means we already went back to no-cache mode
            # and we should not try to reuse the previous action plan since it's not possible to determine which
            # action we should execute next.
            return []

        action_id_to_match = cached_action.source_action_id or cached_action.action_id
        if previous_action.source_action_id != action_id_to_match:
            # We've reached an action that doesn't match the previous actions, so matching is complete
            # and this cached action is the first one still to be executed.
            matching_complete = True
            remaining.append(cached_action)
        elif idx == last_previous_idx:
            # We've reached the end of the previous actions, so matching is complete.
            matching_complete = True

    return remaining


def _select_replayable_actions(remaining_cached_actions: list[Action], scraped_page: ScrapedPage) -> list[Action]:
    """Return the leading run of cached actions that can be executed against ``scraped_page``.

    An action with an element hash is kept only when that hash maps to exactly one element id in
    ``scraped_page.hash_to_element_ids``; the first action with no match or several matches ends the run.
    """
    selected: list[Action] = []
    found_element_with_no_hash = False
    for cached_action in remaining_cached_actions:
        # The actions without an element hash: TerminateAction CompleteAction NullAction SolveCaptchaAction WaitAction
        # For these actions, we can't check if the element hash exists in the current scraped page.
        # For that reason, we're going to make sure they're executed always as the first action in each step.
        if not cached_action.skyvern_element_hash:
            if not found_element_with_no_hash and len(selected) > 0:
                # Hashed actions were already selected before this first hash-less action: stop here so the
                # hash-less action runs at the start of a new step, after the page is scraped again.
                # NOTE(review): once one hash-less action has been selected, later hash-less actions are
                # accepted even after hashed ones - confirm this matches the "first in each step" intent.
                break
            selected.append(cached_action)
            found_element_with_no_hash = True
            continue

        matching_element_ids = scraped_page.hash_to_element_ids.get(cached_action.skyvern_element_hash)
        if matching_element_ids and len(matching_element_ids) == 1:
            selected.append(cached_action)
            continue

        # After this point, we can't continue adding actions to the plan, so we stop with what we have.
        # This action has either no hash-match or multiple hash-matches.
        if matching_element_ids and len(matching_element_ids) > 1:
            LOG.warning(
                "Found multiple elements with the same hash, stop matching",
                element_hash=cached_action.skyvern_element_hash,
                element_ids=matching_element_ids,
            )
        else:
            LOG.warning("No element found with the hash", element_hash=cached_action.skyvern_element_hash)
        break

    return selected


def _rebind_cached_action(
    cached_action: Action,
    task: Task,
    step: Step,
    action_order: int,
    scraped_page: ScrapedPage,
) -> Action:
    """Return a pending copy of ``cached_action`` bound to the given task, step and scraped page.

    Raises:
        CachedActionPlanError: If the action has an element hash that does not map to exactly one
            element id on ``scraped_page``.
    """
    updated_action = cached_action.model_copy()
    updated_action.status = ActionStatus.pending
    # Always point at the action this one was originally copied from.
    updated_action.source_action_id = cached_action.source_action_id or cached_action.action_id
    updated_action.workflow_run_id = task.workflow_run_id
    updated_action.task_id = task.task_id
    updated_action.step_id = step.step_id
    updated_action.step_order = step.order
    updated_action.action_order = action_order
    # Reset the action response to None so we don't use the previous answers
    updated_action.response = None

    # Update the element id with the element id from the current scraped page, matched by element hash
    if cached_action.skyvern_element_hash:
        matching_element_ids = scraped_page.hash_to_element_ids.get(cached_action.skyvern_element_hash)
        if not (matching_element_ids and len(matching_element_ids) == 1):
            raise CachedActionPlanError(
                "All elements with either no hash or multiple hashes should have been already filtered out"
            )
        matching_element_id = matching_element_ids[0]
        updated_action.element_id = matching_element_id
        updated_action.skyvern_element_data = scraped_page.id_to_element_dict.get(matching_element_id)

    return updated_action
|
|
|
|
|
|
async def personalize_actions(
    task: Task,
    step: Step,
    actions: list[Action],
    scraped_page: ScrapedPage,
) -> list[Action]:
    """Personalize each action that carries an intention with a freshly obtained answer.

    The distinct, non-empty ``intention`` values are sent to ``get_user_detail_answers`` in a single
    call (skipped entirely when no action has an intention). Every action whose intention received a
    truthy answer is replaced by whatever ``personalize_action`` returns for it; all other actions
    are passed through unchanged, in their original order.

    Returns:
        The resulting list of actions.
    """
    # Distinct intentions, each mapped to None as a not-yet-answered placeholder.
    pending_queries: dict[str, str | None] = dict.fromkeys(
        action.intention for action in actions if action.intention
    )

    # Call LLM to get answers for the queries only if there are queries to answer
    answers: dict[str, str] = {}
    if pending_queries:
        answers = await get_user_detail_answers(
            task=task, step=step, scraped_page=scraped_page, queries_and_answers=pending_queries
        )

    result: list[Action] = []
    for action in actions:
        intention = action.intention
        answer = answers.get(intention) if intention else None
        if not answer:
            # No intention, or no usable answer for it: keep the action as-is.
            result.append(action)
            continue
        result.extend(await personalize_action(action, intention, answer, task, step, scraped_page))

    return result
|
|
|
|
|
|
async def get_user_detail_answers(
    task: Task, step: Step, scraped_page: ScrapedPage, queries_and_answers: dict[str, str | None]
) -> dict[str, str]:
    """Ask the secondary LLM to answer the given user-detail queries.

    Renders the ``answer-user-detail-questions`` prompt from the task's navigation goal, its
    navigation payload and ``queries_and_answers``, sends it to ``app.SECONDARY_LLM_API_HANDLER``
    (without screenshots) and returns the handler's response unchanged.

    Args:
        task: Supplies ``navigation_goal`` and ``navigation_payload`` for the prompt.
        step: Passed through to the LLM handler.
        scraped_page: Not used by this function.
        queries_and_answers: The queries to answer, passed to the prompt template as-is.

    Returns:
        The LLM handler's response.

    Raises:
        Exception: Any error from prompt rendering or the LLM call is logged and re-raised.
    """
    prompt_name = "answer-user-detail-questions"
    try:
        question_answering_prompt = prompt_engine.load_prompt(
            prompt_name,
            navigation_goal=task.navigation_goal,
            navigation_payload=task.navigation_payload,
            queries_and_answers=queries_and_answers,
        )

        return await app.SECONDARY_LLM_API_HANDLER(
            prompt=question_answering_prompt, step=step, screenshots=None, prompt_name=prompt_name
        )
    except Exception as e:
        LOG.exception("Failed to get user detail answers", exception=e)
        # TODO: custom exception so we can fallback to no-cache mode by catching it
        # Bare ``raise`` re-raises the active exception with its original traceback.
        raise
|
|
|
|
|
|
async def personalize_action(
    action: Action,
    query: str,
    answer: str,
    task: Task,
    step: Step,
    scraped_page: ScrapedPage,
) -> list[Action]:
    """Apply ``answer`` to ``action`` in place and return the actions to run in its stead.

    ``action.intention`` and ``action.response`` are always overwritten with ``query`` and
    ``answer``. What is returned depends on the action type:

    * ``INPUT_TEXT``: ``text`` is set to the answer; the action is dropped when the answer is empty.
    * ``UPLOAD_FILE``: ``file_url`` is set to the answer.
    * ``CLICK``, ``SELECT_OPTION``, ``COMPLETE``, ``WAIT``, ``SOLVE_CAPTCHA``, ``NULL_ACTION``:
      returned without further changes.
    * ``TERMINATE``: dropped.

    ``task``, ``step`` and ``scraped_page`` are accepted but not used here.

    Returns:
        A list holding the action, or an empty list when the action is dropped.

    Raises:
        CachedActionPlanError: For any other action type.
    """
    action.intention = query
    action.response = answer

    action_type = action.action_type

    if action_type == ActionType.INPUT_TEXT:
        action.text = answer
        return [action] if answer else []

    if action_type == ActionType.UPLOAD_FILE:
        action.file_url = answer
        return [action]

    if action_type == ActionType.TERMINATE:
        return []

    # CLICK - TODO: we only use cached action.intention. send the intention, navigation payload +
    # navigation goal, html to small llm and make a decision of which elements to click. Not clicking
    # anything is also an option here
    # SELECT_OPTION - TODO: send the selection action with the original/previous option value. Our
    # current selection agent is already able to handle it
    passthrough_types = (
        ActionType.CLICK,
        ActionType.SELECT_OPTION,
        ActionType.COMPLETE,
        ActionType.WAIT,
        ActionType.SOLVE_CAPTCHA,
        ActionType.NULL_ACTION,
    )
    if action_type in passthrough_types:
        return [action]

    raise CachedActionPlanError(
        f"Unsupported action type for personalization, fallback to no-cache mode: {action.action_type}"
    )
|
|
|
|
|
|
def check_for_unsupported_actions(actions: list[Action]) -> None:
    """Reject action lists that contain an action the cache cannot replay.

    Only ``INPUT_TEXT``, ``WAIT``, ``CLICK`` and ``COMPLETE`` actions are accepted, and of those only
    ``INPUT_TEXT`` may carry an ``intention`` (a query).

    Raises:
        CachedActionPlanError: On the first action that violates either rule.
    """
    cacheable_types = (ActionType.INPUT_TEXT, ActionType.WAIT, ActionType.CLICK, ActionType.COMPLETE)
    cacheable_with_query_types = (ActionType.INPUT_TEXT,)

    for action in actions:
        has_query = bool(action.intention)
        action_type = action.action_type

        if action_type not in cacheable_types:
            raise CachedActionPlanError(
                f"This action type does not support caching: {action.action_type}, fallback to no-cache mode"
            )

        if has_query and action_type not in cacheable_with_query_types:
            raise CachedActionPlanError(
                f"This action type does not support caching with a query: {action.action_type}, fallback to no-cache mode"
            )
|