Files
Dorod-Sky/skyvern/webeye/actions/handler.py

3756 lines
146 KiB
Python
Raw Normal View History

import asyncio
2024-08-21 10:54:32 +08:00
import copy
import json
2024-04-04 19:09:19 -07:00
import os
import urllib.parse
2024-04-04 19:09:19 -07:00
import uuid
2024-08-06 13:30:52 +08:00
from datetime import datetime, timedelta, timezone
2024-12-06 02:25:13 +08:00
from pathlib import Path
from typing import Any, Awaitable, Callable, List
import pyotp
import structlog
2024-10-31 00:12:13 +08:00
from playwright.async_api import FileChooser, Frame, Locator, Page, TimeoutError
2024-08-21 10:54:32 +08:00
from pydantic import BaseModel
from skyvern.config import settings
from skyvern.constants import (
AUTO_COMPLETION_POTENTIAL_VALUES_COUNT,
BROWSER_DOWNLOAD_MAX_WAIT_TIME,
2025-01-08 14:27:50 +08:00
DROPDOWN_MENU_MAX_DISTANCE,
REPO_ROOT_DIR,
SKYVERN_ID_ATTR,
)
from skyvern.exceptions import (
DownloadFileMaxWaitingTime,
2024-06-25 01:46:54 +08:00
EmptySelect,
2024-08-21 10:54:32 +08:00
ErrEmptyTweakValue,
2024-07-09 02:22:16 +08:00
ErrFoundSelectableElement,
FailedToFetchSecret,
FailToClick,
2024-06-25 01:46:54 +08:00
FailToSelectByIndex,
2025-09-11 13:10:02 +08:00
FailToSelectByLabel,
2024-06-25 01:46:54 +08:00
FailToSelectByValue,
IllegitComplete,
ImaginaryFileUrl,
2025-05-08 22:52:12 -07:00
InputToInvisibleElement,
2024-10-25 14:52:02 +08:00
InteractWithDisabledElement,
InteractWithDropdownContainer,
InvalidElementForTextInput,
MissingElement,
MissingElementDict,
MissingElementInCSSMap,
MissingFileUrl,
MultipleElementsFound,
2024-08-21 10:54:32 +08:00
NoAutoCompleteOptionMeetCondition,
NoAvailableOptionFoundForCustomSelection,
2024-08-06 13:30:52 +08:00
NoElementMatchedForTargetOption,
2024-08-21 10:54:32 +08:00
NoIncrementalElementFoundForAutoCompletion,
2024-08-06 13:30:52 +08:00
NoIncrementalElementFoundForCustomSelection,
2024-08-21 10:54:32 +08:00
NoSuitableAutoCompleteOption,
2024-06-25 01:46:54 +08:00
OptionIndexOutOfBound,
2024-07-11 02:45:13 +08:00
WrongElementToUploadFile,
)
from skyvern.forge import app
from skyvern.forge.prompts import prompt_engine
from skyvern.forge.sdk.api.files import (
download_file,
get_download_dir,
list_downloading_files_in_directory,
list_files_in_directory,
wait_for_download_finished,
)
2025-06-11 08:23:44 -07:00
from skyvern.forge.sdk.api.llm.api_handler_factory import LLMAPIHandlerFactory, LLMCallerManager
2025-01-24 16:21:26 +08:00
from skyvern.forge.sdk.api.llm.exceptions import LLMProviderError
from skyvern.forge.sdk.core import skyvern_context
2024-12-16 11:22:51 +08:00
from skyvern.forge.sdk.core.skyvern_context import ensure_context
from skyvern.forge.sdk.models import Step
from skyvern.forge.sdk.schemas.tasks import Task
2024-04-10 23:31:17 -07:00
from skyvern.forge.sdk.services.bitwarden import BitwardenConstants
2025-09-12 11:01:57 -06:00
from skyvern.forge.sdk.services.credentials import AzureVaultConstants, OnePasswordConstants
2025-07-07 14:43:10 +08:00
from skyvern.forge.sdk.trace import TraceManager
from skyvern.services import service_utils
from skyvern.services.action_service import get_action_history
2025-08-21 22:11:48 +08:00
from skyvern.utils.prompt_engine import (
CheckDateFormatResponse,
CheckPhoneNumberFormatResponse,
load_prompt_with_elements,
)
from skyvern.webeye.actions import actions, handler_utils
from skyvern.webeye.actions.action_types import ActionType
2024-04-26 17:51:50 +08:00
from skyvern.webeye.actions.actions import (
Action,
ActionStatus,
2024-05-15 02:15:18 +08:00
CheckboxAction,
2024-04-26 17:51:50 +08:00
ClickAction,
CompleteVerifyResult,
InputOrSelectContext,
2025-04-11 11:18:53 -07:00
InputTextAction,
2024-04-26 17:51:50 +08:00
ScrapeResult,
2024-08-07 18:51:29 +08:00
SelectOption,
2024-04-26 17:51:50 +08:00
SelectOptionAction,
UploadFileAction,
UserDefinedError,
2024-04-26 17:51:50 +08:00
WebAction,
)
from skyvern.webeye.actions.responses import ActionAbort, ActionFailure, ActionResult, ActionSuccess
2024-08-21 10:54:32 +08:00
from skyvern.webeye.scraper.scraper import (
CleanupElementTreeFunc,
ElementTreeBuilder,
ElementTreeFormat,
2024-08-21 10:54:32 +08:00
IncrementalScrapePage,
ScrapedPage,
hash_element,
2024-08-21 10:54:32 +08:00
json_to_html,
trim_element_tree,
)
from skyvern.webeye.utils.dom import COMMON_INPUT_TAGS, DomUtil, InteractiveElement, SkyvernElement
2024-07-26 18:10:42 +08:00
from skyvern.webeye.utils.page import SkyvernFrame
# Module-level structlog logger used by the handlers and helpers in this file.
LOG = structlog.get_logger()
2024-09-09 11:34:09 +08:00
class CustomSingleSelectResult:
    """Mutable record describing one selection attempt on a custom dropdown.

    All fields start as ``None`` and are expected to be filled in by the code
    that performs the selection; ``skyvern_frame`` is kept so that the
    visibility of the dropdown menu can be re-checked later.
    """

    def __init__(self, skyvern_frame: SkyvernFrame) -> None:
        self.skyvern_frame = skyvern_frame
        self.reasoning: str | None = None
        self.action_result: ActionResult | None = None
        self.action_type: ActionType | None = None
        self.value: str | None = None
        self.dropdown_menu: SkyvernElement | None = None

    async def is_done(self) -> bool:
        """Tell whether the selection flow is finished.

        Returns:
            ``True`` when no dropdown menu was recorded, when the stored action
            result is not an ``ActionSuccess``, when the menu locator matches no
            element, or when the menu is no longer visible. ``False`` only when
            the menu still exists and is visible, which might mean a
            multi-level selection is in progress.
        """
        # FIXME: only able to execute multi-level selection logic when dropdown menu detected
        menu = self.dropdown_menu
        if menu is None or not isinstance(self.action_result, ActionSuccess):
            return True

        # the dropdown menu has been removed from the page
        if await menu.get_locator().count() == 0:
            return True

        menu_handle = await menu.get_element_handler()
        menu_still_visible = await self.skyvern_frame.get_element_visible(menu_handle)
        return not menu_still_visible
2024-09-09 11:34:09 +08:00
def is_ul_or_listbox_element_factory(
    incremental_scraped: IncrementalScrapePage, task: Task, step: Step
) -> Callable[[dict], Awaitable[bool]]:
    """Build an async predicate that recognises ``<ul>`` or ``role="listbox"`` elements.

    Args:
        incremental_scraped: incremental page used to resolve the element by id.
        task: only used to enrich the debug log.
        step: only used to enrich the debug log.

    Returns:
        A coroutine function taking an element dict. It resolves the element
        through ``SkyvernElement.create_from_incremental`` and returns ``True``
        for a ``ul`` tag or a ``listbox`` role; it returns ``False`` otherwise,
        including when the element cannot be created.
    """

    async def wrapper(element_dict: dict) -> bool:
        element_id: str = element_dict.get("id", "")
        try:
            resolved = await SkyvernElement.create_from_incremental(incremental_scraped, element_id)
        except Exception:
            # best effort: an element that cannot be resolved is simply not a match
            LOG.debug(
                "Failed to element in the incremental page",
                element_id=element_id,
                step_id=step.step_id,
                task_id=task.task_id,
                exc_info=True,
            )
            return False

        if resolved.get_tag_name() == "ul":
            return True
        role = await resolved.get_attr("role")
        return role == "listbox"

    return wrapper
2025-04-21 12:44:48 +08:00
# Signature of an async filter predicate: it receives an element dict and the frame,
# and returns True when the element should be dropped from the tree by
# filter_out_elements (the element's children are kept and promoted).
CheckFilterOutElementIDFunc = Callable[[dict, Page | Frame], Awaitable[bool]]
def check_existed_but_not_option_element_in_dom_factory(
    dom: DomUtil,
) -> CheckFilterOutElementIDFunc:
    """Build a filter flagging elements that ``dom`` already knows, unless they are custom options.

    The returned predicate answers ``False`` (keep the element) when the element
    dict has no id, when the element is a custom option, or when any of the
    checks raises. In every other case it returns the result of
    ``dom.check_id_in_dom`` for the element id.
    """

    async def helper(element_dict: dict, frame: Page | Frame) -> bool:
        element_id: str = element_dict.get("id", "")
        if not element_id:
            return False

        try:
            candidate = SkyvernElement(
                locator=frame.locator(f"[{SKYVERN_ID_ATTR}={element_id}]"),
                frame=frame,
                static_element=element_dict,
            )
            if not await candidate.is_custom_option():
                return await dom.check_id_in_dom(element_id)
        except Exception:
            LOG.debug(
                "Failed to check if the element is a custom option, going to keep the element in the incremental tree",
                exc_info=True,
                element_id=element_id,
            )
        # custom option, or the check failed: keep the element
        return False

    return helper
2024-09-09 11:34:09 +08:00
2025-01-09 16:14:31 +08:00
def check_disappeared_element_id_in_incremental_factory(
    incremental_scraped: IncrementalScrapePage,
) -> CheckFilterOutElementIDFunc:
    """Build a filter flagging previously known incremental elements that are no longer visible.

    The id-to-css mapping of ``incremental_scraped`` is deep-copied when the
    factory is called, so the predicate only considers ids known at that time.

    The returned predicate returns ``False`` for ids missing from that snapshot,
    ``True`` when the element cannot be re-created from the incremental page,
    and otherwise ``True`` exactly when the element is not visible.
    """
    css_snapshot = copy.deepcopy(incremental_scraped.id_to_css_dict)

    async def helper(element_dict: dict, frame: Page | Frame) -> bool:
        element_id: str = element_dict.get("id", "")
        if not css_snapshot.get(element_id, ""):
            return False

        try:
            element = await SkyvernElement.create_from_incremental(
                incre_page=incremental_scraped, element_id=element_id
            )
        except Exception:
            LOG.debug(
                "Failed to create skyvern element, going to drop the element from incremental tree",
                exc_info=True,
                element_id=element_id,
            )
            return True

        frame_wrapper = incremental_scraped.skyvern_frame
        element_handle = await element.get_element_handler()
        is_visible = await frame_wrapper.get_element_visible(element_handle)
        return not is_visible

    return helper
2025-04-21 12:44:48 +08:00
async def filter_out_elements(
    frame: Page | Frame, element_tree: list[dict], check_filter: CheckFilterOutElementIDFunc
) -> list[dict]:
    """Recursively drop the elements for which ``check_filter`` returns ``True``.

    Children are filtered before their parent is evaluated. When a parent is
    dropped, its (already filtered) children take its place in the result;
    when it is kept, its ``"children"`` entry is overwritten in place with the
    filtered children.

    Args:
        frame: forwarded unchanged to ``check_filter``.
        element_tree: list of element dicts, each optionally carrying ``"children"``.
        check_filter: async predicate deciding whether an element is removed.

    Returns:
        A new list holding the surviving elements.
    """
    survivors: list[dict] = []
    for node in element_tree:
        descendants = node.get("children", [])
        if len(descendants) > 0:
            descendants = await filter_out_elements(
                frame=frame, element_tree=descendants, check_filter=check_filter
            )

        should_drop = await check_filter(node, frame)
        if not should_drop:
            node["children"] = descendants
            survivors.append(node)
            continue

        # the node itself goes away, its remaining children are promoted one level up
        survivors.extend(descendants)
    return survivors
2024-09-09 11:34:09 +08:00
def clean_and_remove_element_tree_factory(
    task: Task, step: Step, check_filter_funcs: list[CheckFilterOutElementIDFunc]
) -> CleanupElementTreeFunc:
    """Compose the agent's cleanup function with a sequence of element filters.

    Args:
        task: passed to ``app.AGENT_FUNCTION.cleanup_element_tree_factory``.
        step: passed to ``app.AGENT_FUNCTION.cleanup_element_tree_factory``.
        check_filter_funcs: predicates applied one after another, in list order,
            through ``filter_out_elements``.

    Returns:
        An async cleanup function taking ``(frame, url, element_tree)`` and
        returning the cleaned and filtered element tree.
    """

    async def helper_func(frame: Page | Frame, url: str, element_tree: list[dict]) -> list[dict]:
        base_cleanup = app.AGENT_FUNCTION.cleanup_element_tree_factory(task=task, step=step)
        cleaned_tree = await base_cleanup(frame, url, element_tree)
        for predicate in check_filter_funcs:
            cleaned_tree = await filter_out_elements(frame=frame, element_tree=cleaned_tree, check_filter=predicate)
        return cleaned_tree

    return helper_func
async def check_phone_number_format(
    value: str,
    action: actions.InputTextAction,
    skyvern_element: SkyvernElement,
    scraped_page: ScrapedPage,
    task: Task,
    step: Step,
) -> str:
    """Ask the secondary LLM whether ``value`` is formatted correctly for a tel input.

    A screenshot-free copy of the scraped page is rendered to an element tree
    and sent together with the task goal/payload in the
    ``check-phone-number-format`` prompt.

    Returns:
        The recommended phone number when the response says this is a phone
        number input, the current format is incorrect and a recommendation is
        present; otherwise ``value`` unchanged.
    """
    LOG.info(
        "Input is a tel input, trigger phone number format checking",
        action=action,
        element_id=skyvern_element.get_id(),
    )

    page_without_screenshots = await scraped_page.generate_scraped_page_without_screenshots()
    elements_html = page_without_screenshots.build_element_tree(html_need_skyvern_attrs=False)
    prompt = prompt_engine.load_prompt(
        template="check-phone-number-format",
        context=action.intention,
        current_value=value,
        navigation_goal=task.navigation_goal,
        navigation_payload_str=json.dumps(task.navigation_payload),
        elements=elements_html,
        local_datetime=datetime.now(skyvern_context.ensure_context().tz_info).isoformat(),
    )
    raw_response = await app.SECONDARY_LLM_API_HANDLER(
        prompt=prompt, step=step, prompt_name="check-phone-number-format"
    )
    verdict = CheckPhoneNumberFormatResponse.model_validate(raw_response)

    should_replace = (
        verdict.is_phone_number_input
        and not verdict.is_current_format_correct
        and bool(verdict.recommended_phone_number)
    )
    if not should_replace:
        return value

    LOG.info(
        "The current phone number format is incorrect, using the recommended phone number",
        action=action,
        element_id=skyvern_element.get_id(),
        recommended_phone_number=verdict.recommended_phone_number,
    )
    return verdict.recommended_phone_number
2025-08-21 22:11:48 +08:00
async def check_date_format(
    value: str,
    action: actions.InputTextAction,
    skyvern_element: SkyvernElement,
    task: Task,
    step: Step,
) -> str:
    """Ask the secondary LLM whether ``value`` is formatted correctly for a date input.

    The ``check-date-format`` prompt receives the current value, the task
    goal/payload and the local datetime of the current context.

    Returns:
        The recommended date when the response reports an incorrect format and
        provides a recommendation; otherwise ``value`` unchanged.
    """
    LOG.info(
        "Input is a date input, trigger date format checking",
        action=action,
        element_id=skyvern_element.get_id(),
    )

    prompt = prompt_engine.load_prompt(
        template="check-date-format",
        current_value=value,
        navigation_goal=task.navigation_goal,
        navigation_payload_str=json.dumps(task.navigation_payload),
        local_datetime=datetime.now(skyvern_context.ensure_context().tz_info).isoformat(),
    )
    raw_response = await app.SECONDARY_LLM_API_HANDLER(prompt=prompt, step=step, prompt_name="check-date-format")
    verdict = CheckDateFormatResponse.model_validate(raw_response)

    if not verdict.is_current_format_correct and verdict.recommended_date:
        LOG.info(
            "The current date format is incorrect, using the recommended date",
            action=action,
            element_id=skyvern_element.get_id(),
            recommended_date=verdict.recommended_date,
        )
        return verdict.recommended_date

    return value
2024-08-21 10:54:32 +08:00
class AutoCompletionResult(BaseModel):
    """Pydantic model bundling the outcome of an auto-completion attempt."""

    # Defaults to False; presumably set once auto completion was actually tried
    # (TODO confirm against the code that builds this model).
    auto_completion_attempt: bool = False
    # Element dicts associated with the attempt; empty by default.
    incremental_elements: list[dict] = []
    # Result of the attempt; a fresh model reports ActionSuccess().
    action_result: ActionResult = ActionSuccess()
class ActionHandler:
    """Class-level registry and dispatcher for action handlers.

    Three registries, all keyed by ``ActionType``, hold an optional setup
    handler, the main handler and an optional teardown handler. Every handler
    is called as ``handler(action, page, scraped_page, task, step)`` and returns
    a list of ``ActionResult``.
    """

    _handled_action_types: dict[
        ActionType,
        Callable[[Action, Page, ScrapedPage, Task, Step], Awaitable[list[ActionResult]]],
    ] = {}

    _setup_action_types: dict[
        ActionType,
        Callable[[Action, Page, ScrapedPage, Task, Step], Awaitable[list[ActionResult]]],
    ] = {}

    _teardown_action_types: dict[
        ActionType,
        Callable[[Action, Page, ScrapedPage, Task, Step], Awaitable[list[ActionResult]]],
    ] = {}

    @classmethod
    def register_action_type(
        cls,
        action_type: ActionType,
        handler: Callable[[Action, Page, ScrapedPage, Task, Step], Awaitable[list[ActionResult]]],
    ) -> None:
        """Register ``handler`` as the main handler of ``action_type``, replacing any previous one."""
        cls._handled_action_types[action_type] = handler

    @classmethod
    def register_setup_for_action_type(
        cls,
        action_type: ActionType,
        handler: Callable[[Action, Page, ScrapedPage, Task, Step], Awaitable[list[ActionResult]]],
    ) -> None:
        """Register ``handler`` to run before the main handler of ``action_type``."""
        cls._setup_action_types[action_type] = handler

    @classmethod
    def register_teardown_for_action_type(
        cls,
        action_type: ActionType,
        handler: Callable[[Action, Page, ScrapedPage, Task, Step], Awaitable[list[ActionResult]]],
    ) -> None:
        """Register ``handler`` to run after the main handler of ``action_type``."""
        cls._teardown_action_types[action_type] = handler

    @staticmethod
    @TraceManager.traced_async(ignore_inputs=["scraped_page", "page"])
    async def handle_action(
        scraped_page: ScrapedPage,
        task: Task,
        step: Step,
        page: Page,
        action: Action,
    ) -> list[ActionResult]:
        """Run the setup, main and teardown handlers registered for ``action``.

        Note that the parameter order here differs from the order used to call
        the registered handlers (``action, page, scraped_page, task, step``).

        Flow:
            1. Unsupported action types yield a single ``ActionFailure``.
            2. ``check_for_invalid_web_action`` may short-circuit with its results.
            3. The setup handler (if any) runs; when its last result is not an
               ``ActionSuccess`` the main handler is skipped.
            4. The main handler and then the teardown handler (if any) run.

        Exceptions raised along the way are logged and converted into an
        ``ActionFailure`` appended to the results; they are not propagated.

        Side effects (always executed, in the ``finally`` clause):
            * ``action.status`` is set to completed / skipped / failed depending
              on the last result (``ActionSuccess`` / ``ActionAbort`` / other or none).
            * the action is persisted through ``app.DATABASE.create_action``.
            * when an LLM caller exists for the task and the action has a
              ``tool_call_id``, a ``tool_result`` message is added to the caller.

        Returns:
            The accumulated results of every handler that ran.
        """
        LOG.info("Handling action", action=action)
        actions_result: list[ActionResult] = []
        llm_caller = LLMCallerManager.get_llm_caller(task.task_id)
        try:
            if action.action_type in ActionHandler._handled_action_types:
                invalid_web_action_check = check_for_invalid_web_action(action, page, scraped_page, task, step)
                if invalid_web_action_check:
                    actions_result.extend(invalid_web_action_check)
                    return actions_result

                # do setup before action handler
                if setup := ActionHandler._setup_action_types.get(action.action_type):
                    results = await setup(action, page, scraped_page, task, step)
                    actions_result.extend(results)
                    # Only abort when the setup did not end successfully. The result is an
                    # instance, so it has to be checked with isinstance: comparing it with
                    # the ActionSuccess class itself is always unequal and would skip the
                    # main handler after every setup that returned something.
                    if results and not isinstance(results[-1], ActionSuccess):
                        return actions_result

                # do the handler
                handler = ActionHandler._handled_action_types[action.action_type]
                results = await handler(action, page, scraped_page, task, step)
                actions_result.extend(results)

                # do the teardown
                teardown = ActionHandler._teardown_action_types.get(action.action_type)
                if teardown:
                    results = await teardown(action, page, scraped_page, task, step)
                    actions_result.extend(results)

                return actions_result

            else:
                LOG.error(
                    "Unsupported action type in handler",
                    action=action,
                    type=type(action),
                )
                actions_result.append(ActionFailure(Exception(f"Unsupported action type: {type(action)}")))
                return actions_result
        except MissingElement as e:
            LOG.info(
                "Known exceptions",
                action=action,
                exception_type=type(e),
                exception_message=str(e),
            )
            actions_result.append(ActionFailure(e))
        except MultipleElementsFound as e:
            LOG.exception(
                "Cannot handle multiple elements with the same selector in one action.",
                action=action,
            )
            actions_result.append(ActionFailure(e))
        except LLMProviderError as e:
            LOG.exception("LLM error in action handler", action=action, exc_info=True)
            actions_result.append(ActionFailure(e))
        except Exception as e:
            LOG.exception("Unhandled exception in action handler", action=action)
            actions_result.append(ActionFailure(e))
        finally:
            # Runs for every exit path above, including the early returns inside the try.
            tool_result_content = ""
            if actions_result and isinstance(actions_result[-1], ActionSuccess):
                action.status = ActionStatus.completed
                tool_result_content = "Tool executed successfully"
            elif actions_result and isinstance(actions_result[-1], ActionAbort):
                action.status = ActionStatus.skipped
                tool_result_content = "Tool executed successfully"
            else:
                tool_result_content = "Tool execution failed"
                # either actions_result is empty or the last action is a failure
                if not actions_result:
                    LOG.warning("Action failed to execute, setting status to failed", action=action)
                action.status = ActionStatus.failed
            await app.DATABASE.create_action(action=action)
            if llm_caller and action.tool_call_id:
                tool_call_result = {
                    "type": "tool_result",
                    "tool_use_id": action.tool_call_id,
                    "content": tool_result_content,
                }
                llm_caller.add_tool_result(tool_call_result)

        return actions_result
def check_for_invalid_web_action(
    action: actions.Action,
    page: Page,
    scraped_page: ScrapedPage,
    task: Task,
    step: Step,
) -> list[ActionResult]:
    """Validate that a web action targets an element present in the scraped page.

    Coordinate based clicks (both ``x`` and ``y`` set) and text inputs without
    an ``element_id`` are exempt from the check. ``page``, ``task`` and ``step``
    are accepted but not used here.

    Returns:
        An empty list when the action is acceptable, or a single non-blocking
        ``ActionFailure`` wrapping ``MissingElement`` when a ``WebAction``
        references an id that is not in ``scraped_page.id_to_element_dict``.
    """
    if (isinstance(action, ClickAction) and action.x is not None and action.y is not None) or (
        isinstance(action, InputTextAction) and not action.element_id
    ):
        return []

    if not isinstance(action, WebAction) or action.element_id in scraped_page.id_to_element_dict:
        return []

    missing = MissingElement(element_id=action.element_id)
    return [ActionFailure(missing, stop_execution_on_failure=False)]
2025-07-07 14:43:10 +08:00
@TraceManager.traced_async(ignore_inputs=["scraped_page", "page"])
async def handle_solve_captcha_action(
    action: actions.SolveCaptchaAction,
    page: Page,
    scraped_page: ScrapedPage,
    task: Task,
    step: Step,
) -> list[ActionResult]:
    """Give a human 30 seconds to solve the captcha, then report success.

    Nothing is checked on the page: the handler logs a warning, sleeps and
    always returns a single ``ActionSuccess``.
    """
    manual_solve_window_seconds = 30
    LOG.warning(
        "Please solve the captcha on the page, you have 30 seconds",
        action=action,
    )
    await asyncio.sleep(manual_solve_window_seconds)
    outcome: list[ActionResult] = [ActionSuccess()]
    return outcome
2025-07-07 14:43:10 +08:00
@TraceManager.traced_async(ignore_inputs=["scraped_page", "page"])
async def handle_click_action(
    action: actions.ClickAction,
    page: Page,
    scraped_page: ScrapedPage,
    task: Task,
    step: Step,
) -> list[ActionResult]:
    """Execute a click, either at raw coordinates or on a scraped element.

    Three paths exist:

    * Coordinate click (``action.x`` and ``action.y`` set): the element under
      the point is looked up; if it can be navigated to through its href the
      handler returns immediately, otherwise the mouse is clicked
      ``action.repeat`` times (1, 2 or 3).
    * Download click (``action.download``): delegates to
      ``handle_click_to_download_file_action`` and afterwards closes the last
      page of the browser context when a new page appeared and a download was
      triggered.
    * Regular click: listens for DOM increments, runs ``chain_click`` and, when
      the URL did not change and the click succeeded, tries the sequential
      dropdown logic on the newly appeared elements.

    Raises:
        ValueError: for a coordinate click whose ``repeat`` is not 1, 2 or 3.

    Returns:
        The list of action results produced by the chosen path.
    """
    dom = DomUtil(scraped_page=scraped_page, page=page)
    original_url = page.url

    if action.x is not None and action.y is not None:
        # Find the element at the clicked location using JavaScript evaluation
        lookup_unique_id_js = """data => {
            const element = document.elementFromPoint(data.x, data.y);
            if (!element) return null;

            // Function to get the unique_id attribute of an element
            function getElementUniqueId(element) {
                if (element && element.nodeType === 1) {
                    // Check if the element has the unique_id attribute
                    if (element.hasAttribute('unique_id')) {
                        return element.getAttribute('unique_id');
                    }

                    // If no unique_id attribute is found, return null
                    return null;
                }
                return null;
            }

            return getElementUniqueId(element);
        }"""
        element_id: str | None = await page.evaluate(lookup_unique_id_js, {"x": action.x, "y": action.y})
        LOG.info("Clicked element at location", x=action.x, y=action.y, element_id=element_id, button=action.button)

        if element_id:
            element_at_point = await dom.safe_get_skyvern_element_by_id(element_id)
            if element_at_point and await element_at_point.navigate_to_a_href(page=page):
                return [ActionSuccess()]

        if action.repeat == 1:
            await page.mouse.click(x=action.x, y=action.y, button=action.button)
        elif action.repeat == 2:
            await page.mouse.dblclick(x=action.x, y=action.y, button=action.button)
        elif action.repeat == 3:
            await page.mouse.click(x=action.x, y=action.y, button=action.button, click_count=3)
        else:
            raise ValueError(f"Invalid repeat value: {action.repeat}")

        return [ActionSuccess()]

    skyvern_element = await dom.get_skyvern_element_by_id(action.element_id)
    await asyncio.sleep(0.3)

    # dynamically validate the attr, since it could change into enabled after the previous actions
    if await skyvern_element.is_disabled(dynamic=True):
        LOG.warning(
            "Try to click on a disabled element",
            action_type=action.action_type,
            task_id=task.task_id,
            step_id=step.step_id,
            element_id=skyvern_element.get_id(),
        )
        return [ActionFailure(InteractWithDisabledElement(skyvern_element.get_id()))]

    results: list[ActionResult] = []

    if action.download:
        browser_state = app.BROWSER_MANAGER.get_for_task(task.task_id, workflow_run_id=task.workflow_run_id)

        def _open_page_count() -> int:
            # number of pages in the browser context, 0 when there is no state/context
            if browser_state is None:
                return 0
            return len(browser_state.browser_context.pages if browser_state.browser_context else [])

        # get the initial page count
        initial_page_count = _open_page_count()
        LOG.info(
            "Page count before download file action",
            initial_page_count=initial_page_count,
            task_id=task.task_id,
            step_id=step.step_id,
            workflow_run_id=task.workflow_run_id,
        )
        try:
            results = await handle_click_to_download_file_action(action, page, scraped_page, task, step)
        finally:
            # get the page count after download; this also runs when the click raised,
            # in which case `results` is still empty and nothing gets closed
            page_count_after_download = _open_page_count()
            LOG.info(
                "Page count after download file action",
                initial_page_count=initial_page_count,
                page_count_after_download=page_count_after_download,
                task_id=task.task_id,
                step_id=step.step_id,
                workflow_run_id=task.workflow_run_id,
            )
            if page_count_after_download > initial_page_count and browser_state and browser_state.browser_context:
                if results and results[-1].download_triggered:
                    LOG.info(
                        "Download triggered, closing the extra page",
                        task_id=task.task_id,
                        step_id=step.step_id,
                        workflow_run_id=task.workflow_run_id,
                    )
                    if page == browser_state.browser_context.pages[-1]:
                        LOG.warning(
                            "The extra page is the current page, closing it",
                            task_id=task.task_id,
                            step_id=step.step_id,
                            workflow_run_id=task.workflow_run_id,
                        )
                    # close the extra page
                    await browser_state.browser_context.pages[-1].close()
                else:
                    LOG.info(
                        "No download triggered, not closing the extra page",
                        task_id=task.task_id,
                        step_id=step.step_id,
                        workflow_run_id=task.workflow_run_id,
                    )
        return results

    incremental_scraped: IncrementalScrapePage | None = None
    try:
        skyvern_frame = await SkyvernFrame.create_instance(skyvern_element.get_frame())
        incremental_scraped = IncrementalScrapePage(skyvern_frame=skyvern_frame)
        await incremental_scraped.start_listen_dom_increment(await skyvern_element.get_element_handler())

        results = await chain_click(
            task,
            scraped_page,
            page,
            action,
            skyvern_element,
            timeout=settings.BROWSER_ACTION_TIMEOUT_MS,
        )

        # the click navigated away, or it failed: nothing more to do
        if page.url != original_url:
            return results
        if results and not isinstance(results[-1], ActionSuccess):
            return results

        try:
            sequential_click_result = await handle_sequential_click_for_dropdown(
                action=action,
                action_history=results,
                anchor_element=skyvern_element,
                dom=dom,
                page=page,
                scraped_page=scraped_page,
                incremental_scraped=incremental_scraped,
                task=task,
                step=step,
            )
        except Exception:
            LOG.warning(
                "Failed to do sequential logic for the click action, skipping",
                exc_info=True,
                step_id=step.step_id,
                task_id=task.task_id,
                element_id=skyvern_element.get_id(),
            )
        else:
            if sequential_click_result:
                results.append(sequential_click_result)
        return results
    finally:
        if incremental_scraped:
            await incremental_scraped.stop_listen_dom_increment()
2025-05-19 09:08:55 -07:00
2025-07-07 14:43:10 +08:00
@TraceManager.traced_async(ignore_inputs=["anchor_element", "scraped_page", "page", "incremental_scraped", "dom"])
async def handle_sequential_click_for_dropdown(
    action: actions.ClickAction,
    action_history: list[ActionResult],
    anchor_element: SkyvernElement,
    dom: DomUtil,
    page: Page,
    scraped_page: ScrapedPage,
    incremental_scraped: IncrementalScrapePage,
    task: Task,
    step: Step,
) -> ActionResult | None:
    """Follow up a click that revealed new elements by selecting from the emerging dropdown.

    The function gives up and returns ``None`` when:

    * no incremental element appeared, or none survives the cleanup filter;
    * the ``check-user-goal`` LLM check reports the user goal as achieved;
    * no dropdown menu can be located from the anchor element;
    * the parsed input/select context says the dropdown is date related.

    Otherwise it returns the result of ``select_from_emerging_elements``.

    Args:
        action: the click that was just executed.
        action_history: results of that click; only the last entry is reported to the LLM.
        anchor_element: the element that was clicked.
        dom: DOM helper bound to the page as scraped before the click.
        page: the current page.
        scraped_page: the page as scraped before the click.
        incremental_scraped: listener that collected the DOM increments.
        task: current task.
        step: current step.
    """
    if await incremental_scraped.get_incremental_elements_num() == 0:
        return None

    cleanup = clean_and_remove_element_tree_factory(
        task=task, step=step, check_filter_funcs=[check_existed_but_not_option_element_in_dom_factory(dom)]
    )
    incremental_elements = await incremental_scraped.get_incremental_element_tree(cleanup)
    if len(incremental_elements) == 0:
        return None

    LOG.info("Detected new element after clicking", action=action)
    scraped_page_after_open = await scraped_page.generate_scraped_page_without_screenshots()
    # ids that exist after the click but did not exist before it
    new_element_ids = set(scraped_page_after_open.id_to_css_dict.keys()) - set(scraped_page.id_to_css_dict.keys())

    dom_after_open = DomUtil(scraped_page=scraped_page_after_open, page=page)
    new_interactable_element_ids = []
    for candidate_id in new_element_ids:
        candidate = await dom_after_open.get_skyvern_element_by_id(candidate_id)
        if candidate.is_interactable():
            new_interactable_element_ids.append(candidate_id)

    action_history_str = ""
    if action_history:
        latest_result = action_history[-1]
        action_history_str = json.dumps(
            {
                "action_type": action.action_type,
                "reasoning": action.reasoning,
                "result": latest_result.success,
            }
        )

    prompt = load_prompt_with_elements(
        element_tree_builder=scraped_page_after_open,
        prompt_engine=prompt_engine,
        template_name="check-user-goal",
        navigation_goal=task.navigation_goal,
        navigation_payload=task.navigation_payload,
        new_elements_ids=new_element_ids,
        without_screenshots=True,
        action_history=action_history_str,
        local_datetime=datetime.now(skyvern_context.ensure_context().tz_info).isoformat(),
    )
    llm_response = await app.CHECK_USER_GOAL_LLM_API_HANDLER(
        prompt=prompt,
        step=step,
        prompt_name="check-user-goal",
    )
    goal_check = CompleteVerifyResult.model_validate(llm_response)
    if goal_check.user_goal_achieved:
        LOG.info(
            "User goal achieved, exiting the sequential click logic",
            step_id=step.step_id,
            task_id=task.task_id,
        )
        return None

    dropdown_menu_element = await locate_dropdown_menu(
        current_anchor_element=anchor_element,
        incremental_scraped=incremental_scraped,
        step=step,
        task=task,
    )
    if dropdown_menu_element is None:
        return None

    context_probe = AbstractActionForContextParse(
        reasoning=action.reasoning, intention=action.intention, element_id=action.element_id
    )
    dropdown_select_context = await _get_input_or_select_context(
        action=context_probe,
        skyvern_element=anchor_element,
        element_tree_builder=scraped_page,
        step=step,
    )
    if dropdown_select_context.is_date_related:
        LOG.info(
            "The dropdown is date related, exiting the sequential click logic",
            step_id=step.step_id,
            task_id=task.task_id,
        )
        return None

    LOG.info(
        "Found the dropdown menu element after clicking, triggering the sequential click logic",
        step_id=step.step_id,
        task_id=task.task_id,
        element_id=dropdown_menu_element.get_id(),
    )

    # prefer the parsed intention, fall back to the field description
    prompt_options = CustomSelectPromptOptions(
        field_information=dropdown_select_context.intention or dropdown_select_context.field,
        is_date_related=dropdown_select_context.is_date_related,
        required_field=dropdown_select_context.is_required,
    )
    return await select_from_emerging_elements(
        current_element_id=anchor_element.get_id(),
        options=prompt_options,
        page=page,
        scraped_page=scraped_page,
        step=step,
        task=task,
        scraped_page_after_open=scraped_page_after_open,
        new_interactable_element_ids=new_interactable_element_ids,
    )
2025-07-07 14:43:10 +08:00
@TraceManager.traced_async(ignore_inputs=["scraped_page", "page"])
async def handle_click_to_download_file_action(
    action: actions.ClickAction,
    page: Page,
    scraped_page: ScrapedPage,
    task: Task,
    step: Step,
) -> list[ActionResult]:
    """Click an element that is expected to start a download and wait for the file.

    Steps:
        1. Count the files in the download directory (plus the files of the
           browser session, when the task has one).
        2. Navigate to the element's href or click it.
        3. Poll once per second, for at most ``BROWSER_DOWNLOAD_MAX_WAIT_TIME``,
           until the file count grows.
        4. Wait for files that are still downloading.

    Returns:
        A single-element list:
        * ``ActionFailure(download_triggered=False)`` when the click itself raised;
        * ``ActionSuccess(download_triggered=False)`` when no new file showed up in time;
        * ``ActionSuccess(download_triggered=True)`` otherwise, even if waiting
          for unfinished downloads hit ``DownloadFileMaxWaitingTime``.
    """
    dom = DomUtil(scraped_page=scraped_page, page=page)
    skyvern_element = await dom.get_skyvern_element_by_id(action.element_id)
    locator = skyvern_element.locator

    context = skyvern_context.current()
    # the context run id wins, then the workflow run id, then the task id
    run_id = context.run_id if context and context.run_id else task.workflow_run_id or task.task_id
    download_dir = Path(get_download_dir(run_id=run_id))

    async def _downloaded_files() -> list:
        # local files plus, for browser sessions, the files reported by storage
        found = list_files_in_directory(download_dir)
        if task.browser_session_id:
            session_files = await app.STORAGE.list_downloaded_files_in_browser_session(
                organization_id=task.organization_id, browser_session_id=task.browser_session_id
            )
            found = found + session_files
        return found

    async def _in_progress_files() -> list:
        # same as above, for files whose download has not completed yet
        pending = list_downloading_files_in_directory(download_dir)
        if task.browser_session_id:
            session_pending = await app.STORAGE.list_downloading_files_in_browser_session(
                organization_id=task.organization_id, browser_session_id=task.browser_session_id
            )
            pending = pending + session_pending
        return pending

    list_files_before = await _downloaded_files()
    LOG.info(
        "Number of files in download directory before click",
        num_downloaded_files_before=len(list_files_before),
        download_dir=download_dir,
        task_id=task.task_id,
        step_id=step.step_id,
        workflow_run_id=task.workflow_run_id,
    )

    try:
        # NOTE(review): the flattened source does not show whether the load-state wait is
        # nested under this `if`; it is kept together with the click here - confirm.
        if not await skyvern_element.navigate_to_a_href(page=page):
            await locator.click(timeout=settings.BROWSER_ACTION_TIMEOUT_MS)
            await page.wait_for_load_state(timeout=settings.BROWSER_LOADING_TIMEOUT_MS)
    except Exception as e:
        LOG.exception(
            "ClickAction with download failed",
            exc_info=True,
            action=action,
            task_id=task.task_id,
            step_id=step.step_id,
            workflow_run_id=task.workflow_run_id,
        )
        return [ActionFailure(e, download_triggered=False)]

    try:
        LOG.info(
            "Checking if there is any new files after click",
            download_dir=download_dir,
        )
        async with asyncio.timeout(BROWSER_DOWNLOAD_MAX_WAIT_TIME):
            while True:
                list_files_after = await _downloaded_files()
                if len(list_files_after) > len(list_files_before):
                    LOG.info(
                        "Found new files in download directory after click",
                        num_downloaded_files_after=len(list_files_after),
                        download_dir=download_dir,
                        task_id=task.task_id,
                        step_id=step.step_id,
                        workflow_run_id=task.workflow_run_id,
                    )
                    break
                await asyncio.sleep(1)
    except asyncio.TimeoutError:
        LOG.warning(
            "No file to download after click",
            task_id=task.task_id,
            step_id=step.step_id,
            workflow_run_id=task.workflow_run_id,
        )
        return [ActionSuccess(download_triggered=False)]

    # check if there's any file is still downloading
    downloading_files = await _in_progress_files()
    if len(downloading_files) == 0:
        return [ActionSuccess(download_triggered=True)]

    LOG.info(
        "File downloading hasn't completed, wait for a while",
        downloading_files=downloading_files,
        task_id=task.task_id,
        step_id=step.step_id,
        workflow_run_id=task.workflow_run_id,
    )
    try:
        await wait_for_download_finished(downloading_files=downloading_files)
    except DownloadFileMaxWaitingTime as e:
        # best effort: the download was triggered, so it is still reported as such
        LOG.warning(
            "There're several long-time downloading files, these files might be broken",
            downloading_files=e.downloading_files,
            task_id=task.task_id,
            step_id=step.step_id,
            workflow_run_id=task.workflow_run_id,
        )
    return [ActionSuccess(download_triggered=True)]
2025-07-07 14:43:10 +08:00
@TraceManager.traced_async(ignore_inputs=["scraped_page", "page"])
async def handle_input_text_action(
    action: actions.InputTextAction,
    page: Page,
    scraped_page: ScrapedPage,
    task: Task,
    step: Step,
) -> list[ActionResult]:
    """
    Type the text of an InputTextAction into its target element.

    The handler goes through these phases, in order:
      1. No `element_id`: type the text with the page keyboard and succeed.
      2. The element already holds exactly `action.text`: succeed without touching it.
      3. Resolve the text through `get_actual_value_of_parameter_if_secret`; fail with
         `FailedToFetchSecret` when that returns None.
      4. Fail with `InteractWithDisabledElement` when the element is currently disabled.
      5. Selectable elements are delegated to `handle_select_option_action`.
      6. For non-raw, non-search-bar <input> elements, press ArrowDown and watch the DOM for
         new elements; if any show up, try a custom selection before falling back to typing.
      7. Fill the text: hidden-attribute check, focus, phone/date format checks, clearing,
         blocking-element redirection, TOTP generation, auto-completion and sequential typing.

    Returns:
        A list of action results. Every return statement in this function builds a
        single-element list, except the delegation to `handle_select_option_action`,
        whose result is returned as-is.

    Raises:
        Any exception raised during the final input phase is logged and re-raised.
    """
    if not action.element_id:
        # This is a CUA type action
        await page.keyboard.type(action.text)
        return [ActionSuccess()]

    dom = DomUtil(scraped_page, page)
    skyvern_element = await dom.get_skyvern_element_by_id(action.element_id)
    skyvern_frame = await SkyvernFrame.create_instance(skyvern_element.get_frame())
    incremental_scraped = IncrementalScrapePage(skyvern_frame=skyvern_frame)
    timeout = settings.BROWSER_ACTION_TIMEOUT_MS

    # nothing to do when the element already contains the requested (unresolved) text
    current_text = await get_input_value(skyvern_element.get_tag_name(), skyvern_element.get_locator())
    if current_text == action.text:
        return [ActionSuccess()]

    # before filling text, we need to validate if the element can be filled if it's not one of COMMON_INPUT_TAGS
    tag_name = scraped_page.id_to_element_dict[action.element_id]["tagName"].lower()
    text: str | None = await get_actual_value_of_parameter_if_secret(task, action.text)
    if text is None:
        return [ActionFailure(FailedToFetchSecret())]

    # the resolved text equals one of the vault TOTP marker constants
    is_totp_value = (
        text == BitwardenConstants.TOTP or text == OnePasswordConstants.TOTP or text == AzureVaultConstants.TOTP
    )
    # the resolved text differs from the raw action text, i.e. a substitution happened
    is_secret_value = text != action.text

    # dynamically validate the attr, since it could change into enabled after the previous actions
    if await skyvern_element.is_disabled(dynamic=True):
        LOG.warning(
            "Try to input text on a disabled element",
            action_type=action.action_type,
            task_id=task.task_id,
            step_id=step.step_id,
            element_id=skyvern_element.get_id(),
        )
        return [ActionFailure(InteractWithDisabledElement(skyvern_element.get_id()))]

    # select action equivalent to this input; used when the element turns out to be a selection
    select_action = SelectOptionAction(
        reasoning=action.reasoning,
        element_id=skyvern_element.get_id(),
        option=SelectOption(label=text),
        intention=action.intention,
    )
    if await skyvern_element.get_selectable():
        LOG.info(
            "Input element is selectable, doing select actions",
            task_id=task.task_id,
            step_id=step.step_id,
            element_id=skyvern_element.get_id(),
            action=action,
        )
        return await handle_select_option_action(select_action, page, scraped_page, task, step)

    incremental_element: list[dict] = []
    # when True, the outermost `finally` below presses Tab to settle an open auto-completion
    auto_complete_hacky_flag: bool = False

    input_or_select_context = await _get_input_or_select_context(
        action=action,
        element_tree_builder=scraped_page,
        skyvern_element=skyvern_element,
        step=step,
    )

    # check if it's selectable
    if (
        not input_or_select_context.is_search_bar  # no need to trigger selection logic for search bar
        and skyvern_element.get_tag_name() == InteractiveElement.INPUT
        and not await skyvern_element.is_raw_input()
    ):
        await skyvern_element.scroll_into_view()
        # press arrowdown to watch if there's any options popping up
        await incremental_scraped.start_listen_dom_increment(await skyvern_element.get_element_handler())
        try:
            await skyvern_element.input_clear()
        except Exception:
            # best effort: a failed clear does not stop the dropdown probing
            LOG.info(
                "Failed to clear up the input, but continue to input",
                task_id=task.task_id,
                step_id=step.step_id,
                element_id=skyvern_element.get_id(),
            )

        try:
            await skyvern_element.press_key("ArrowDown")
        except TimeoutError:
            # sometimes we notice `press_key()` raise a timeout but actually the dropdown is opened.
            LOG.info(
                "Timeout to press ArrowDown to open dropdown, ignore the timeout and continue to execute the action",
                task_id=task.task_id,
                step_id=step.step_id,
                element_id=skyvern_element.get_id(),
                action=action,
            )

        await skyvern_frame.safe_wait_for_animation_end()

        incremental_element = await incremental_scraped.get_incremental_element_tree(
            clean_and_remove_element_tree_factory(
                task=task, step=step, check_filter_funcs=[check_existed_but_not_option_element_in_dom_factory(dom)]
            ),
        )
        if len(incremental_element) == 0:
            LOG.info(
                "No new element detected, indicating it couldn't be a selectable auto-completion input",
                task_id=task.task_id,
                step_id=step.step_id,
                element_id=skyvern_element.get_id(),
                action=action,
            )
            await incremental_scraped.stop_listen_dom_increment()
        else:
            auto_complete_hacky_flag = True
            # whether the cleanup below should press Escape / blur on the input itself
            try_to_quit_dropdown = True
            try:
                # TODO: we don't select by value for the auto completion detect case
                select_result = await sequentially_select_from_dropdown(
                    action=select_action,
                    input_or_select_context=input_or_select_context,
                    page=page,
                    dom=dom,
                    skyvern_element=skyvern_element,
                    skyvern_frame=skyvern_frame,
                    incremental_scraped=incremental_scraped,
                    step=step,
                    task=task,
                    target_value=text,
                )
                if select_result is not None:
                    if select_result.action_result and select_result.action_result.success:
                        try_to_quit_dropdown = False
                        return [select_result.action_result]

                    if select_result.dropdown_menu is None:
                        try_to_quit_dropdown = False

                    if select_result.action_result is None:
                        LOG.info(
                            "It might not be a selectable auto-completion input, exit the custom selection mode",
                            task_id=task.task_id,
                            step_id=step.step_id,
                            element_id=skyvern_element.get_id(),
                            action=action,
                        )
                    else:
                        LOG.warning(
                            "Custom selection returned an error, continue to input text",
                            task_id=task.task_id,
                            step_id=step.step_id,
                            element_id=skyvern_element.get_id(),
                            action=action,
                            err_msg=select_result.action_result.exception_message,
                        )

            except Exception:
                # the custom selection is only an attempt; fall through to plain text input
                LOG.warning(
                    "Failed to do custom selection transformed from input action, continue to input text",
                    exc_info=True,
                    task_id=task.task_id,
                    step_id=step.step_id,
                )
                await skyvern_element.scroll_into_view()
            finally:
                # runs on every exit path, including the early return on a successful selection
                if await skyvern_element.is_visible():
                    blocking_element, exist = await skyvern_element.find_blocking_element(
                        dom=dom, incremental_page=incremental_scraped
                    )
                    if blocking_element and exist:
                        LOG.info(
                            "Find a blocking element to the current element, going to blur the blocking element first",
                            task_id=task.task_id,
                            step_id=step.step_id,
                            blocking_element=blocking_element.get_locator(),
                        )
                        # re-check the locator before each step: the element may be gone after Escape
                        if await blocking_element.get_locator().count():
                            await blocking_element.press_key("Escape")
                        if await blocking_element.get_locator().count():
                            await blocking_element.blur()

                if try_to_quit_dropdown and await skyvern_element.is_visible():
                    await skyvern_element.press_key("Escape")
                    await skyvern_element.blur()
                await incremental_scraped.stop_listen_dom_increment()

    ### Start filling text logic
    # check if the element has hidden attribute
    if await skyvern_element.has_hidden_attr():
        return [ActionFailure(InputToInvisibleElement(skyvern_element.get_id()), stop_execution_on_failure=False)]

    # force to move focus back to the element
    await skyvern_element.get_locator().focus(timeout=timeout)

    # check the phone number format when type=tel and the text is not a secret value
    if not is_secret_value and await skyvern_element.get_attr("type") == "tel":
        try:
            text = await check_phone_number_format(
                value=text,
                action=action,
                skyvern_element=skyvern_element,
                scraped_page=scraped_page,
                task=task,
                step=step,
            )
        except Exception:
            LOG.warning(
                "Failed to check the phone number format, using the original text",
                action=action,
                exc_info=True,
            )

    # TODO: some elements are supported to use `locator.press_sequentially()` to fill in the data
    # we need find a better way to detect the attribute in the future
    class_name: str | None = await skyvern_element.get_attr("class")
    if class_name and "blinking-cursor" in class_name:
        if is_totp_value:
            text = generate_totp_value(task=task, parameter=action.text)
        await skyvern_element.press_fill(text=text)
        return [ActionSuccess()]

    # `Locator.clear()` on a spin button could cause the cursor moving away, and never be back
    # run `Locator.clear()` when:
    #   1. the element is not a spin button
    #     1.1. the element has a value attribute
    #     1.2. the element is not common input tag
    if not await skyvern_element.is_spinbtn_input() and (current_text or (tag_name not in COMMON_INPUT_TAGS)):
        try:
            await skyvern_element.input_clear()
        except TimeoutError:
            LOG.info("None input tag clear timeout", action=action)
            return [ActionFailure(InvalidElementForTextInput(element_id=action.element_id, tag_name=tag_name))]
        except Exception:
            LOG.warning("Failed to clear the input field", action=action, exc_info=True)
            return [ActionFailure(InvalidElementForTextInput(element_id=action.element_id, tag_name=tag_name))]

    # wait for blocking element to show up
    await skyvern_frame.safe_wait_for_animation_end()
    try:
        blocking_element, exist = await skyvern_element.find_blocking_element(
            dom=dom, incremental_page=incremental_scraped
        )
        if blocking_element and exist:
            LOG.warning(
                "Find a blocking element to the current element, going to input on the blocking element",
            )
            # only redirect the input when the blocking element itself accepts text
            if await blocking_element.is_editable():
                skyvern_element = blocking_element
                tag_name = blocking_element.get_tag_name()
    except Exception:
        LOG.info(
            "Failed to find the blocking element, continue with the original element",
            exc_info=True,
            task_id=task.task_id,
            step_id=step.step_id,
        )

    if is_totp_value:
        LOG.info("Skipping the auto completion logic since it's a TOTP input")
        text = generate_totp_value(task=task, parameter=action.text)
        await skyvern_element.input(text)
        return [ActionSuccess()]

    try:
        # TODO: not sure if this case will trigger auto-completion
        if tag_name not in COMMON_INPUT_TAGS:
            await skyvern_element.input_fill(text)
            return [ActionSuccess()]

        if len(text) == 0:
            return [ActionSuccess()]

        if tag_name == InteractiveElement.INPUT and await skyvern_element.get_attr("type") == "date":
            try:
                text = await check_date_format(
                    value=text,
                    action=action,
                    skyvern_element=skyvern_element,
                    task=task,
                    step=step,
                )
            except Exception:
                LOG.warning(
                    "Failed to check the date format, using the original text to fill in the date input",
                    text=text,
                    action=action,
                    exc_info=True,
                )

            await skyvern_element.input_fill(text=text)
            return [ActionSuccess()]

        if not await skyvern_element.is_raw_input():
            if await skyvern_element.is_auto_completion_input() or input_or_select_context.is_location_input:
                if result := await input_or_auto_complete_input(
                    input_or_select_context=input_or_select_context,
                    scraped_page=scraped_page,
                    page=page,
                    dom=dom,
                    text=text,
                    skyvern_element=skyvern_element,
                    step=step,
                    task=task,
                ):
                    # the auto-completion flow produced a result, so skip the Tab hack below
                    auto_complete_hacky_flag = False
                    return [result]

        await incremental_scraped.start_listen_dom_increment(await skyvern_element.get_element_handler())

        try:
            await skyvern_element.input_sequentially(text=text)
        finally:
            # new elements appearing while typing arm the Tab hack in the outer `finally`
            incremental_element = await incremental_scraped.get_incremental_element_tree(
                clean_and_remove_element_tree_factory(
                    task=task, step=step, check_filter_funcs=[check_existed_but_not_option_element_in_dom_factory(dom)]
                ),
            )
            if len(incremental_element) > 0:
                auto_complete_hacky_flag = True
            await incremental_scraped.stop_listen_dom_increment()

        return [ActionSuccess()]
    except Exception as e:
        LOG.exception(
            "Failed to input the value or finish the auto completion",
            task_id=task.task_id,
            step_id=step.step_id,
        )
        raise e
    finally:
        # HACK: force to finish missing auto completion input
        if auto_complete_hacky_flag and await skyvern_element.is_visible() and not await skyvern_element.is_raw_input():
            LOG.debug(
                "Trigger input-selection hack, pressing Tab to choose one",
                action=action,
                task_id=task.task_id,
                step_id=step.step_id,
            )
            await skyvern_element.press_key("Tab")
2025-07-07 14:43:10 +08:00
@TraceManager.traced_async(ignore_inputs=["scraped_page", "page"])
async def handle_upload_file_action(
    action: actions.UploadFileAction,
    page: Page,
    scraped_page: ScrapedPage,
    task: Task,
    step: Step,
) -> list[ActionResult]:
    """
    Upload the file referenced by `action.file_url` through the target element.

    The (possibly secret) file URL is resolved first and must appear, raw or URL-decoded,
    in the task's navigation payload or navigation goal; otherwise it is treated as an
    LLM hallucination. A file <input> gets the downloaded file set directly, any other
    element is handled as a click via `chain_click`.

    Returns:
        A single-element list for every path handled here, or whatever `chain_click`
        returns for non file-input elements. Failures carry `MissingFileUrl`,
        `FailedToFetchSecret`, `ImaginaryFileUrl`, `InteractWithDisabledElement`
        or a generic download error.
    """
    if not action.file_url:
        LOG.warning("InputFileAction has no file_url", action=action)
        return [ActionFailure(MissingFileUrl())]

    # ************************************************************************************************************** #
    # After this point if the file_url is a secret, it will be replaced with the actual value
    # In order to make sure we don't log the secret value, we log the action with the original value action.file_url
    # ************************************************************************************************************** #
    file_url = await get_actual_value_of_parameter_if_secret(task, action.file_url)
    if file_url is None:
        # The secret lookup can come back empty (the input text handler guards the same call);
        # without this check `urllib.parse.unquote(None)` below would raise a TypeError.
        LOG.warning(
            "Failed to resolve the file url of the upload action",
            action=action,
            task_id=task.task_id,
            step_id=step.step_id,
        )
        return [ActionFailure(FailedToFetchSecret())]

    decoded_url = urllib.parse.unquote(file_url)
    # stringify the task context once instead of once per membership test
    payload_text = str(task.navigation_payload)
    goal_text = str(task.navigation_goal)
    url_is_known = any(
        candidate in context for candidate in (file_url, decoded_url) for context in (payload_text, goal_text)
    )
    if not url_is_known:
        LOG.warning(
            "LLM might be imagining the file url, which is not in navigation payload",
            action=action,
            file_url=action.file_url,
        )
        return [ActionFailure(ImaginaryFileUrl(action.file_url))]

    dom = DomUtil(scraped_page=scraped_page, page=page)
    skyvern_element = await dom.get_skyvern_element_by_id(action.element_id)

    # dynamically validate the attr, since it could change into enabled after the previous actions
    if await skyvern_element.is_disabled(dynamic=True):
        LOG.warning(
            "Try to upload file on a disabled element",
            action_type=action.action_type,
            task_id=task.task_id,
            step_id=step.step_id,
            element_id=skyvern_element.get_id(),
        )
        return [ActionFailure(InteractWithDisabledElement(skyvern_element.get_id()))]

    locator = skyvern_element.locator

    file_path = await download_file(file_url)
    is_file_input = await skyvern_element.is_file_input()

    if not is_file_input:
        LOG.info("Taking UploadFileAction. Found non file input tag", action=action)
        # treat it as a click action
        action.is_upload_file_tag = False
        return await chain_click(
            task,
            scraped_page,
            page,
            action,
            skyvern_element,
            timeout=settings.BROWSER_ACTION_TIMEOUT_MS,
        )

    LOG.info("Taking UploadFileAction. Found file input tag", action=action)
    if not file_path:
        # report the original (unresolved) url so a secret value never ends up in the error
        return [ActionFailure(Exception(f"Failed to download file from {action.file_url}"))]

    await locator.set_input_files(
        file_path,
        timeout=settings.BROWSER_ACTION_TIMEOUT_MS,
    )
    # Sleep for 10 seconds after uploading a file to let the page process it
    await asyncio.sleep(10)
    return [ActionSuccess()]
2024-11-10 16:24:13 -08:00
# This function is deprecated. Downloads are handled by the click action handler now.
2025-07-07 14:43:10 +08:00
@TraceManager.traced_async(ignore_inputs=["scraped_page", "page"])
async def handle_download_file_action(
    action: actions.DownloadFileAction,
    page: Page,
    scraped_page: ScrapedPage,
    task: Task,
    step: Step,
) -> list[ActionResult]:
    """
    Alt-click the target element and save the resulting download to disk.

    The file is stored under `{REPO_ROOT_DIR}/downloads/<workflow_run_id or task_id>/`,
    named after `action.file_name` or a random UUID when no name is given.

    Returns:
        `[ActionSuccess(data={"file_path": ...})]` on success, or `[ActionFailure(e)]`
        when clicking, downloading or saving raises.
    """
    dom = DomUtil(scraped_page=scraped_page, page=page)
    skyvern_element = await dom.get_skyvern_element_by_id(action.element_id)

    target_name = str(action.file_name or uuid.uuid4())
    download_folder = f"{REPO_ROOT_DIR}/downloads/{task.workflow_run_id or task.task_id}"
    full_file_path = f"{download_folder}/{target_name}"

    try:
        # Start waiting for the download
        async with page.expect_download() as download_info:
            await asyncio.sleep(0.3)
            await skyvern_element.locator.click(
                timeout=settings.BROWSER_ACTION_TIMEOUT_MS,
                modifiers=["Alt"],
            )

        download = await download_info.value

        # Create download folders if they don't exist
        os.makedirs(download_folder, exist_ok=True)
        # Wait for the download process to complete and save the downloaded file
        await download.save_as(full_file_path)
    except Exception as download_error:
        LOG.exception(
            "DownloadFileAction: Failed to download file",
            action=action,
            full_file_path=full_file_path,
        )
        return [ActionFailure(download_error)]

    saved_file = {"file_path": full_file_path}
    return [ActionSuccess(data=saved_file)]
2025-07-07 14:43:10 +08:00
@TraceManager.traced_async(ignore_inputs=["scraped_page", "page"])
async def handle_null_action(
    action: actions.NullAction,
    page: Page,
    scraped_page: ScrapedPage,
    task: Task,
    step: Step,
) -> list[ActionResult]:
    """No-op handler: performs nothing on the page and always reports success."""
    noop_result = ActionSuccess()
    return [noop_result]
2025-07-07 14:43:10 +08:00
@TraceManager.traced_async(ignore_inputs=["scraped_page", "page"])
async def handle_select_option_action(
    action: actions.SelectOptionAction,
    page: Page,
    scraped_page: ScrapedPage,
    task: Task,
    step: Step,
) -> list[ActionResult]:
    """
    Select an option on the element referenced by `action.element_id`.

    Dispatch order:
      1. A custom option element is simply clicked.
      2. A non-selectable element is replaced by its selectable child, when one is found.
      3. A disabled element fails with `InteractWithDisabledElement`.
      4. A <select> goes through `normal_select`, unless another element blocks it; in that
         case the selection continues on the blocking element.
      5. Checkbox inputs are delegated to `handle_checkbox_action`; radio and button inputs
         are clicked.
      6. Everything else is treated as a custom select: open the dropdown, select
         sequentially, and, when that fails but suggests a value, retry by value.

    Returns:
        A list of action results. The custom select path may contain two entries: the
        failed sequential attempt followed by the result of the select-by-value attempt.
    """
    dom = DomUtil(scraped_page, page)
    skyvern_element = await dom.get_skyvern_element_by_id(action.element_id)

    tag_name = skyvern_element.get_tag_name()
    element_dict = scraped_page.id_to_element_dict[action.element_id]
    LOG.info(
        "SelectOptionAction",
        action=action,
        tag_name=tag_name,
        element_dict=element_dict,
    )

    # Handle the edge case:
    # Sometimes our custom select logic could fail, and leaving the dropdown being opened.
    # Confirm if the select action is on the custom option element
    if await skyvern_element.is_custom_option():
        click_action = ClickAction(element_id=action.element_id)
        return await chain_click(task, scraped_page, page, click_action, skyvern_element)

    if not await skyvern_element.is_selectable():
        # 1. find from children
        # TODO: 2. find from siblings and their children
        LOG.info(
            "Element is not selectable, try to find the selectable element in the children",
            tag_name=tag_name,
            action=action,
        )

        selectable_child: SkyvernElement | None = None
        try:
            selectable_child = await skyvern_element.find_selectable_child(dom=dom)
        except Exception as e:
            LOG.error(
                "Failed to find selectable element in children",
                exc_info=True,
                tag_name=tag_name,
                action=action,
            )
            return [ActionFailure(ErrFoundSelectableElement(action.element_id, e))]

        if selectable_child:
            LOG.info(
                "Found selectable element in the children",
                tag_name=selectable_child.get_tag_name(),
                element_id=selectable_child.get_id(),
            )
            # from here on both the action and the element point at the selectable child
            select_action = SelectOptionAction(
                reasoning=action.reasoning,
                element_id=selectable_child.get_id(),
                option=action.option,
                intention=action.intention,
            )
            action = select_action
            skyvern_element = selectable_child

    # dynamically validate the attr, since it could change into enabled after the previous actions
    if await skyvern_element.is_disabled(dynamic=True):
        LOG.warning(
            "Try to select on a disabled element",
            action_type=action.action_type,
            task_id=task.task_id,
            step_id=step.step_id,
            element_id=skyvern_element.get_id(),
        )
        return [ActionFailure(InteractWithDisabledElement(skyvern_element.get_id()))]

    if skyvern_element.get_tag_name() == InteractiveElement.SELECT:
        LOG.info(
            "SelectOptionAction is on <select>",
            action=action,
            task_id=task.task_id,
            step_id=step.step_id,
        )

        try:
            blocking_element, exist = await skyvern_element.find_blocking_element(dom=dom)
        except Exception:
            LOG.warning(
                "Failed to find the blocking element, continue to select on the original <select>",
                task_id=task.task_id,
                step_id=step.step_id,
                exc_info=True,
            )
            return await normal_select(
                action=action, skyvern_element=skyvern_element, builder=dom.scraped_page, task=task, step=step
            )

        if not exist:
            return await normal_select(
                action=action, skyvern_element=skyvern_element, builder=dom.scraped_page, task=task, step=step
            )

        if blocking_element is None:
            # `exist` is set but no element was returned: retry once after scrolling into view
            LOG.info(
                "Try to scroll the element into view, then detecting the blocking element",
                step_id=step.step_id,
            )
            try:
                await skyvern_element.scroll_into_view()
                blocking_element, exist = await skyvern_element.find_blocking_element(dom=dom)
            except Exception:
                LOG.warning(
                    "Failed to find the blocking element when scrolling into view, fallback to normal select",
                    action=action,
                    step_id=step.step_id,
                    exc_info=True,
                )
                return await normal_select(
                    action=action, skyvern_element=skyvern_element, builder=dom.scraped_page, task=task, step=step
                )

        if not exist or blocking_element is None:
            return await normal_select(
                action=action, skyvern_element=skyvern_element, builder=dom.scraped_page, task=task, step=step
            )

        LOG.info(
            "<select> is blocked by another element, going to select on the blocking element",
            task_id=task.task_id,
            step_id=step.step_id,
            blocking_element=blocking_element.get_id(),
        )
        # continue the selection on the blocking element instead of the <select>
        select_action = SelectOptionAction(
            reasoning=action.reasoning,
            element_id=blocking_element.get_id(),
            option=action.option,
            intention=action.intention,
        )
        action = select_action
        skyvern_element = blocking_element

    if await skyvern_element.is_checkbox():
        LOG.info(
            "SelectOptionAction is on <input> checkbox",
            action=action,
            task_id=task.task_id,
            step_id=step.step_id,
        )
        check_action = CheckboxAction(element_id=action.element_id, is_checked=True)
        return await handle_checkbox_action(check_action, page, scraped_page, task, step)

    if await skyvern_element.is_radio():
        LOG.info(
            "SelectOptionAction is on <input> radio",
            action=action,
            task_id=task.task_id,
            step_id=step.step_id,
        )
        click_action = ClickAction(element_id=action.element_id)
        return await chain_click(task, scraped_page, page, click_action, skyvern_element)

    # FIXME: maybe there's a case where <input type="button"> could trigger dropdown menu?
    if await skyvern_element.is_btn_input():
        LOG.info(
            "SelectOptionAction is on <input> button",
            action=action,
            task_id=task.task_id,
            step_id=step.step_id,
        )
        click_action = ClickAction(element_id=action.element_id)
        return await chain_click(task, scraped_page, page, click_action, skyvern_element)

    LOG.info(
        "Trigger custom select",
        action=action,
        element_id=skyvern_element.get_id(),
    )

    timeout = settings.BROWSER_ACTION_TIMEOUT_MS
    skyvern_frame = await SkyvernFrame.create_instance(skyvern_element.get_frame())
    incremental_scraped = IncrementalScrapePage(skyvern_frame=skyvern_frame)
    # set once new elements were detected after opening; gates the Escape cleanup in `finally`
    is_open = False
    suggested_value: str | None = None
    results: list[ActionResult] = []

    # Phase 1: open the dropdown and select sequentially from the emerging options
    try:
        await incremental_scraped.start_listen_dom_increment(await skyvern_element.get_element_handler())
        await skyvern_element.scroll_into_view()

        await skyvern_element.click(page=page, dom=dom, timeout=timeout)
        # wait for options to load
        await skyvern_frame.safe_wait_for_animation_end(before_wait_sec=0.5)

        incremental_element = await incremental_scraped.get_incremental_element_tree(
            clean_and_remove_element_tree_factory(
                task=task, step=step, check_filter_funcs=[check_existed_but_not_option_element_in_dom_factory(dom)]
            ),
        )

        if len(incremental_element) == 0 and skyvern_element.get_tag_name() == InteractiveElement.INPUT:
            LOG.info(
                "No incremental elements detected for the input element, trying to press Arrowdown to trigger the dropdown",
                element_id=skyvern_element.get_id(),
                task_id=task.task_id,
                step_id=step.step_id,
            )
            await skyvern_element.scroll_into_view()
            await skyvern_element.press_key("ArrowDown")
            # wait for options to load
            await skyvern_frame.safe_wait_for_animation_end(before_wait_sec=0.5)
            incremental_element = await incremental_scraped.get_incremental_element_tree(
                clean_and_remove_element_tree_factory(
                    task=task, step=step, check_filter_funcs=[check_existed_but_not_option_element_in_dom_factory(dom)]
                ),
            )

        input_or_select_context = await _get_input_or_select_context(
            action=action, element_tree_builder=scraped_page, step=step, skyvern_element=skyvern_element
        )

        if len(incremental_element) == 0:
            LOG.info(
                "No incremental elements detected by MutationObserver, using re-scraping the page to find the match element"
            )
            results.append(
                await select_from_emerging_elements(
                    current_element_id=skyvern_element.get_id(),
                    options=CustomSelectPromptOptions(
                        is_date_related=input_or_select_context.is_date_related or False,
                        field_information=input_or_select_context.intention or input_or_select_context.field or "",
                        required_field=input_or_select_context.is_required or False,
                        target_value=action.option.label or action.option.value or "",
                    ),
                    page=page,
                    scraped_page=scraped_page,
                    task=task,
                    step=step,
                )
            )
            return results

        is_open = True
        # TODO: support sequentially select from dropdown by value, just support single select now
        result = await sequentially_select_from_dropdown(
            action=action,
            input_or_select_context=input_or_select_context,
            page=page,
            dom=dom,
            skyvern_element=skyvern_element,
            skyvern_frame=skyvern_frame,
            incremental_scraped=incremental_scraped,
            step=step,
            task=task,
            force_select=True,
            target_value=action.option.label or action.option.value or "",
        )
        # force_select won't return None result
        assert result is not None
        assert result.action_result is not None
        results.append(result.action_result)
        if isinstance(result.action_result, ActionSuccess) or result.value is None:
            return results
        # the sequential attempt failed but produced a value to retry with in phase 2
        suggested_value = result.value

    except Exception as e:
        LOG.exception("Custom select error")
        results.append(ActionFailure(exception=e))
        return results
    finally:
        # runs on every exit path above, including the early returns
        if (
            await skyvern_element.is_visible()
            and is_open
            and len(results) > 0
            and not isinstance(results[-1], ActionSuccess)
        ):
            await skyvern_element.scroll_into_view()
            await skyvern_element.coordinate_click(page=page)
            await skyvern_element.press_key("Escape")
        is_open = False
        await skyvern_element.blur()
        await incremental_scraped.stop_listen_dom_increment()

    # Phase 2: reopen the dropdown and select by the suggested value
    LOG.info(
        "Try to select by value in custom select",
        element_id=skyvern_element.get_id(),
        value=suggested_value,
        task_id=task.task_id,
        step_id=step.step_id,
    )
    try:
        await incremental_scraped.start_listen_dom_increment(await skyvern_element.get_element_handler())
        timeout = settings.BROWSER_ACTION_TIMEOUT_MS
        await skyvern_element.scroll_into_view()

        try:
            await skyvern_element.get_locator().click(timeout=timeout)
        except Exception:
            LOG.info(
                "fail to open dropdown by clicking, try to press arrow down to open",
                element_id=skyvern_element.get_id(),
                task_id=task.task_id,
                step_id=step.step_id,
            )
            await skyvern_element.scroll_into_view()
            await skyvern_element.press_key("ArrowDown")

        await skyvern_frame.safe_wait_for_animation_end(before_wait_sec=0.5)
        is_open = True

        result = await select_from_dropdown_by_value(
            value=suggested_value,
            page=page,
            dom=dom,
            skyvern_element=skyvern_element,
            skyvern_frame=skyvern_frame,
            incremental_scraped=incremental_scraped,
            task=task,
            step=step,
        )
        results.append(result)
        return results
    except Exception as e:
        LOG.exception("Custom select by value error")
        results.append(ActionFailure(exception=e))
        return results
    finally:
        # same cleanup as phase 1
        if (
            await skyvern_element.is_visible()
            and is_open
            and len(results) > 0
            and not isinstance(results[-1], ActionSuccess)
        ):
            await skyvern_element.scroll_into_view()
            await skyvern_element.coordinate_click(page=page)
            await skyvern_element.press_key("Escape")
        is_open = False
        await skyvern_element.blur()
        await incremental_scraped.stop_listen_dom_increment()
2025-07-07 14:43:10 +08:00
@TraceManager.traced_async(ignore_inputs=["scraped_page", "page"])
async def handle_checkbox_action(
    action: actions.CheckboxAction,
    page: Page,
    scraped_page: ScrapedPage,
    task: Task,
    step: Step,
) -> list[ActionResult]:
    """
    ******* NOT REGISTERED *******
    Check or uncheck the target element according to `action.is_checked`.

    This handler is intentionally left unregistered: it frequently misbehaves or gets
    stuck in click loops, and treating checkbox actions as plain click actions has
    proven far more reliable.
    """
    dom = DomUtil(scraped_page=scraped_page, page=page)
    target_element = await dom.get_skyvern_element_by_id(action.element_id)
    locator = target_element.locator

    # pick the Playwright call matching the requested state
    set_state = locator.check if action.is_checked else locator.uncheck
    await set_state(timeout=settings.BROWSER_ACTION_TIMEOUT_MS)

    # TODO (suchintan): Why does checking the label work, but not the actual input element?
    return [ActionSuccess()]
2025-07-07 14:43:10 +08:00
@TraceManager.traced_async(ignore_inputs=["scraped_page", "page"])
async def handle_wait_action(
    action: actions.WaitAction,
    page: Page,
    scraped_page: ScrapedPage,
    task: Task,
    step: Step,
) -> list[ActionResult]:
    """Sleep for `action.seconds`, then report a failure: a wait is never counted as a success."""
    await asyncio.sleep(action.seconds)
    wait_failure = ActionFailure(exception=Exception("Wait action is treated as a failure"))
    return [wait_failure]
2025-07-07 14:43:10 +08:00
@TraceManager.traced_async(ignore_inputs=["scraped_page", "page"])
async def handle_terminate_action(
    action: actions.TerminateAction,
    page: Page,
    scraped_page: ScrapedPage,
    task: Task,
    step: Step,
) -> list[ActionResult]:
    """
    Acknowledge a terminate action.

    When the task defines an error code mapping, the user-defined errors extracted for
    this step are stored on `action.errors` first. The handler always returns success.
    """
    if not task.error_code_mapping:
        return [ActionSuccess()]

    action.errors = await extract_user_defined_errors(
        task=task,
        step=step,
        scraped_page=scraped_page,
        reasoning=action.reasoning,
    )
    return [ActionSuccess()]
2025-07-07 14:43:10 +08:00
@TraceManager.traced_async(ignore_inputs=["scraped_page", "page"])
async def handle_complete_action(
    action: actions.CompleteAction,
    page: Page,
    scraped_page: ScrapedPage,
    task: Task,
    step: Step,
) -> list[ActionResult]:
    """
    Accept a complete action, verifying the user goal first when needed.

    Verification only runs for an unverified action on a task with a navigation goal.
    A verification error or an unachieved goal yields a failure; otherwise the action is
    marked verified and, for tasks without a data extraction goal, the verifier's
    thoughts (when present) are stored as the task's extracted information.
    """
    # already verified, or nothing to verify against
    if action.verified or not task.navigation_goal:
        return [ActionSuccess()]

    log_context = {
        "task_id": task.task_id,
        "step_id": step.step_id,
        "workflow_run_id": task.workflow_run_id,
    }

    LOG.info("CompleteAction hasn't been verified, going to verify the user goal", **log_context)
    try:
        verification_result = await app.agent.complete_verify(page, scraped_page, task, step)
    except Exception as verify_error:
        LOG.exception("Failed to verify the complete action", **log_context)
        return [ActionFailure(exception=verify_error)]

    if not verification_result.user_goal_achieved:
        illegit = IllegitComplete(data={"error": verification_result.thoughts})
        return [ActionFailure(exception=illegit)]

    LOG.info("CompleteAction has been verified successfully", **log_context)
    action.verified = True

    should_store_thoughts = not task.data_extraction_goal and verification_result.thoughts
    if should_store_thoughts:
        await app.DATABASE.update_task(
            task.task_id,
            organization_id=task.organization_id,
            extracted_information=verification_result.thoughts,
        )

    return [ActionSuccess()]
2025-07-07 14:43:10 +08:00
@TraceManager.traced_async(ignore_inputs=["scraped_page", "page"])
async def handle_extract_action(
    action: actions.ExtractAction,
    page: Page,
    scraped_page: ScrapedPage,
    task: Task,
    step: Step,
) -> list[ActionResult]:
    """
    Extract information for the task's data extraction goal.

    Returns a single ``ActionSuccess`` carrying the scraped data when the task has a
    data extraction goal; otherwise logs a warning and returns a single ``ActionFailure``.
    """
    # Guard: extraction is meaningless without a goal.
    if not task.data_extraction_goal:
        LOG.warning("No data extraction goal, skipping extract action", step_id=step.step_id)
        return [ActionFailure(exception=Exception("No data extraction goal"))]

    scrape_outcome = await extract_information_for_navigation_goal(
        scraped_page=scraped_page,
        task=task,
        step=step,
    )
    return [ActionSuccess(data=scrape_outcome.scraped_data)]
2025-07-07 14:43:10 +08:00
@TraceManager.traced_async(ignore_inputs=["scraped_page", "page"])
async def handle_scroll_action(
    action: actions.ScrollAction,
    page: Page,
    scraped_page: ScrapedPage,
    task: Task,
    step: Step,
) -> list[ActionResult]:
    """
    Scroll the page by ``action.scroll_x`` / ``action.scroll_y``.

    When the action carries a pointer position (``action.x`` and ``action.y``), the mouse
    is moved there before scrolling. The scroll is issued through ``window.scrollBy``
    evaluated in the page.

    Returns a single ``ActionSuccess``.
    """
    # Compare against None explicitly: 0 is a valid coordinate (top/left edge) and a plain
    # truthiness test would silently skip the pointer move for it.
    # NOTE(review): presumably x/y are None when no position is supplied -- confirm against ScrollAction.
    if action.x is not None and action.y is not None:
        await page.mouse.move(action.x, action.y)

    await page.evaluate(f"window.scrollBy({action.scroll_x}, {action.scroll_y})")
    return [ActionSuccess()]
2025-07-07 14:43:10 +08:00
@TraceManager.traced_async(ignore_inputs=["scraped_page", "page"])
async def handle_keypress_action(
    action: actions.KeypressAction,
    page: Page,
    scraped_page: ScrapedPage,
    task: Task,
    step: Step,
) -> list[ActionResult]:
    """
    Send the action's keys to the page via ``handler_utils.keypress``.

    ``action.hold`` and ``action.duration`` are forwarded unchanged.
    Returns a single ``ActionSuccess``.
    """
    keys_to_press = action.keys
    keypress_options = {"hold": action.hold, "duration": action.duration}
    await handler_utils.keypress(page, keys_to_press, **keypress_options)

    return [ActionSuccess()]
2025-07-07 14:43:10 +08:00
@TraceManager.traced_async(ignore_inputs=["scraped_page", "page"])
async def handle_move_action(
    action: actions.MoveAction,
    page: Page,
    scraped_page: ScrapedPage,
    task: Task,
    step: Step,
) -> list[ActionResult]:
    """
    Move the mouse pointer to ``(action.x, action.y)`` on the page.

    Returns a single ``ActionSuccess``.
    """
    target_x, target_y = action.x, action.y
    await page.mouse.move(target_x, target_y)

    return [ActionSuccess()]
2025-07-07 14:43:10 +08:00
@TraceManager.traced_async(ignore_inputs=["scraped_page", "page"])
async def handle_drag_action(
    action: actions.DragAction,
    page: Page,
    scraped_page: ScrapedPage,
    task: Task,
    step: Step,
) -> list[ActionResult]:
    """
    Perform a drag starting at ``(action.start_x, action.start_y)`` along ``action.path``.

    The gesture itself is delegated to ``handler_utils.drag``.
    Returns a single ``ActionSuccess``.
    """
    start_x, start_y = action.start_x, action.start_y
    drag_path = action.path
    await handler_utils.drag(page, start_x, start_y, drag_path)

    return [ActionSuccess()]
2025-07-07 14:43:10 +08:00
@TraceManager.traced_async(ignore_inputs=["scraped_page", "page"])
async def handle_verification_code_action(
    action: actions.VerificationCodeAction,
    page: Page,
    scraped_page: ScrapedPage,
    task: Task,
    step: Step,
) -> list[ActionResult]:
    """
    Store the action's verification code in the current skyvern context.

    The code is saved in the context's ``totp_codes`` mapping under the task id.
    Returns a single ``ActionSuccess``.
    """
    code = action.verification_code

    # NOTE(review): the verification code itself is written to the log here -- confirm this is intended.
    LOG.info(
        "Setting verification code in skyvern context",
        task_id=task.task_id,
        step_id=step.step_id,
        verification_code=code,
    )

    skyvern_context.ensure_context().totp_codes[task.task_id] = code
    return [ActionSuccess()]
2025-07-07 14:43:10 +08:00
@TraceManager.traced_async(ignore_inputs=["scraped_page", "page"])
async def handle_left_mouse_action(
    action: actions.LeftMouseAction,
    page: Page,
    scraped_page: ScrapedPage,
    task: Task,
    step: Step,
) -> list[ActionResult]:
    """
    Drive the left mouse button at ``(action.x, action.y)`` in ``action.direction``.

    The button handling is delegated to ``handler_utils.left_mouse``.
    Returns a single ``ActionSuccess``.
    """
    pointer_x, pointer_y = action.x, action.y
    button_direction = action.direction
    await handler_utils.left_mouse(page, pointer_x, pointer_y, button_direction)

    return [ActionSuccess()]
# Bind every supported action type to its handler. Registration happens in the order
# listed below; the DOWNLOAD_FILE entry is intentionally left commented out.
for _action_type, _action_handler in (
    (ActionType.SOLVE_CAPTCHA, handle_solve_captcha_action),
    (ActionType.CLICK, handle_click_action),
    (ActionType.INPUT_TEXT, handle_input_text_action),
    (ActionType.UPLOAD_FILE, handle_upload_file_action),
    # (ActionType.DOWNLOAD_FILE, handle_download_file_action),
    (ActionType.NULL_ACTION, handle_null_action),
    (ActionType.SELECT_OPTION, handle_select_option_action),
    (ActionType.WAIT, handle_wait_action),
    (ActionType.TERMINATE, handle_terminate_action),
    (ActionType.COMPLETE, handle_complete_action),
    (ActionType.EXTRACT, handle_extract_action),
    (ActionType.SCROLL, handle_scroll_action),
    (ActionType.KEYPRESS, handle_keypress_action),
    (ActionType.MOVE, handle_move_action),
    (ActionType.DRAG, handle_drag_action),
    (ActionType.VERIFICATION_CODE, handle_verification_code_action),
    (ActionType.LEFT_MOUSE, handle_left_mouse_action),
):
    ActionHandler.register_action_type(_action_type, _action_handler)

# Keep the loop temporaries out of the module namespace.
del _action_type, _action_handler
async def get_actual_value_of_parameter_if_secret(task: Task, parameter: str) -> Any:
    """
    Resolve ``parameter`` to its original secret value when it refers to a secret.

    Tasks that do not belong to a workflow run get ``parameter`` back unchanged, and so
    does any parameter for which the workflow run context holds no secret value.
    This is only used for InputTextAction, UploadFileAction, and ClickAction (if it has a file_url).
    """
    run_id = task.workflow_run_id
    if run_id is None:
        return parameter

    run_context = app.WORKFLOW_CONTEXT_MANAGER.get_workflow_run_context(run_id)
    resolved = run_context.get_original_secret_value_or_none(parameter)
    if resolved is None:
        return parameter
    return resolved
def generate_totp_value(task: Task, parameter: str) -> str:
    """
    Produce the current TOTP code for the secret associated with ``parameter``.

    ``parameter`` is returned unchanged when the task has no workflow run, or when the
    workflow run context holds no TOTP secret for it (a warning is logged in that case).
    """
    run_id = task.workflow_run_id
    if run_id is None:
        return parameter

    run_context = app.WORKFLOW_CONTEXT_MANAGER.get_workflow_run_context(run_id)
    secret_key = run_context.totp_secret_value_key(parameter)
    secret = run_context.get_original_secret_value_or_none(secret_key)

    if secret:
        return pyotp.TOTP(secret).now()

    LOG.warning("No TOTP secret found, returning the parameter value as is", parameter=parameter)
    return parameter
async def chain_click(
    task: Task,
    scraped_page: ScrapedPage,
    page: Page,
    action: ClickAction | UploadFileAction,
    skyvern_element: SkyvernElement,
    timeout: int = settings.BROWSER_ACTION_TIMEOUT_MS,
) -> List[ActionResult]:
    """
    Click ``skyvern_element``, falling back through a chain of alternative click strategies.

    A "filechooser" listener is attached to ``page`` for the duration of the call. If a file
    chooser opens, the listener hands it the file downloaded from ``action.file_url`` (or an
    empty list when the action has no ``file_url``).

    Strategies, in the order they are attempted:
    1. ``navigate_to_a_href``; if that returns a falsy value, a plain locator click.
    2. For a ``label`` element: the element found by ``find_label_for``, then an INPUT found
       among the label's children. For any other element: the label found by
       ``find_bound_label_by_attr_id``, then the one from ``find_bound_label_by_direct_parent``.
    3. If the element is no longer visible, stop and return the collected results.
    4. If ``find_blocking_element`` reports no blocking element: stop when the element is
       not blocked, otherwise try a coordinate click and then a JavaScript click.
    5. If a blocking element exists and is a parent or sibling of the target, click it.

    Returns:
        ``[ActionSuccess()]`` when the first attempt works; otherwise the list of
        ``ActionFailure`` entries collected along the way, ending with an ``ActionSuccess``
        if one of the fallbacks succeeded. When ``action.file_url`` is set but no file
        chooser was triggered, the result is replaced by a single
        ``ActionFailure(WrongElementToUploadFile(...))``.
    """
    # Add a defensive page handler here in case a click action opens a file chooser.
    # This automatically dismisses the dialog
    # File choosers are impossible to close if you don't expect one. Instead of dealing with it, close it!

    dom = DomUtil(scraped_page=scraped_page, page=page)
    locator = skyvern_element.locator
    # TODO (suchintan): This should likely result in an ActionFailure -- we can figure out how to do this later!
    LOG.info("Chain click starts", action=action, locator=locator)
    # Stays an empty list unless the action carries a file_url to download.
    file: list[str] | str = []
    if action.file_url:
        file_url = await get_actual_value_of_parameter_if_secret(task, action.file_url)
        file = await handler_utils.download_file(file_url, action.model_dump())

    # Flipped by the listener below; inspected in the `finally` clause.
    is_filechooser_trigger = False

    async def fc_func(fc: FileChooser) -> None:
        # Record that a file chooser opened and answer it with the prepared file(s).
        nonlocal is_filechooser_trigger
        is_filechooser_trigger = True
        await fc.set_files(files=file)

    page.on("filechooser", fc_func)
    LOG.info("Registered file chooser listener", action=action, path=file)

    # NOTE: the string below is a bare expression statement, not a docstring; it has no runtime effect.
    """
    Clicks on an element identified by the css and its parent if failed.
    :param css: css of the element to click
    """
    try:
        # Only click when navigating through the element's href did not happen.
        if not await skyvern_element.navigate_to_a_href(page=page):
            await locator.click(timeout=timeout)
        LOG.info("Chain click: main element click succeeded", action=action, locator=locator)
        return [ActionSuccess()]

    except Exception as e:
        # Every failed attempt is appended here so the caller sees the whole chain.
        action_results: list[ActionResult] = [ActionFailure(FailToClick(action.element_id, msg=str(e)))]

        if skyvern_element.get_tag_name() == "label":
            try:
                LOG.info(
                    "Chain click: it's a label element. going to try for-click",
                    task_id=task.task_id,
                    action=action,
                    element=str(skyvern_element),
                    locator=locator,
                )
                if bound_element := await skyvern_element.find_label_for(dom=dom):
                    await bound_element.get_locator().click(timeout=timeout)
                    action_results.append(ActionSuccess())
                    return action_results
            except Exception as e:
                action_results.append(ActionFailure(FailToClick(action.element_id, anchor="for", msg=str(e))))

            try:
                # sometimes the element is the direct children of the label, instead of using for="xx" attribute
                # since it's a click action, the target element we're searching should only be INPUT
                LOG.info(
                    "Chain click: it's a label element. going to check for input of the direct children",
                    task_id=task.task_id,
                    action=action,
                    element=str(skyvern_element),
                    locator=locator,
                )
                if bound_element := await skyvern_element.find_element_in_label_children(
                    dom=dom, element_type=InteractiveElement.INPUT
                ):
                    await bound_element.get_locator().click(timeout=timeout)
                    action_results.append(ActionSuccess())
                    return action_results
            except Exception as e:
                action_results.append(
                    ActionFailure(FailToClick(action.element_id, anchor="direct_children", msg=str(e)))
                )
        else:
            try:
                LOG.info(
                    "Chain click: it's a non-label element. going to find the bound label element by attribute id and click",
                    task_id=task.task_id,
                    action=action,
                    element=str(skyvern_element),
                    locator=locator,
                )
                if bound_locator := await skyvern_element.find_bound_label_by_attr_id():
                    await bound_locator.click(timeout=timeout)
                    action_results.append(ActionSuccess())
                    return action_results
            except Exception as e:
                action_results.append(ActionFailure(FailToClick(action.element_id, anchor="attr_id", msg=str(e))))

            try:
                # sometimes the element is the direct children of the label, instead of using for="xx" attribute
                # so we check the direct parent if it's a label element
                LOG.info(
                    "Chain click: it's a non-label element. going to find the bound label element by direct parent",
                    task_id=task.task_id,
                    action=action,
                    element=str(skyvern_element),
                    locator=locator,
                )
                if bound_locator := await skyvern_element.find_bound_label_by_direct_parent():
                    await bound_locator.click(timeout=timeout)
                    action_results.append(ActionSuccess())
                    return action_results
            except Exception as e:
                action_results.append(ActionFailure(FailToClick(action.element_id, anchor="direct_parent", msg=str(e))))

        # Label-based fallbacks are exhausted; give up early if the element is gone from view.
        if not await skyvern_element.is_visible():
            LOG.info(
                "Chain click: exit since the element is not visible on the page anymore",
                task_id=task.task_id,
                action=action,
                element=str(skyvern_element),
                locator=locator,
            )
            return action_results

        blocking_element, blocked = await skyvern_element.find_blocking_element(
            dom=DomUtil(scraped_page=scraped_page, page=page)
        )
        if blocking_element is None:
            # No blocking element was returned; `blocked` tells whether something still covers the target.
            if not blocked:
                LOG.info(
                    "Chain click: exit since the element is not blocking by any element",
                    task_id=task.task_id,
                    action=action,
                    element=str(skyvern_element),
                    locator=locator,
                )
                return action_results

            try:
                LOG.info(
                    "Chain click: element is blocked by an non-interactable element, try to click by the coordinates",
                    task_id=task.task_id,
                    action=action,
                    element=str(skyvern_element),
                    locator=locator,
                )
                await skyvern_element.coordinate_click(page=page)
                action_results.append(ActionSuccess())
                return action_results
            except Exception as e:
                action_results.append(
                    ActionFailure(FailToClick(action.element_id, anchor="coordinate_click", msg=str(e)))
                )

            LOG.info(
                "Chain click: element is blocked by an non-interactable element, going to use javascript click instead of playwright click",
                task_id=task.task_id,
                action=action,
                element=str(skyvern_element),
                locator=locator,
            )
            try:
                await skyvern_element.click_in_javascript()
                action_results.append(ActionSuccess())
                return action_results
            except Exception as e:
                # Last resort for this branch: record the failure and stop.
                action_results.append(ActionFailure(FailToClick(action.element_id, anchor="self_js", msg=str(e))))
                return action_results

        try:
            LOG.debug(
                "Chain click: verifying the blocking element is parent or sibling of the target element",
                task_id=task.task_id,
                action=action,
                element=str(blocking_element),
                locator=locator,
            )
            # The blocking element is only clicked when it is a parent or sibling of the target.
            if await blocking_element.is_parent_of(
                await skyvern_element.get_element_handler()
            ) or await blocking_element.is_sibling_of(await skyvern_element.get_element_handler()):
                LOG.info(
                    "Chain click: element is blocked by other elements, going to click on the blocking element",
                    task_id=task.task_id,
                    action=action,
                    element=str(blocking_element),
                    locator=locator,
                )
                await blocking_element.get_locator().click(timeout=timeout)
                action_results.append(ActionSuccess())
                return action_results
        except Exception as e:
            action_results.append(ActionFailure(FailToClick(action.element_id, anchor="blocking_element", msg=str(e))))

        return action_results
    finally:
        # Runs on every exit path above, including the early returns.
        LOG.info("Remove file chooser listener", action=action)

        # Sleep for 15 seconds after uploading a file to let the page process it
        # Removing this breaks file uploads using the filechooser
        # KEREM DO NOT REMOVE
        if file:
            await asyncio.sleep(15)
        page.remove_listener("filechooser", fc_func)

        if action.file_url and not is_filechooser_trigger:
            LOG.warning(
                "Action has file_url, but filechoose even hasn't been triggered. Upload file attempt seems to fail",
                action=action,
            )
            # NOTE: a `return` inside `finally` replaces any value returned above and discards
            # an exception that is still propagating.
            return [ActionFailure(WrongElementToUploadFile(action.element_id))]
2024-06-25 01:46:54 +08:00
2025-07-07 14:43:10 +08:00
@TraceManager.traced_async(ignore_inputs=["context", "page", "dom", "text", "skyvern_element", "preserved_elements"])
async def choose_auto_completion_dropdown(
    context: InputOrSelectContext,
    page: Page,
    scraped_page: ScrapedPage,
    dom: DomUtil,
    text: str,
    skyvern_element: SkyvernElement,
    step: Step,
    task: Task,
    preserved_elements: list[dict] | None = None,
    relevance_threshold: float = 0.8,
) -> AutoCompletionResult:
    """
    Type ``text`` into ``skyvern_element`` and let an LLM pick one of the options that appear.

    Steps:
    1. Start listening for DOM increments, fill the input with ``text`` and collect the
       incremental element tree.
    2. Re-check the ``preserved_elements`` (from an earlier attempt) that are still present in
       the frame and add them to the candidates.
    3. If no incremental elements were found, fall back to re-scraping the page and using the
       newly appeared interactable elements.
    4. Ask the auto-completion LLM handler which element to choose, then click it.

    Args:
        preserved_elements: element dicts from a previous attempt; entries without an ``id``
            or no longer found in the frame are ignored.
        relevance_threshold: minimum ``relevance_float`` in the LLM response for the chosen
            option to be accepted.

    Returns:
        An ``AutoCompletionResult`` whose ``incremental_elements`` holds the candidates seen.
        Any exception raised inside the ``try`` is caught and stored as an ``ActionFailure``
        in ``action_result`` instead of propagating.
    """
    preserved_elements = preserved_elements or []
    # Stays True unless an option is clicked; drives the cleanup in `finally`.
    clear_input = True
    result = AutoCompletionResult()

    current_frame = skyvern_element.get_frame()
    skyvern_frame = await SkyvernFrame.create_instance(current_frame)
    incremental_scraped = IncrementalScrapePage(skyvern_frame=skyvern_frame)
    await incremental_scraped.start_listen_dom_increment(await skyvern_element.get_element_handler())

    try:
        await skyvern_element.press_fill(text)
        # wait for new elements to load
        await skyvern_frame.safe_wait_for_animation_end(before_wait_sec=0.5)
        incremental_element = await incremental_scraped.get_incremental_element_tree(
            clean_and_remove_element_tree_factory(
                task=task, step=step, check_filter_funcs=[check_existed_but_not_option_element_in_dom_factory(dom)]
            ),
        )

        # check if elements in preserve list are still on the page
        confirmed_preserved_list: list[dict] = []
        for element in preserved_elements:
            element_id = element.get("id")
            if not element_id:
                continue
            locator = current_frame.locator(f'[{SKYVERN_ID_ATTR}="{element_id}"]')
            cnt = await locator.count()
            if cnt == 0:
                continue

            element_handler = await locator.element_handle(timeout=settings.BROWSER_ACTION_TIMEOUT_MS)
            if not element_handler:
                continue

            # NOTE(review): the frame id and interactable flag passed here come from the input
            # element (`skyvern_element`), not from the preserved element -- confirm this is intended.
            current_element = await skyvern_frame.parse_element_from_html(
                skyvern_element.get_frame_id(),
                element_handler,
                skyvern_element.is_interactable(),
            )
            confirmed_preserved_list.append(current_element)

        if len(confirmed_preserved_list) > 0:
            confirmed_preserved_list = await app.AGENT_FUNCTION.cleanup_element_tree_factory(task=task, step=step)(
                skyvern_frame.get_frame(), skyvern_frame.get_frame().url, copy.deepcopy(confirmed_preserved_list)
            )
            confirmed_preserved_list = trim_element_tree(copy.deepcopy(confirmed_preserved_list))

        incremental_element.extend(confirmed_preserved_list)

        result.incremental_elements = copy.deepcopy(incremental_element)
        html = ""
        new_interactable_element_ids = []
        if len(incremental_element) > 0:
            cleaned_incremental_element = remove_duplicated_HTML_element(incremental_element)
            html = incremental_scraped.build_html_tree(cleaned_incremental_element)
        else:
            # Nothing came through the incremental listener: diff a fresh scrape against the
            # original one and keep only the new ids whose elements are interactable.
            scraped_page_after_open = await scraped_page.generate_scraped_page_without_screenshots()
            new_element_ids = set(scraped_page_after_open.id_to_css_dict.keys()) - set(
                scraped_page.id_to_css_dict.keys()
            )

            dom_after_open = DomUtil(scraped_page=scraped_page_after_open, page=page)
            new_interactable_element_ids = [
                element_id
                for element_id in new_element_ids
                if (await dom_after_open.get_skyvern_element_by_id(element_id)).is_interactable()
            ]
            if len(new_interactable_element_ids) == 0:
                raise NoIncrementalElementFoundForAutoCompletion(element_id=skyvern_element.get_id(), text=text)
            LOG.info(
                "New elements detected after the input",
                new_elements_ids=new_interactable_element_ids,
            )
            result.incremental_elements = copy.deepcopy(
                [scraped_page_after_open.id_to_element_dict[element_id] for element_id in new_interactable_element_ids]
            )
            html = scraped_page_after_open.build_element_tree()

        auto_completion_confirm_prompt = prompt_engine.load_prompt(
            "auto-completion-choose-option",
            is_search=context.is_search_bar,
            # The intention, when present, takes precedence over the raw field description.
            field_information=context.field if not context.intention else context.intention,
            filled_value=text,
            navigation_goal=task.navigation_goal,
            navigation_payload_str=json.dumps(task.navigation_payload),
            elements=html,
            new_elements_ids=new_interactable_element_ids,
            local_datetime=datetime.now(skyvern_context.ensure_context().tz_info).isoformat(),
        )
        LOG.info(
            "Confirm if it's an auto completion dropdown",
            step_id=step.step_id,
            task_id=task.task_id,
        )
        json_response = await app.AUTO_COMPLETION_LLM_API_HANDLER(
            prompt=auto_completion_confirm_prompt, step=step, prompt_name="auto-completion-choose-option"
        )

        element_id = json_response.get("id", "")
        relevance_float = json_response.get("relevance_float", 0)
        if json_response.get("direct_searching", False):
            LOG.info(
                "Decided to directly search with the current value",
                value=text,
                step_id=step.step_id,
                task_id=task.task_id,
            )
            await skyvern_element.press_key("Enter")
            # `clear_input` is still True on this path, so `finally` clears the input if it is visible.
            return result

        if not element_id:
            reasoning = json_response.get("reasoning")
            raise NoSuitableAutoCompleteOption(reasoning=reasoning, target_value=text)

        if relevance_float < relevance_threshold:
            LOG.info(
                f"The closest option doesn't meet the condition(relevance_float>={relevance_threshold})",
                element_id=element_id,
                relevance_float=relevance_float,
            )
            reasoning = json_response.get("reasoning")
            raise NoAutoCompleteOptionMeetCondition(
                reasoning=reasoning,
                required_relevance=relevance_threshold,
                target_value=text,
                closest_relevance=relevance_float,
            )

        LOG.info(
            "Find a suitable option to choose",
            element_id=element_id,
            step_id=step.step_id,
            task_id=task.task_id,
        )

        locator = current_frame.locator(f'[{SKYVERN_ID_ATTR}="{element_id}"]')
        if await locator.count() == 0:
            raise MissingElement(element_id=element_id)

        await locator.click(timeout=settings.BROWSER_ACTION_TIMEOUT_MS)
        # An option was clicked: keep the value in the input.
        clear_input = False
        return result

    except Exception as e:
        # Failures are reported through the result object instead of being raised.
        LOG.info(
            "Failed to choose the auto completion dropdown",
            exc_info=True,
            input_value=text,
            task_id=task.task_id,
            step_id=step.step_id,
        )
        result.action_result = ActionFailure(exception=e)
        return result
    finally:
        # Always stop the DOM listener; clear the typed text unless an option was clicked.
        await incremental_scraped.stop_listen_dom_increment()
        if clear_input and await skyvern_element.is_visible():
            await skyvern_element.input_clear()
def remove_duplicated_HTML_element(elements: list[dict]) -> list[dict]:
    """
    Drop elements whose ``hash_element`` value has already been seen.

    The first element for each hash is kept and the original order is preserved.
    """
    # dicts keep insertion order, and setdefault only stores the first element per hash.
    first_seen: dict = {}
    for candidate in elements:
        first_seen.setdefault(hash_element(element=candidate), candidate)
    return list(first_seen.values())
2024-08-21 10:54:32 +08:00
async def input_or_auto_complete_input(
    input_or_select_context: InputOrSelectContext,
    scraped_page: ScrapedPage,
    page: Page,
    dom: DomUtil,
    text: str,
    skyvern_element: SkyvernElement,
    step: Step,
    task: Task,
) -> ActionResult | None:
    """
    Fill an auto-completion input, retrying with LLM-suggested alternative values.

    The original ``text`` is tried first through ``choose_auto_completion_dropdown``. If that
    does not succeed (and the field is not a search bar), an LLM is asked for potential
    values, each of which is tried in turn.

    Returns:
        ``ActionSuccess`` as soon as ``choose_auto_completion_dropdown`` reports an
        ``ActionSuccess`` result; ``None`` when the field is a search bar and the first
        attempt did not succeed, or when all attempts are used up; an ``ActionFailure``
        wrapping ``ErrEmptyTweakValue`` when the LLM returns an empty tweaked value.
    """
    LOG.info(
        "Trigger auto completion",
        task_id=task.task_id,
        step_id=step.step_id,
        element_id=skyvern_element.get_id(),
    )

    # 1. press the original text to see if there's a match
    # 2. call LLM to find 5 potential values based on the original text
    # 3. try each potential values from #2
    # 4. call LLM to tweak the original text according to the information from #3, then start #1 again

    # FIXME: try the whole loop for once now, to speed up skyvern
    MAX_AUTO_COMPLETE_ATTEMP = 1
    current_attemp = 0
    current_value = text
    result = AutoCompletionResult()

    while current_attemp < MAX_AUTO_COMPLETE_ATTEMP:
        current_attemp += 1
        # Both accumulators are reset on every outer attempt.
        whole_new_elements: list[dict] = []
        tried_values: list[str] = []

        LOG.info(
            "Try the potential value for auto completion",
            step_id=step.step_id,
            task_id=task.task_id,
            input_value=current_value,
        )
        result = await choose_auto_completion_dropdown(
            context=input_or_select_context,
            page=page,
            scraped_page=scraped_page,
            dom=dom,
            text=current_value,
            # Elements seen in the previous attempt are handed over so they can be re-checked.
            preserved_elements=result.incremental_elements,
            skyvern_element=skyvern_element,
            step=step,
            task=task,
        )
        if isinstance(result.action_result, ActionSuccess):
            return ActionSuccess()

        if input_or_select_context.is_search_bar:
            LOG.info(
                "Stop generating potential values for the auto-completion since it's a search bar",
                context=input_or_select_context,
                step_id=step.step_id,
                task_id=task.task_id,
            )
            return None

        tried_values.append(current_value)
        whole_new_elements.extend(result.incremental_elements)

        # The intention, when present, takes precedence over the raw field description.
        field_information = (
            input_or_select_context.field
            if not input_or_select_context.intention
            else input_or_select_context.intention
        )

        prompt = prompt_engine.load_prompt(
            "auto-completion-potential-answers",
            potential_value_count=AUTO_COMPLETION_POTENTIAL_VALUES_COUNT,
            field_information=field_information,
            current_value=current_value,
            navigation_goal=task.navigation_goal,
            navigation_payload_str=json.dumps(task.navigation_payload),
            local_datetime=datetime.now(skyvern_context.ensure_context().tz_info).isoformat(),
        )

        LOG.info(
            "Ask LLM to give potential values based on the current value",
            current_value=current_value,
            step_id=step.step_id,
            task_id=task.task_id,
            potential_value_count=AUTO_COMPLETION_POTENTIAL_VALUES_COUNT,
        )
        json_respone = await app.SECONDARY_LLM_API_HANDLER(
            prompt=prompt, step=step, prompt_name="auto-completion-potential-answers"
        )
        values: list[dict] = json_respone.get("potential_values", [])

        for each_value in values:
            value: str = each_value.get("value", "")
            if not value:
                LOG.info(
                    "Empty potential value, skip this attempt",
                    step_id=step.step_id,
                    task_id=task.task_id,
                    value=each_value,
                )
                continue
            LOG.info(
                "Try the potential value for auto completion",
                step_id=step.step_id,
                task_id=task.task_id,
                input_value=value,
            )
            result = await choose_auto_completion_dropdown(
                context=input_or_select_context,
                page=page,
                scraped_page=scraped_page,
                dom=dom,
                text=value,
                preserved_elements=result.incremental_elements,
                skyvern_element=skyvern_element,
                step=step,
                task=task,
            )
            if isinstance(result.action_result, ActionSuccess):
                return ActionSuccess()

            tried_values.append(value)
            whole_new_elements.extend(result.incremental_elements)

        # WARN: currently, we don't trigger this logic because MAX_AUTO_COMPLETE_ATTEMP is 1, to speed up skyvern
        if current_attemp < MAX_AUTO_COMPLETE_ATTEMP:
            LOG.info(
                "Ask LLM to tweak the current value based on tried input values",
                step_id=step.step_id,
                task_id=task.task_id,
                current_value=current_value,
                current_attemp=current_attemp,
            )
            cleaned_new_elements = remove_duplicated_HTML_element(whole_new_elements)
            prompt = prompt_engine.load_prompt(
                "auto-completion-tweak-value",
                field_information=field_information,
                current_value=current_value,
                navigation_goal=task.navigation_goal,
                navigation_payload_str=json.dumps(task.navigation_payload),
                tried_values=json.dumps(tried_values),
                popped_up_elements="".join([json_to_html(element) for element in cleaned_new_elements]),
                local_datetime=datetime.now(skyvern_context.ensure_context().tz_info).isoformat(),
            )
            json_respone = await app.SECONDARY_LLM_API_HANDLER(
                prompt=prompt, step=step, prompt_name="auto-completion-tweak-value"
            )
            context_reasoning = json_respone.get("reasoning")
            new_current_value = json_respone.get("tweaked_value", "")
            if not new_current_value:
                return ActionFailure(ErrEmptyTweakValue(reasoning=context_reasoning, current_value=current_value))
            LOG.info(
                "Ask LLM tweaked the current value with a new value",
                step_id=step.step_id,
                task_id=task.task_id,
                field_information=input_or_select_context.field,
                current_value=current_value,
                new_value=new_current_value,
            )
            current_value = new_current_value

    else:
        # `while ... else`: reached once the loop condition turns false, i.e. every attempt
        # has been used without an earlier return.
        LOG.warning(
            "Auto completion didn't finish, this might leave the input value to be empty.",
            context=input_or_select_context,
            step_id=step.step_id,
            task_id=task.task_id,
        )
        return None
2024-08-21 10:54:32 +08:00
2025-07-07 14:43:10 +08:00
@TraceManager.traced_async(
    ignore_inputs=[
        "input_or_select_context",
        "page",
        "dom",
        "skyvern_element",
        "skyvern_frame",
        "incremental_scraped",
        "dropdown_menu_element",
        "target_value",
        "continue_until_close",
    ]
)
async def sequentially_select_from_dropdown(
    action: SelectOptionAction,
    input_or_select_context: InputOrSelectContext,
    page: Page,
    dom: DomUtil,
    skyvern_element: SkyvernElement,
    skyvern_frame: SkyvernFrame,
    incremental_scraped: IncrementalScrapePage,
    step: Step,
    task: Task,
    dropdown_menu_element: SkyvernElement | None = None,
    force_select: bool = False,
    target_value: str = "",
    continue_until_close: bool = False,
) -> CustomSingleSelectResult | None:
    """
    Repeatedly call ``select_from_dropdown`` to walk through a multi-level custom dropdown.

    Up to ``MAX_DATEPICKER_DEPTH`` selections are made for date-related fields and
    ``MAX_SELECT_DEPTH`` for everything else. After each selection the loop stops when the
    result reports it is done, when no new elements appeared for a next level, or when an
    LLM confirms that the mini goal is finished.

    Args:
        force_select: when False and the context is a search bar, nothing is selected and
            ``None`` is returned.
        continue_until_close: when True, the LLM confirmation is skipped and selection
            continues with the next level.

    Returns:
        The ``CustomSingleSelectResult`` of the last selection made, or ``None`` when no
        selection was attempted.

    TODO: support to return all values retrieved from the sequentially select
    Only return the last value today
    """
    if not force_select and input_or_select_context.is_search_bar:
        LOG.info(
            "Exit custom selection mode since it's a non-force search bar",
            context=input_or_select_context,
            task_id=task.task_id,
            step_id=step.step_id,
        )
        return None

    # TODO: only support the third-level dropdown selection now, but for date picker, we need to support more levels as it will move the month, year, etc.
    MAX_DATEPICKER_DEPTH = 30
    MAX_SELECT_DEPTH = 3
    max_depth = MAX_DATEPICKER_DEPTH if input_or_select_context.is_date_related else MAX_SELECT_DEPTH
    # NOTE(review): `values` is appended to below but never read in this function.
    values: list[str | None] = []
    select_history: list[CustomSingleSelectResult] = []
    single_select_result: CustomSingleSelectResult | None = None

    check_filter_funcs: list[CheckFilterOutElementIDFunc] = [check_existed_but_not_option_element_in_dom_factory(dom)]
    for i in range(max_depth):
        single_select_result = await select_from_dropdown(
            context=input_or_select_context,
            page=page,
            skyvern_element=skyvern_element,
            skyvern_frame=skyvern_frame,
            incremental_scraped=incremental_scraped,
            check_filter_funcs=check_filter_funcs,
            step=step,
            task=task,
            dropdown_menu_element=dropdown_menu_element,
            select_history=select_history,
            force_select=force_select,
            target_value=target_value,
        )
        assert single_select_result is not None
        select_history.append(single_select_result)
        values.append(single_select_result.value)
        # wait until the DOM finished updating (before_wait_sec=0.5)
        await skyvern_frame.safe_wait_for_animation_end(before_wait_sec=0.5)

        if await single_select_result.is_done():
            return single_select_result

        if i == max_depth - 1:
            LOG.warning(
                "Reaching the max selection depth",
                depth=i,
                task_id=task.task_id,
                step_id=step.step_id,
            )
            break

        LOG.info(
            "Seems to be a multi-level selection, continue to select until it finishes",
            selected_time=i + 1,
            task_id=task.task_id,
            step_id=step.step_id,
        )
        # wait to load new options
        await skyvern_frame.safe_wait_for_animation_end(before_wait_sec=0.5)

        # A new filter is appended on every iteration, so the list grows with each level.
        check_filter_funcs.append(
            check_disappeared_element_id_in_incremental_factory(incremental_scraped=incremental_scraped)
        )

        secondary_increment_element = await incremental_scraped.get_incremental_element_tree(
            clean_and_remove_element_tree_factory(
                task=task,
                step=step,
                check_filter_funcs=check_filter_funcs,
            )
        )
        if len(secondary_increment_element) == 0:
            LOG.info(
                "No incremental element detected for the next level selection, going to quit the custom select mode",
                selected_time=i + 1,
                task_id=task.task_id,
                step_id=step.step_id,
            )
            return single_select_result

        # it's for typing. it's been verified in `single_select_result.is_done()`
        assert single_select_result.dropdown_menu is not None

        if single_select_result.action_type is not None and single_select_result.action_type == ActionType.INPUT_TEXT:
            LOG.info(
                "It's an input mini action, going to continue the select action",
                step_id=step.step_id,
                task_id=task.task_id,
            )
            continue

        if continue_until_close:
            LOG.info(
                "Continue the selecting until the dropdown menu is closed",
                step_id=step.step_id,
                task_id=task.task_id,
            )
            continue

        # Ask the LLM, with a screenshot, whether the selection for this field is complete.
        screenshot = await page.screenshot(timeout=settings.BROWSER_SCREENSHOT_TIMEOUT_MS)
        mini_goal = (
            input_or_select_context.field
            if not input_or_select_context.intention
            else input_or_select_context.intention
        )
        prompt = prompt_engine.load_prompt(
            "confirm-multi-selection-finish",
            mini_goal=mini_goal,
            navigation_goal=task.navigation_goal,
            navigation_payload_str=json.dumps(task.navigation_payload),
            elements="".join(json_to_html(element) for element in secondary_increment_element),
            select_history=json.dumps(build_sequential_select_history(select_history)),
            local_datetime=datetime.now(ensure_context().tz_info).isoformat(),
        )
        llm_api_handler = LLMAPIHandlerFactory.get_override_llm_api_handler(task.llm_key, default=app.LLM_API_HANDLER)
        json_response = await llm_api_handler(
            prompt=prompt, screenshots=[screenshot], step=step, prompt_name="confirm-multi-selection-finish"
        )
        if json_response.get("is_mini_goal_finished", False):
            LOG.info("The user has finished the selection for the current opened dropdown", step_id=step.step_id)
            return single_select_result
    else:
        # NOTE(review): as laid out here this `else` belongs to the `for` loop, so it only runs
        # when the loop finishes without `break`. The final iteration leaves through `return`
        # or the `break` above, so this branch does not appear to execute -- confirm the
        # intended nesting against the original file.
        if input_or_select_context.is_date_related:
            if skyvern_element.get_tag_name() == InteractiveElement.INPUT and action.option.label:
                try:
                    LOG.info(
                        "Try to input the date directly",
                        step_id=step.step_id,
                        task_id=task.task_id,
                    )
                    await skyvern_element.input_sequentially(action.option.label)
                    result = CustomSingleSelectResult(skyvern_frame=skyvern_frame)
                    result.action_result = ActionSuccess()
                    return result

                except Exception:
                    # Best effort only: fall through to the handling below.
                    LOG.warning(
                        "Failed to input the date directly",
                        exc_info=True,
                        step_id=step.step_id,
                        task_id=task.task_id,
                    )

        if single_select_result and single_select_result.action_result:
            single_select_result.action_result.skip_remaining_actions = True
            return single_select_result

    return select_history[-1] if len(select_history) > 0 else None
2024-09-12 09:57:04 +08:00
def build_sequential_select_history(history_list: list[CustomSingleSelectResult]) -> list[dict[str, Any]]:
    """
    Turn the results of earlier selections into plain dicts.

    Each entry keeps the reasoning and the value of one selection, plus a "result" that is
    "success" when the selection's action result is an ActionSuccess and "failed" otherwise.
    The order of `history_list` is preserved.
    """
    history: list[dict[str, Any]] = []
    for entry in history_list:
        succeeded = isinstance(entry.action_result, ActionSuccess)
        history.append(
            {
                "reasoning": entry.reasoning,
                "value": entry.value,
                "result": "success" if succeeded else "failed",
            }
        )
    return history
2024-09-09 11:34:09 +08:00
2025-05-19 09:08:55 -07:00
class CustomSelectPromptOptions(BaseModel):
    """
    Options used to generate the prompt for the custom select action.

    is_date_related: whether the field is date related
    required_field: whether the field is required
    field_information: the description of the field; could be the field name, the action intention,
        the action reasoning about the field, etc.
    target_value: the target value of the field (generated by the LLM in the main prompt)
    """

    is_date_related: bool = False
    required_field: bool = False
    field_information: str = ""
    target_value: str | None = None
2025-07-07 14:43:10 +08:00
@TraceManager.traced_async(ignore_inputs=["scraped_page", "page"])
async def select_from_emerging_elements(
    current_element_id: str,
    options: CustomSelectPromptOptions,
    page: Page,
    scraped_page: ScrapedPage,
    step: Step,
    task: Task,
    scraped_page_after_open: ScrapedPage | None = None,
    new_interactable_element_ids: list[str] | None = None,
) -> ActionResult:
    """
    Select an element from the newly showing elements.
    Currently mainly used for the dropdown menu selection.

    The new elements are the ids present in `scraped_page_after_open` but absent from `scraped_page`.
    The LLM ("custom-select" prompt) picks one of them, which is then typed into (INPUT_TEXT) or clicked (CLICK).

    current_element_id: id of the element the selection is made for; only used for error reporting here.
    options: field description and target value used to render the prompt.
    scraped_page_after_open: scrape taken after the menu opened; generated from `scraped_page` when omitted.
    new_interactable_element_ids: candidate ids; computed from the scrape diff when omitted or empty.

    Returns ActionSuccess once the input/click was performed (or the input already holds the value), or an
    ActionFailure wrapping InteractWithDropdownContainer when the chosen element is the listbox container.

    Raises NoIncrementalElementFoundForCustomSelection when there is no new interactable element, and
    NoAvailableOptionFoundForCustomSelection when the LLM response carries no element id or no usable action type.
    """
    # TODO: support to handle the case when options are loaded by scroll
    scraped_page_after_open = scraped_page_after_open or await scraped_page.generate_scraped_page_without_screenshots()

    new_element_ids = set(scraped_page_after_open.id_to_css_dict.keys()) - set(scraped_page.id_to_css_dict.keys())

    dom_after_open = DomUtil(scraped_page=scraped_page_after_open, page=page)
    new_interactable_element_ids = new_interactable_element_ids or [
        element_id
        for element_id in new_element_ids
        if (await dom_after_open.get_skyvern_element_by_id(element_id)).is_interactable()
    ]

    if len(new_interactable_element_ids) == 0:
        raise NoIncrementalElementFoundForCustomSelection(element_id=current_element_id)

    prompt = load_prompt_with_elements(
        element_tree_builder=scraped_page_after_open,
        prompt_engine=prompt_engine,
        template_name="custom-select",
        is_date_related=options.is_date_related,
        field_information=options.field_information,
        required_field=options.required_field,
        target_value=options.target_value,
        navigation_goal=task.navigation_goal,
        new_elements_ids=new_interactable_element_ids,
        navigation_payload_str=json.dumps(task.navigation_payload),
        local_datetime=datetime.now(skyvern_context.ensure_context().tz_info).isoformat(),
    )
    LOG.info(
        "Calling LLM to find the match element",
        step_id=step.step_id,
        task_id=task.task_id,
    )

    llm_api_handler = LLMAPIHandlerFactory.get_override_llm_api_handler(task.llm_key, default=app.LLM_API_HANDLER)
    json_response = await llm_api_handler(prompt=prompt, step=step, prompt_name="custom-select")
    value: str | None = json_response.get("value", None)
    LOG.info(
        "LLM response for the matched element",
        matched_value=value,
        response=json_response,
        step_id=step.step_id,
        task_id=task.task_id,
    )

    action_type_str: str = json_response.get("action_type", "") or ""
    try:
        action_type = ActionType(action_type_str.lower())
    except ValueError as e:
        # A missing or unknown action type means the LLM did not pick a usable option. Report it the same
        # way as the check below instead of leaking a bare ValueError from the enum lookup.
        raise NoAvailableOptionFoundForCustomSelection(reason=json_response.get("reasoning")) from e

    element_id: str | None = json_response.get("id", None)
    if not element_id or action_type not in [ActionType.CLICK, ActionType.INPUT_TEXT]:
        raise NoAvailableOptionFoundForCustomSelection(reason=json_response.get("reasoning"))

    if value is not None and action_type == ActionType.INPUT_TEXT:
        actual_value = await get_actual_value_of_parameter_if_secret(task=task, parameter=value)
        LOG.info(
            "No clickable option found, but found input element to search",
            element_id=element_id,
            task_id=task.task_id,
            step_id=step.step_id,
        )
        input_element = await dom_after_open.get_skyvern_element_by_id(element_id)
        await input_element.scroll_into_view()
        current_text = await get_input_value(input_element.get_tag_name(), input_element.get_locator())
        # nothing to type when the input already holds the wanted text
        if current_text == actual_value:
            return ActionSuccess()

        await input_element.input_clear()
        await input_element.input_sequentially(actual_value)
        return ActionSuccess()

    selected_element = await dom_after_open.get_skyvern_element_by_id(element_id)
    # the LLM picked the listbox container itself instead of one of its options
    if await selected_element.get_attr("role") == "listbox":
        return ActionFailure(exception=InteractWithDropdownContainer(element_id=element_id))

    await selected_element.scroll_into_view()
    await selected_element.click(page=page)
    return ActionSuccess()
2025-07-07 14:43:10 +08:00
@TraceManager.traced_async(
    ignore_inputs=[
        "context",
        "page",
        "skyvern_element",
        "skyvern_frame",
        "incremental_scraped",
        "check_filter_funcs",
        "dropdown_menu_element",
        "select_history",
        "target_value",
    ]
)
async def select_from_dropdown(
    context: InputOrSelectContext,
    page: Page,
    skyvern_element: SkyvernElement,
    skyvern_frame: SkyvernFrame,
    incremental_scraped: IncrementalScrapePage,
    check_filter_funcs: list[CheckFilterOutElementIDFunc],
    step: Step,
    task: Task,
    dropdown_menu_element: SkyvernElement | None = None,
    select_history: list[CustomSingleSelectResult] | None = None,
    force_select: bool = False,
    target_value: str = "",
) -> CustomSingleSelectResult:
    """
    Ask the LLM ("custom-select" prompt) to pick one option among the incremental elements and interact with it.

    force_select: is used to choose an element to click even there's no dropdown menu;
    target_value: only valid when force_select is "False". When target_value is not empty, the matched option
        must be relevant to target value;
    dropdown_menu_element: an already located menu; located via locate_dropdown_menu when omitted;
    select_history: earlier selections of the same sequence, rendered into the prompt.

    A CustomSingleSelectResult is always returned. Its `action_result` is left unset when:
        1. force_select is false and no dropdown menu popped
        2. force_select is false and match value is not relevant to the target value

    Raises NoAvailableOptionFoundForCustomSelection when the LLM response carries no element id or no usable
    action type. The element-lookup errors (MissingElement, MissingElementDict, MissingElementInCSSMap,
    MultipleElementsFound) are re-raised when the LLM returned no value to fall back on.
    """
    select_history = [] if select_history is None else select_history
    single_select_result = CustomSingleSelectResult(skyvern_frame=skyvern_frame)

    timeout = settings.BROWSER_ACTION_TIMEOUT_MS

    if dropdown_menu_element is None:
        dropdown_menu_element = await locate_dropdown_menu(
            current_anchor_element=skyvern_element,
            incremental_scraped=incremental_scraped,
            step=step,
            task=task,
        )
    single_select_result.dropdown_menu = dropdown_menu_element

    if not force_select and dropdown_menu_element is None:
        return single_select_result

    if dropdown_menu_element:
        potential_scrollable_element = await try_to_find_potential_scrollable_element(
            skyvern_element=dropdown_menu_element,
            incremental_scraped=incremental_scraped,
            step=step,
            task=task,
        )

        # load lazily rendered options before building the element tree for the prompt
        if await skyvern_frame.get_element_scrollable(await potential_scrollable_element.get_element_handler()):
            await scroll_down_to_load_all_options(
                scrollable_element=potential_scrollable_element,
                skyvern_frame=skyvern_frame,
                page=page,
                incremental_scraped=incremental_scraped,
                step=step,
                task=task,
            )

    trimmed_element_tree = await incremental_scraped.get_incremental_element_tree(
        clean_and_remove_element_tree_factory(task=task, step=step, check_filter_funcs=check_filter_funcs),
    )
    incremental_scraped.set_element_tree_trimmed(trimmed_element_tree)
    html = incremental_scraped.build_element_tree(html_need_skyvern_attrs=True)

    # named so that it does not shadow the `skyvern_context` module used elsewhere in this file
    current_context = ensure_context()
    prompt = prompt_engine.load_prompt(
        "custom-select",
        is_date_related=context.is_date_related,
        field_information=context.field if not context.intention else context.intention,
        required_field=context.is_required,
        target_value=target_value,
        navigation_goal=task.navigation_goal,
        navigation_payload_str=json.dumps(task.navigation_payload),
        elements=html,
        select_history=json.dumps(build_sequential_select_history(select_history)) if select_history else "",
        local_datetime=datetime.now(current_context.tz_info).isoformat(),
    )

    LOG.info(
        "Calling LLM to find the match element",
        step_id=step.step_id,
        task_id=task.task_id,
    )

    json_response = await app.SELECT_AGENT_LLM_API_HANDLER(prompt=prompt, step=step, prompt_name="custom-select")
    value: str | None = json_response.get("value", None)
    single_select_result.value = value
    select_reason: str | None = json_response.get("reasoning", None)
    single_select_result.reasoning = select_reason

    LOG.info(
        "LLM response for the matched element",
        matched_value=value,
        response=json_response,
        step_id=step.step_id,
        task_id=task.task_id,
    )

    action_type_str: str = json_response.get("action_type", "") or ""
    try:
        action_type = ActionType(action_type_str.lower())
    except ValueError as e:
        # A missing or unknown action type means the LLM did not pick a usable option. Report it the same
        # way as the check below instead of leaking a bare ValueError from the enum lookup.
        raise NoAvailableOptionFoundForCustomSelection(reason=json_response.get("reasoning")) from e
    single_select_result.action_type = action_type

    element_id: str | None = json_response.get("id", None)
    if not element_id or action_type not in [ActionType.CLICK, ActionType.INPUT_TEXT]:
        raise NoAvailableOptionFoundForCustomSelection(reason=json_response.get("reasoning"))

    if not force_select and target_value:
        if not json_response.get("relevant", False):
            LOG.info(
                "The selected option is not relevant to the target value",
                element_id=element_id,
                task_id=task.task_id,
                step_id=step.step_id,
            )
            return single_select_result

    if value is not None and action_type == ActionType.INPUT_TEXT:
        LOG.info(
            "No clickable option found, but found input element to search",
            element_id=element_id,
            task_id=task.task_id,
            step_id=step.step_id,
        )
        try:
            actual_value = await get_actual_value_of_parameter_if_secret(task=task, parameter=value)
            input_element = await SkyvernElement.create_from_incremental(incremental_scraped, element_id)
            await input_element.scroll_into_view()
            current_text = await get_input_value(input_element.get_tag_name(), input_element.get_locator())
            # nothing to type when the input already holds the wanted text
            if current_text == actual_value:
                single_select_result.action_result = ActionSuccess()
                return single_select_result

            await input_element.input_clear()
            await input_element.input_sequentially(actual_value)
            single_select_result.action_result = ActionSuccess()
            return single_select_result
        except Exception as e:
            single_select_result.action_result = ActionFailure(exception=e)
            return single_select_result

    try:
        selected_element = await SkyvernElement.create_from_incremental(incremental_scraped, element_id)
        # TODO Some popup dropdowns include <select> element, we only handle the <select> element now, to prevent infinite recursion. Need to support more types of dropdowns.
        if selected_element.get_tag_name() == InteractiveElement.SELECT and value:
            await selected_element.scroll_into_view()
            action = SelectOptionAction(
                reasoning=select_reason,
                element_id=element_id,
                option=SelectOption(label=value),
            )
            results = await normal_select(
                action=action, skyvern_element=selected_element, task=task, step=step, builder=incremental_scraped
            )
            assert len(results) > 0
            # normal_select appends one failure per failed attempt (by value, by label, by index) and stops at
            # the first success, so the final outcome is the LAST entry; the first one may be a stale failure.
            single_select_result.action_result = results[-1]
            return single_select_result

        # the LLM picked the listbox container itself instead of one of its options
        if await selected_element.get_attr("role") == "listbox":
            single_select_result.action_result = ActionFailure(
                exception=InteractWithDropdownContainer(element_id=element_id)
            )
            return single_select_result

        await selected_element.scroll_into_view()
        await selected_element.click(page=page, timeout=timeout)
        single_select_result.action_result = ActionSuccess()
        return single_select_result
    except (MissingElement, MissingElementDict, MissingElementInCSSMap, MultipleElementsFound):
        if not value:
            raise

        # sometimes we have multiple elements pointed to the same value,
        # but only one option is clickable on the page
        LOG.debug(
            "Searching option with the same value in incremental elements",
            value=value,
            elements=incremental_scraped.element_tree,
        )
        locator = await incremental_scraped.select_one_element_by_value(value=value)
        if not locator:
            single_select_result.action_result = ActionFailure(exception=MissingElement())
            return single_select_result

        try:
            LOG.info(
                "Find an alternative option with the same value. Try to select the option.",
                value=value,
            )
            await locator.click(timeout=timeout)
            single_select_result.action_result = ActionSuccess()
            return single_select_result
        except Exception as e:
            single_select_result.action_result = ActionFailure(exception=e)
            return single_select_result
2025-07-07 14:43:10 +08:00
@TraceManager.traced_async(
    ignore_inputs=[
        "value",
        "page",
        "skyvern_element",
        "skyvern_frame",
        "dom",
        "incremental_scraped",
        "dropdown_menu_element",
    ]
)
async def select_from_dropdown_by_value(
    value: str,
    page: Page,
    skyvern_element: SkyvernElement,
    skyvern_frame: SkyvernFrame,
    dom: DomUtil,
    incremental_scraped: IncrementalScrapePage,
    task: Task,
    step: Step,
    dropdown_menu_element: SkyvernElement | None = None,
) -> ActionResult:
    """
    Click the option whose value equals `value`, scrolling the dropdown menu when it is not found right away.

    The incremental elements are searched first. On a miss, the dropdown menu (the given one, or the one found
    by locate_dropdown_menu) is scrolled page by page and searched again after every scroll.

    Returns ActionSuccess once a matching option was clicked.
    Raises NoElementMatchedForTargetOption when there is no dropdown menu, when the menu can't be scrolled,
    or when nothing matched after scrolling.
    """
    click_timeout = settings.BROWSER_ACTION_TIMEOUT_MS

    async def click_option_if_present(scraped: IncrementalScrapePage) -> bool:
        # refresh the incremental tree, then click the option with the wanted value if there is one
        await scraped.get_incremental_element_tree(
            clean_and_remove_element_tree_factory(
                task=task, step=step, check_filter_funcs=[check_existed_but_not_option_element_in_dom_factory(dom)]
            ),
        )
        matched_locator = await scraped.select_one_element_by_value(value=value)
        if matched_locator is None:
            return False
        await matched_locator.click(timeout=click_timeout)
        return True

    if await click_option_if_present(incremental_scraped):
        return ActionSuccess()

    if dropdown_menu_element is None:
        dropdown_menu_element = await locate_dropdown_menu(
            current_anchor_element=skyvern_element,
            incremental_scraped=incremental_scraped,
            step=step,
            task=task,
        )

    if not dropdown_menu_element:
        raise NoElementMatchedForTargetOption(target=value, reason="No value matched")

    scroll_target = await try_to_find_potential_scrollable_element(
        skyvern_element=dropdown_menu_element,
        incremental_scraped=incremental_scraped,
        task=task,
        step=step,
    )
    can_scroll = await skyvern_frame.get_element_scrollable(await scroll_target.get_element_handler())
    if not can_scroll:
        raise NoElementMatchedForTargetOption(
            target=value, reason="No value matched and element can't scroll to find more options"
        )

    option_clicked: bool = False

    async def continue_callback(incre_scraped: IncrementalScrapePage) -> bool:
        # returning False stops the scrolling as soon as the option has been clicked
        nonlocal option_clicked
        if await click_option_if_present(incre_scraped):
            option_clicked = True
            return False
        return True

    await scroll_down_to_load_all_options(
        scrollable_element=scroll_target,
        page=page,
        skyvern_frame=skyvern_frame,
        incremental_scraped=incremental_scraped,
        step=step,
        task=task,
        page_by_page=True,
        is_continue=continue_callback,
    )

    if not option_clicked:
        raise NoElementMatchedForTargetOption(target=value, reason="No value matched after scrolling")
    return ActionSuccess()
async def locate_dropdown_menu(
    current_anchor_element: SkyvernElement,
    incremental_scraped: IncrementalScrapePage,
    step: Step,
    task: Task,
) -> SkyvernElement | None:
    """
    Find the opened dropdown menu among the head elements of the incremental element tree.

    At most the first 10 head elements are considered. A head element is skipped when it has no id, can't be
    built, is not next to the anchor (within DROPDOWN_MENU_MAX_DISTANCE on both axes) or is not visible.
    A remaining candidate is accepted when a <ul>/listbox element is found among its children, or otherwise
    when the LLM ("opened-dropdown-confirm" prompt) confirms it from a screenshot of the candidate.

    Returns the confirmed head element, or None when the anchor is not visible or no candidate is confirmed.
    """
    # the anchor must exist in the DOM, but no need to be visible css style
    anchor_visible = await current_anchor_element.is_visible(must_visible_style=False)
    if not anchor_visible:
        return None

    skyvern_frame = incremental_scraped.skyvern_frame

    for position, head_dict in enumerate(incremental_scraped.element_tree):
        # FIXME: confirm max to 10 nodes for now, preventing sending too many requests to LLM
        if position >= 10:
            break

        element_id = head_dict.get("id")
        if not element_id:
            LOG.debug(
                "Skip the element without id for the dropdown menu confirm",
                step_id=step.step_id,
                task_id=task.task_id,
                element=head_dict,
            )
            continue

        try:
            head_element = await SkyvernElement.create_from_incremental(incremental_scraped, element_id)
        except Exception:
            LOG.debug(
                "Failed to get head element in the incremental page",
                element_id=element_id,
                step_id=step.step_id,
                task_id=task.task_id,
                exc_info=True,
            )
            continue

        try:
            is_near_anchor = await head_element.is_next_to_element(
                target_locator=current_anchor_element.get_locator(),
                max_x_distance=DROPDOWN_MENU_MAX_DISTANCE,
                max_y_distance=DROPDOWN_MENU_MAX_DISTANCE,
            )
        except Exception:
            LOG.info(
                "Failed to calculate the distance between the elements",
                element_id=element_id,
                step_id=step.step_id,
                task_id=task.task_id,
                exc_info=True,
            )
            continue

        if not is_near_anchor:
            LOG.debug(
                "Skip the element since it's too far away from the anchor element",
                step_id=step.step_id,
                task_id=task.task_id,
                element_id=element_id,
            )
            continue

        head_is_visible = await skyvern_frame.get_element_visible(await head_element.get_element_handler())
        if not head_is_visible:
            LOG.debug(
                "Skip the element since it's invisible",
                step_id=step.step_id,
                task_id=task.task_id,
                element_id=element_id,
            )
            continue

        ul_or_listbox_element_id = await head_element.find_children_element_id_by_callback(
            cb=is_ul_or_listbox_element_factory(incremental_scraped=incremental_scraped, task=task, step=step),
        )

        if ul_or_listbox_element_id:
            try:
                # the result is discarded: building it only proves the found child can be resolved
                await SkyvernElement.create_from_incremental(incremental_scraped, ul_or_listbox_element_id)
                LOG.info(
                    "Confirm it's an opened dropdown menu since it includes <ul> or <role='listbox'>",
                    step_id=step.step_id,
                    task_id=task.task_id,
                    element_id=element_id,
                )
                return await SkyvernElement.create_from_incremental(
                    incre_page=incremental_scraped, element_id=element_id
                )
            except Exception:
                # fall through to the screenshot based confirmation below
                LOG.debug(
                    "Failed to get <ul> or <role='listbox'> element in the incremental page",
                    element_id=element_id,
                    step_id=step.step_id,
                    task_id=task.task_id,
                    exc_info=True,
                )

        # sometimes taking screenshot might scroll away, need to scroll back after the screenshot
        scroll_x, scroll_y = await skyvern_frame.get_scroll_x_y()
        screenshot = await head_element.get_locator().screenshot(timeout=settings.BROWSER_SCREENSHOT_TIMEOUT_MS)
        await skyvern_frame.scroll_to_x_y(scroll_x, scroll_y)

        # TODO: better to send untrimmed HTML without skyvern attributes in the future
        dropdown_confirm_prompt = prompt_engine.load_prompt("opened-dropdown-confirm")
        LOG.debug(
            "Confirm if it's an opened dropdown menu",
            step_id=step.step_id,
            task_id=task.task_id,
            element=head_dict,
        )
        json_response = await app.SECONDARY_LLM_API_HANDLER(
            prompt=dropdown_confirm_prompt, screenshots=[screenshot], step=step, prompt_name="opened-dropdown-confirm"
        )
        if json_response.get("is_opened_dropdown_menu"):
            LOG.info(
                "Opened dropdown menu found",
                step_id=step.step_id,
                task_id=task.task_id,
                element_id=element_id,
            )
            return await SkyvernElement.create_from_incremental(incre_page=incremental_scraped, element_id=element_id)

    return None
2024-09-09 11:34:09 +08:00
async def try_to_find_potential_scrollable_element(
    skyvern_element: SkyvernElement,
    incremental_scraped: IncrementalScrapePage,
    task: Task,
    step: Step,
) -> SkyvernElement:
    """
    Check for any <ul> or <role="listbox"> element in the children.
    If one is found (and it can be built), return the found element,
    else, return the original one.
    """
    child_element_id = await skyvern_element.find_children_element_id_by_callback(
        cb=is_ul_or_listbox_element_factory(incremental_scraped=incremental_scraped, task=task, step=step),
    )

    # nothing found, or the match is the element itself: keep the original
    if not child_element_id or child_element_id == skyvern_element.get_id():
        return skyvern_element

    LOG.debug(
        "Found 'ul or listbox' element in children list",
        element_id=child_element_id,
        step_id=step.step_id,
        task_id=task.task_id,
    )

    try:
        return await SkyvernElement.create_from_incremental(incremental_scraped, child_element_id)
    except Exception:
        LOG.debug(
            "Failed to get head element by found element id, use the original element id",
            element_id=child_element_id,
            step_id=step.step_id,
            task_id=task.task_id,
            exc_info=True,
        )
        return skyvern_element
2025-07-07 14:43:10 +08:00
@TraceManager.traced_async(
    ignore_inputs=["scrollable_element", "page", "skyvern_frame", "incremental_scraped", "is_continue"]
)
async def scroll_down_to_load_all_options(
    scrollable_element: SkyvernElement,
    page: Page,
    skyvern_frame: SkyvernFrame,
    incremental_scraped: IncrementalScrapePage,
    step: Step | None = None,
    task: Task | None = None,
    page_by_page: bool = False,
    is_continue: Callable[[IncrementalScrapePage], Awaitable[bool]] | None = None,
) -> None:
    """
    Keep scrolling the dropdown menu down until no new incremental element shows up.

    The scrolling stops when the incremental element count stays the same between two rounds, when
    settings.OPTION_LOADING_TIMEOUT_MS has elapsed, or when `is_continue` returns False. In the first two
    cases the menu is scrolled back to its start point; when `is_continue` returns False the function
    returns right away without scrolling back.

    page_by_page: forwarded to skyvern_frame.scroll_to_element_bottom.
    is_continue: awaited after every round with `incremental_scraped`.
    """
    LOG.info(
        "Scroll down the dropdown menu to load all options",
        step_id=step.step_id if step else "none",
        task_id=task.task_id if task else "none",
    )
    action_timeout = settings.BROWSER_ACTION_TIMEOUT_MS

    menu_handle = await scrollable_element.get_locator().element_handle(timeout=action_timeout)
    if menu_handle is not None:
        await menu_handle.scroll_into_view_if_needed(timeout=action_timeout)
    else:
        LOG.info("element handle is None, using focus to move the cursor", element_id=scrollable_element.get_id())
        await scrollable_element.get_locator().focus(timeout=action_timeout)

    await scrollable_element.move_mouse_to_safe(page=page)

    # total distance scrolled with the mouse wheel, needed to scroll back at the end
    scroll_pace = 0
    last_count = await incremental_scraped.get_incremental_elements_num()

    deadline = datetime.now(timezone.utc) + timedelta(milliseconds=settings.OPTION_LOADING_TIMEOUT_MS)
    while datetime.now(timezone.utc) < deadline:
        # make sure we can scroll to the bottom
        scroll_interval = settings.BROWSER_HEIGHT * 5
        if menu_handle is not None:
            await skyvern_frame.scroll_to_element_bottom(menu_handle, page_by_page)
            # wait until animation ends, otherwise the scroll operation could be overwritten
            await skyvern_frame.safe_wait_for_animation_end(before_wait_sec=0.5)
        else:
            LOG.info("element handle is None, using mouse to scroll down", element_id=scrollable_element.get_id())
            await page.mouse.wheel(0, scroll_interval)
            scroll_pace += scroll_interval

        # scroll a little back and scroll down to trigger the loading
        await page.mouse.wheel(0, -1e-5)
        await page.mouse.wheel(0, 1e-5)
        # wait for a while to load new options
        await skyvern_frame.safe_wait_for_animation_end(before_wait_sec=0.5)

        latest_count = await incremental_scraped.get_incremental_elements_num()
        LOG.info(
            "Current incremental elements count during the scrolling",
            num=latest_count,
            step_id=step.step_id if step else "none",
            task_id=task.task_id if task else "none",
        )

        if is_continue is not None and not await is_continue(incremental_scraped):
            return

        if last_count == latest_count:
            break
        last_count = latest_count
    else:
        # only reached when the deadline passed without the element count settling
        LOG.warning("Timeout to load all options, maybe some options will be missed")

    # scroll back to the start point and wait for a while to make all options invisible on the page
    if menu_handle is not None:
        await skyvern_frame.scroll_to_element_top(menu_handle)
    else:
        LOG.info("element handle is None, using mouse to scroll back", element_id=scrollable_element.get_id())
        await page.mouse.wheel(0, -scroll_pace)
    await skyvern_frame.safe_wait_for_animation_end(before_wait_sec=0.5)
2024-08-06 13:30:52 +08:00
2024-06-25 01:46:54 +08:00
async def normal_select(
    action: actions.SelectOptionAction,
    skyvern_element: SkyvernElement,
    task: Task,
    step: Step,
    builder: ElementTreeBuilder,
) -> List[ActionResult]:
    """
    Select an option of a <select> element, letting the LLM ("normal-select" prompt) choose the option.

    The value returned by the LLM is tried as the option value first and as the option label next; the
    returned index is the last resort. Attempts stop at the first success.

    Returns one ActionResult per attempt, in order: an ActionFailure for every failed attempt, followed by an
    ActionSuccess when one attempt worked. When the LLM returned neither a value nor an index, the list holds
    a single ActionFailure wrapping EmptySelect. A single ActionSuccess is returned straight away when the
    element's "selected" attribute already equals the label or the value of the action's option.
    """
    try:
        current_text = await skyvern_element.get_attr("selected")
        if current_text and (current_text == action.option.label or current_text == action.option.value):
            return [ActionSuccess()]
    except Exception:
        LOG.info("failed to confirm if the select option has been done, force to take the action again.")

    outcomes: List[ActionResult] = []
    option_selected = False
    select_locator = skyvern_element.get_locator()

    input_or_select_context = await _get_input_or_select_context(
        action=action,
        element_tree_builder=builder,
        step=step,
        skyvern_element=skyvern_element,
    )
    LOG.info(
        "Parsed input/select context",
        context=input_or_select_context,
        task_id=task.task_id,
        step_id=step.step_id,
    )

    await skyvern_element.refresh_select_options()
    options_html = skyvern_element.build_HTML()
    # prefer the intention when there is one, otherwise describe the field by its name
    field_information = input_or_select_context.intention or input_or_select_context.field
    prompt = prompt_engine.load_prompt(
        "normal-select",
        field_information=field_information,
        required_field=input_or_select_context.is_required,
        navigation_goal=task.navigation_goal,
        navigation_payload_str=json.dumps(task.navigation_payload),
        options=options_html,
        local_datetime=datetime.now(skyvern_context.ensure_context().tz_info).isoformat(),
    )

    json_response = await app.SELECT_AGENT_LLM_API_HANDLER(prompt=prompt, step=step, prompt_name="normal-select")
    index: int | None = json_response.get("index")
    value: str | None = json_response.get("value")

    if value is not None:
        # try the LLM value as the option value first, then as the option label (if it matches)
        attempts = (
            ("value", FailToSelectByValue, "Failed to take select action by value"),
            ("label", FailToSelectByLabel, "Failed to take select action by label"),
        )
        for keyword, failure_cls, failure_message in attempts:
            try:
                await select_locator.select_option(
                    timeout=settings.BROWSER_ACTION_TIMEOUT_MS,
                    **{keyword: value},
                )
                option_selected = True
                outcomes.append(ActionSuccess())
                break
            except Exception:
                outcomes.append(ActionFailure(failure_cls(action.element_id)))
                LOG.info(
                    failure_message,
                    exc_info=True,
                    action=action,
                    locator=select_locator,
                )

    if not option_selected and index is not None:
        if index < len(skyvern_element.get_options()):
            try:
                # This means the supplied index was for the select element, not a reference to the css dict
                await select_locator.select_option(
                    index=index,
                    timeout=settings.BROWSER_ACTION_TIMEOUT_MS,
                )
                option_selected = True
                outcomes.append(ActionSuccess())
            except Exception:
                outcomes.append(ActionFailure(FailToSelectByIndex(action.element_id)))
                LOG.info(
                    "Failed to click on the option by index",
                    exc_info=True,
                    action=action,
                    locator=select_locator,
                )
        else:
            outcomes.append(ActionFailure(OptionIndexOutOfBound(action.element_id)))
            LOG.info(
                "option index is out of bound",
                action=action,
                locator=select_locator,
            )

    # no attempt was made at all: the LLM gave neither a value nor an index
    if not outcomes:
        outcomes.append(ActionFailure(EmptySelect(element_id=action.element_id)))

    return outcomes
def get_anchor_to_click(scraped_page: ScrapedPage, element_id: str) -> str | None:
    """
    Get the anchor tag under the label to click.

    Looks for the element with `element_id` in `scraped_page.elements` and returns the entry of
    `scraped_page.id_to_css_dict` for its first direct <a> child that has one.
    Returns None when the element is not found or has no such child.
    """
    LOG.info("Getting anchor tag to click", element_id=element_id)
    for ele in scraped_page.elements:
        if ele.get("id") != element_id:
            continue
        # "children" is optional on scraped elements (other helpers in this file read it with a default),
        # so a childless element must not raise a KeyError
        for child in ele.get("children", []):
            if child.get("tagName") != "a":
                continue
            anchor_css = scraped_page.id_to_css_dict.get(child.get("id"))
            if anchor_css is not None:
                return anchor_css
    return None
def get_select_id_in_label_children(scraped_page: ScrapedPage, element_id: str) -> str | None:
    """
    Search the direct children of the <label> for a <select>.

    Returns the "id" of the first <select> child, or None when the label is unknown
    to `scraped_page.id_to_element_dict` or has no <select> child.
    """
    LOG.info("Searching select in the label children", element_id=element_id)

    label_element = scraped_page.id_to_element_dict.get(element_id, None)
    if label_element is None:
        return None

    select_children = (child for child in label_element.get("children", []) if child.get("tagName", "") == "select")
    first_select = next(select_children, None)
    return None if first_select is None else first_select.get("id", None)
def get_checkbox_id_in_label_children(scraped_page: ScrapedPage, element_id: str) -> str | None:
    """
    Search the direct children of the <label> for a checkbox/radio input.

    Returns the "id" of the first <input> child whose type is "checkbox" or "radio", or None when the
    label is unknown to `scraped_page.id_to_element_dict` or has no such child.
    """
    LOG.info("Searching checkbox/radio in the label children", element_id=element_id)

    label_element = scraped_page.id_to_element_dict.get(element_id, None)
    if label_element is None:
        return None

    for child in label_element.get("children", []):
        if child.get("tagName", "") != "input":
            continue
        input_type = child.get("attributes", {}).get("type")
        if input_type in ["checkbox", "radio"]:
            return child.get("id", None)

    return None
async def extract_information_for_navigation_goal(
    task: Task,
    step: Step,
    scraped_page: ScrapedPage,
) -> ScrapeResult:
    """
    Refresh the scraped page and ask the LLM ("extract-information" prompt) to extract data from it.

    The prompt is built from the refreshed page and the task's goals, payload, schema and previously
    extracted information. The screenshots sent along are read from `scraped_page`.

    Returns a ScrapeResult whose `scraped_data` is the JSON response of the LLM.
    """
    refreshed_page = await scraped_page.refresh()

    current_context = ensure_context()
    error_code_mapping_str = json.dumps(task.error_code_mapping) if task.error_code_mapping else None
    extract_information_prompt = load_prompt_with_elements(
        element_tree_builder=refreshed_page,
        prompt_engine=prompt_engine,
        template_name="extract-information",
        html_need_skyvern_attrs=False,
        navigation_goal=task.navigation_goal,
        navigation_payload=task.navigation_payload,
        previous_extracted_information=task.extracted_information,
        data_extraction_goal=task.data_extraction_goal,
        extracted_information_schema=task.extracted_information_schema,
        current_url=refreshed_page.url,
        extracted_text=refreshed_page.extracted_text,
        error_code_mapping_str=error_code_mapping_str,
        local_datetime=datetime.now(current_context.tz_info).isoformat(),
    )

    # CUA tasks should use the default data extraction llm key, so the task's own key is dropped for them
    is_cua = await service_utils.is_cua_task(task=task)
    llm_key_override = None if is_cua else task.llm_key

    # falls back to the extraction handler when there is no override key
    llm_api_handler = LLMAPIHandlerFactory.get_override_llm_api_handler(
        llm_key_override, default=app.EXTRACTION_LLM_API_HANDLER
    )
    json_response = await llm_api_handler(
        prompt=extract_information_prompt,
        step=step,
        screenshots=scraped_page.screenshots,
        prompt_name="extract-information",
    )

    return ScrapeResult(scraped_data=json_response)
async def click_listbox_option(
    scraped_page: ScrapedPage,
    page: Page,
    action: actions.SelectOptionAction,
    listbox_element_id: str,
) -> bool:
    """
    Click the option inside a listbox element that matches the action's target option.

    Performs a breadth-first search over the descendants of the listbox element
    (as recorded in ``scraped_page.id_to_element_dict``). A descendant matches when
    its ``role`` attribute is ``"option"`` and its non-empty ``text`` equals either
    ``action.option.label`` or ``action.option.value``.

    Args:
        scraped_page: scraped page holding the element dictionary.
        page: page used to build the DomUtil that resolves the option element.
        action: select action carrying the target option label/value.
        listbox_element_id: id of the listbox element to search under.

    Returns:
        True as soon as a matching option is clicked successfully; False when the
        listbox is unknown, has no ``children`` key, or no matching option could
        be clicked. A failed click is logged and the search continues.
    """
    from collections import deque

    listbox_element = scraped_page.id_to_element_dict.get(listbox_element_id)
    if listbox_element is None:
        return False
    # this is a listbox element, get all the children
    if "children" not in listbox_element:
        return False

    LOG.info("starting bfs", listbox_element_id=listbox_element_id)
    # deque pops from the left in O(1); list.pop(0) shifts the whole queue on every step
    bfs_queue = deque(listbox_element["children"])
    while bfs_queue:
        child = bfs_queue.popleft()
        LOG.info("popped child", element_id=child["id"])
        if "attributes" in child and "role" in child["attributes"] and child["attributes"]["role"] == "option":
            LOG.info("found option", element_id=child["id"])
            text = child["text"] if "text" in child else ""
            if text and (text == action.option.label or text == action.option.value):
                dom = DomUtil(scraped_page=scraped_page, page=page)
                try:
                    skyvern_element = await dom.get_skyvern_element_by_id(child["id"])
                    locator = skyvern_element.locator
                    await locator.click(timeout=1000)
                    return True
                except Exception:
                    # Best effort: keep searching, another option may match and be clickable
                    LOG.error(
                        "Failed to click on the option",
                        action=action,
                        exc_info=True,
                    )
        if "children" in child:
            bfs_queue.extend(child["children"])
    return False
2024-07-10 00:04:03 +08:00
async def get_input_value(tag_name: str, locator: Locator) -> str | None:
    """
    Read the current value of an element.

    Tags listed in COMMON_INPUT_TAGS are read through ``locator.input_value()``;
    any other tag (span, div, p, ...) is read through ``locator.inner_text()``.
    """
    read_value = locator.input_value if tag_name in COMMON_INPUT_TAGS else locator.inner_text
    return await read_value()
class AbstractActionForContextParse(BaseModel):
    """
    Minimal action-like model accepted by ``_get_input_or_select_context``.

    Unlike the other accepted action types, that function never reads
    ``input_or_select_context`` from an instance of this model.
    """

    # forwarded to the "parse-input-or-select-context" prompt as ``action_reasoning``
    reasoning: str | None
    # id of the target element, forwarded to the prompt as ``element_id``
    element_id: str
    # copied into the LLM response under the "intention" key before validation
    intention: str | None
2025-04-28 16:16:36 +08:00
async def _get_input_or_select_context(
    action: InputTextAction | SelectOptionAction | AbstractActionForContextParse,
    skyvern_element: SkyvernElement,
    element_tree_builder: ElementTreeBuilder,
    step: Step,
    ancestor_depth: int = 5,
) -> InputOrSelectContext:
    """
    Resolve the input/select context for an action, asking the LLM when needed.

    If the action already carries an ``input_or_select_context`` it is returned
    as-is. Otherwise the "parse-input-or-select-context" prompt is rendered and sent
    to the parse-select LLM handler. When the element sits deeper in the DOM than
    ``ancestor_depth``, the prompt is built from the subtree rooted ``ancestor_depth``
    levels above the element instead of from ``element_tree_builder``; any failure
    while building that subtree falls back to the tree that was passed in.

    Args:
        action: action providing ``reasoning``, ``element_id`` and ``intention``.
        skyvern_element: element the action targets.
        element_tree_builder: default element tree used to render the prompt.
        step: step passed to the cleanup factory and the LLM handler.
        ancestor_depth: number of parent levels to climb for deep elements.

    Returns:
        The validated InputOrSelectContext, with ``intention`` taken from the action.
    """
    # Shortcut: reuse a context that was already attached to the action
    if not isinstance(action, AbstractActionForContextParse):
        if action.input_or_select_context is not None:
            return action.input_or_select_context

    # Deep DOM structures: narrow the tree down to an ancestor of the element
    skyvern_frame = await SkyvernFrame.create_instance(skyvern_element.get_frame())
    dom_depth = 0
    try:
        dom_depth = await skyvern_frame.get_element_dom_depth(await skyvern_element.get_element_handler())
    except Exception:
        LOG.warning("Failed to get element depth, using the original element tree", exc_info=True)

    if dom_depth > ancestor_depth:
        # climb ancestor_depth parents, e.g. "../../../../.."
        ancestor_path = "/".join([".."] * ancestor_depth)
        ancestor_locator = skyvern_element.get_locator().locator(ancestor_path)
        try:
            ancestor_handle = await ancestor_locator.element_handle(timeout=settings.BROWSER_ACTION_TIMEOUT_MS)
            if ancestor_handle is not None:
                sub_elements, raw_tree = await skyvern_frame.build_tree_from_element(
                    starter=ancestor_handle,
                    frame=skyvern_element.get_frame_id(),
                )
                clean_up_func = app.AGENT_FUNCTION.cleanup_element_tree_factory(step=step)
                cleaned_tree = await clean_up_func(skyvern_element.get_frame(), "", copy.deepcopy(raw_tree))
                trimmed_tree = trim_element_tree(copy.deepcopy(cleaned_tree))
                element_tree_builder = ScrapedPage(
                    elements=sub_elements,
                    element_tree=cleaned_tree,
                    element_tree_trimmed=trimmed_tree,
                    _browser_state=None,
                    _clean_up_func=None,
                    _scrape_exclude=None,
                )
        except Exception:
            LOG.warning(
                "Failed to get sub element tree, using the original element tree",
                exc_info=True,
                path=ancestor_path,
            )

    context_prompt = load_prompt_with_elements(
        element_tree_builder=element_tree_builder,
        prompt_engine=prompt_engine,
        template_name="parse-input-or-select-context",
        action_reasoning=action.reasoning,
        element_id=action.element_id,
    )

    # Centralized parse-select handler (set at init or via scripts)
    llm_payload = await app.PARSE_SELECT_LLM_API_HANDLER(
        prompt=context_prompt,
        step=step,
        prompt_name="parse-input-or-select-context",
    )
    # The intention always comes from the action, not from the LLM
    llm_payload["intention"] = action.intention
    parsed_context = InputOrSelectContext.model_validate(llm_payload)
    LOG.info(
        "Parsed input/select context",
        context=parsed_context,
    )
    return parsed_context
async def extract_user_defined_errors(
    task: Task, step: Step, scraped_page: ScrapedPage, reasoning: str | None = None
) -> list[UserDefinedError]:
    """
    Ask the extraction LLM which user-defined errors apply to the current page.

    Refreshes the scraped page without drawing boxes, renders the
    "surface-user-defined-errors" prompt from the task, the action history and the
    refreshed page, and validates each entry of the response's ``errors`` list.

    Args:
        task: task providing the navigation goal, payload and error code mapping.
        step: current step; used for the action history and the LLM call.
        scraped_page: page to refresh before prompting.
        reasoning: optional reasoning text passed through to the prompt.

    Returns:
        The validated errors; an empty list when the response has no ``errors``
        key or its value is null/empty.
    """
    action_history = await get_action_history(task=task, current_step=step)
    scraped_page_refreshed = await scraped_page.refresh(draw_boxes=False)
    prompt = prompt_engine.load_prompt(
        "surface-user-defined-errors",
        navigation_goal=task.navigation_goal,
        navigation_payload_str=json.dumps(task.navigation_payload),
        elements=scraped_page_refreshed.build_element_tree(fmt=ElementTreeFormat.HTML),
        current_url=scraped_page_refreshed.url,
        action_history=json.dumps(action_history),
        error_code_mapping_str=json.dumps(task.error_code_mapping) if task.error_code_mapping else "{}",
        local_datetime=datetime.now(skyvern_context.ensure_context().tz_info).isoformat(),
        reasoning=reasoning,
    )
    json_response = await app.EXTRACTION_LLM_API_HANDLER(
        prompt=prompt,
        screenshots=scraped_page_refreshed.screenshots,
        step=step,
        prompt_name="surface-user-defined-errors",
    )
    # An explicit `"errors": null` would make .get("errors", []) return None and
    # break the iteration below; treat it the same as a missing key.
    raw_errors = json_response.get("errors") or []
    return [UserDefinedError.model_validate(error) for error in raw_errors]