import asyncio
import json
import os
import uuid
from typing import Any, Awaitable, Callable, List
import structlog
from deprecation import deprecated
from playwright.async_api import Locator, Page, TimeoutError
from skyvern.constants import INPUT_TEXT_TIMEOUT, REPO_ROOT_DIR
from skyvern.exceptions import (
ImaginaryFileUrl,
InputActionOnSelect2Dropdown,
InvalidElementForTextInput,
MissingElement,
MissingFileUrl,
MultipleElementsFound,
)
from skyvern.forge import app
from skyvern.forge.prompts import prompt_engine
from skyvern.forge.sdk.api.files import (
download_file,
get_number_of_files_in_directory,
get_path_for_workflow_download_directory,
)
from skyvern.forge.sdk.models import Step
from skyvern.forge.sdk.schemas.tasks import Task
from skyvern.forge.sdk.services.bitwarden import BitwardenConstants
from skyvern.forge.sdk.settings_manager import SettingsManager
from skyvern.webeye.actions import actions
from skyvern.webeye.actions.actions import (
Action,
ActionType,
CheckboxAction,
ClickAction,
ScrapeResult,
SelectOptionAction,
UploadFileAction,
WebAction,
)
from skyvern.webeye.actions.responses import ActionFailure, ActionResult, ActionSuccess
from skyvern.webeye.browser_factory import BrowserState
from skyvern.webeye.scraper.scraper import ScrapedPage
from skyvern.webeye.utils.dom import DomUtil, InteractiveElement, Select2Dropdown, resolve_locator
LOG = structlog.get_logger()
TEXT_INPUT_DELAY = 10 # 10ms between each character input
COMMON_INPUT_TAGS = {"input", "textarea", "select"}
class ActionHandler:
_handled_action_types: dict[
ActionType,
Callable[[Action, Page, ScrapedPage, Task, Step], Awaitable[list[ActionResult]]],
] = {}
_setup_action_types: dict[
ActionType,
Callable[[Action, Page, ScrapedPage, Task, Step], Awaitable[list[ActionResult]]],
] = {}
_teardown_action_types: dict[
ActionType,
Callable[[Action, Page, ScrapedPage, Task, Step], Awaitable[list[ActionResult]]],
] = {}
@classmethod
def register_action_type(
cls,
action_type: ActionType,
handler: Callable[[Action, Page, ScrapedPage, Task, Step], Awaitable[list[ActionResult]]],
) -> None:
cls._handled_action_types[action_type] = handler
@classmethod
def register_setup_for_action_type(
cls,
action_type: ActionType,
handler: Callable[[Action, Page, ScrapedPage, Task, Step], Awaitable[list[ActionResult]]],
) -> None:
cls._setup_action_types[action_type] = handler
@classmethod
def register_teardown_for_action_type(
cls,
action_type: ActionType,
handler: Callable[[Action, Page, ScrapedPage, Task, Step], Awaitable[list[ActionResult]]],
) -> None:
cls._teardown_action_types[action_type] = handler
@staticmethod
async def handle_action(
scraped_page: ScrapedPage,
task: Task,
step: Step,
browser_state: BrowserState,
action: Action,
) -> list[ActionResult]:
LOG.info("Handling action", action=action)
page = await browser_state.get_or_create_page()
try:
if action.action_type in ActionHandler._handled_action_types:
actions_result: list[ActionResult] = []
# do setup before action handler
if setup := ActionHandler._setup_action_types.get(action.action_type):
results = await setup(action, page, scraped_page, task, step)
actions_result.extend(results)
if results and results[-1] != ActionSuccess:
return actions_result
# do the handler
handler = ActionHandler._handled_action_types[action.action_type]
results = await handler(action, page, scraped_page, task, step)
actions_result.extend(results)
if not results or type(actions_result[-1]) != ActionSuccess:
return actions_result
# do the teardown
teardown = ActionHandler._teardown_action_types.get(action.action_type)
if not teardown:
return actions_result
results = await teardown(action, page, scraped_page, task, step)
actions_result.extend(results)
return actions_result
else:
LOG.error(
"Unsupported action type in handler",
action=action,
type=type(action),
)
return [ActionFailure(Exception(f"Unsupported action type: {type(action)}"))]
except MissingElement as e:
LOG.info(
"Known exceptions",
action=action,
exception_type=type(e),
exception_message=str(e),
)
return [ActionFailure(e)]
except MultipleElementsFound as e:
LOG.exception(
"Cannot handle multiple elements with the same xpath in one action.",
action=action,
)
return [ActionFailure(e)]
except Exception as e:
LOG.exception("Unhandled exception in action handler", action=action)
return [ActionFailure(e)]
async def handle_solve_captcha_action(
action: actions.SolveCaptchaAction,
page: Page,
scraped_page: ScrapedPage,
task: Task,
step: Step,
) -> list[ActionResult]:
LOG.warning(
"Please solve the captcha on the page, you have 30 seconds",
action=action,
)
await asyncio.sleep(30)
return [ActionSuccess()]
async def handle_click_action(
action: actions.ClickAction,
page: Page,
scraped_page: ScrapedPage,
task: Task,
step: Step,
) -> list[ActionResult]:
num_downloaded_files_before = 0
download_dir = None
if task.workflow_run_id:
download_dir = get_path_for_workflow_download_directory(task.workflow_run_id)
num_downloaded_files_before = get_number_of_files_in_directory(download_dir)
LOG.info(
"Number of files in download directory before click",
num_downloaded_files_before=num_downloaded_files_before,
download_dir=download_dir,
)
xpath, frame = await validate_actions_in_dom(action, page, scraped_page)
await asyncio.sleep(0.3)
if action.download:
results = await handle_click_to_download_file_action(action, page, scraped_page)
else:
results = await chain_click(
task,
scraped_page,
page,
action,
xpath,
frame,
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
)
if results and task.workflow_run_id and download_dir:
LOG.info("Sleeping for 5 seconds to let the download finish")
await asyncio.sleep(5)
num_downloaded_files_after = get_number_of_files_in_directory(download_dir)
LOG.info(
"Number of files in download directory after click",
num_downloaded_files_after=num_downloaded_files_after,
download_dir=download_dir,
)
if num_downloaded_files_after > num_downloaded_files_before:
results[-1].download_triggered = True
return results
async def handle_click_to_download_file_action(
action: actions.ClickAction,
page: Page,
scraped_page: ScrapedPage,
) -> list[ActionResult]:
xpath, frame = await validate_actions_in_dom(action, page, scraped_page)
locator = resolve_locator(scraped_page, page, frame, xpath)
try:
await locator.click(
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
modifiers=["Alt"],
)
except Exception as e:
LOG.exception("ClickAction with download failed", action=action, exc_info=True)
return [ActionFailure(e, download_triggered=False)]
return [ActionSuccess()]
async def handle_input_text_action(
action: actions.InputTextAction,
page: Page,
scraped_page: ScrapedPage,
task: Task,
step: Step,
) -> list[ActionResult]:
dom = DomUtil(scraped_page, page)
skyvern_element = await dom.get_skyvern_element_by_id(action.element_id)
if await skyvern_element.is_select2_dropdown():
return [ActionFailure(InputActionOnSelect2Dropdown(element_id=action.element_id))]
xpath, frame = await validate_actions_in_dom(action, page, scraped_page)
locator = resolve_locator(scraped_page, page, frame, xpath)
current_text = await get_input_value(locator)
if current_text == action.text:
return [ActionSuccess()]
# before filling text, we need to validate if the element can be filled if it's not one of COMMON_INPUT_TAGS
tag_name = scraped_page.id_to_element_dict[action.element_id]["tagName"].lower()
text = get_actual_value_of_parameter_if_secret(task, action.text)
try:
await locator.clear(timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS)
except TimeoutError:
LOG.info("None input tag clear timeout", action=action)
return [ActionFailure(InvalidElementForTextInput(element_id=action.element_id, tag_name=tag_name))]
except Exception:
LOG.warning("Failed to clear the input field", action=action, exc_info=True)
return [ActionFailure(InvalidElementForTextInput(element_id=action.element_id, tag_name=tag_name))]
if tag_name not in COMMON_INPUT_TAGS:
await locator.fill(text, timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS)
return [ActionSuccess()]
# If the input is a text input, we type the text character by character
# 3 times the time it takes to type the text so it has time to finish typing
await locator.press_sequentially(text, timeout=INPUT_TEXT_TIMEOUT)
return [ActionSuccess()]
async def handle_upload_file_action(
action: actions.UploadFileAction,
page: Page,
scraped_page: ScrapedPage,
task: Task,
step: Step,
) -> list[ActionResult]:
if not action.file_url:
LOG.warning("InputFileAction has no file_url", action=action)
return [ActionFailure(MissingFileUrl())]
# ************************************************************************************************************** #
# After this point if the file_url is a secret, it will be replaced with the actual value
# In order to make sure we don't log the secret value, we log the action with the original value action.file_url
# ************************************************************************************************************** #
file_url = get_actual_value_of_parameter_if_secret(task, action.file_url)
if file_url not in str(task.navigation_payload):
LOG.warning(
"LLM might be imagining the file url, which is not in navigation payload",
action=action,
file_url=action.file_url,
)
return [ActionFailure(ImaginaryFileUrl(action.file_url))]
xpath, frame = await validate_actions_in_dom(action, page, scraped_page)
file_path = await download_file(file_url)
locator = resolve_locator(scraped_page, page, frame, xpath)
is_file_input = await is_file_input_element(locator)
if is_file_input:
LOG.info("Taking UploadFileAction. Found file input tag", action=action)
if file_path:
locator = resolve_locator(scraped_page, page, frame, xpath)
await locator.set_input_files(
file_path,
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
)
# Sleep for 10 seconds after uploading a file to let the page process it
await asyncio.sleep(10)
return [ActionSuccess()]
else:
return [ActionFailure(Exception(f"Failed to download file from {action.file_url}"))]
else:
LOG.info("Taking UploadFileAction. Found non file input tag", action=action)
# treat it as a click action
action.is_upload_file_tag = False
return await chain_click(
task,
scraped_page,
page,
action,
xpath,
frame,
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
)
@deprecated("This function is deprecated. Downloads are handled by the click action handler now.")
async def handle_download_file_action(
action: actions.DownloadFileAction,
page: Page,
scraped_page: ScrapedPage,
task: Task,
step: Step,
) -> list[ActionResult]:
xpath, frame = await validate_actions_in_dom(action, page, scraped_page)
file_name = f"{action.file_name or uuid.uuid4()}"
full_file_path = f"{REPO_ROOT_DIR}/downloads/{task.workflow_run_id or task.task_id}/{file_name}"
try:
# Start waiting for the download
async with page.expect_download() as download_info:
await asyncio.sleep(0.3)
locator = resolve_locator(scraped_page, page, frame, xpath)
await locator.click(
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
modifiers=["Alt"],
)
download = await download_info.value
# Create download folders if they don't exist
download_folder = f"{REPO_ROOT_DIR}/downloads/{task.workflow_run_id or task.task_id}"
os.makedirs(download_folder, exist_ok=True)
# Wait for the download process to complete and save the downloaded file
await download.save_as(full_file_path)
except Exception as e:
LOG.exception(
"DownloadFileAction: Failed to download file",
action=action,
full_file_path=full_file_path,
)
return [ActionFailure(e)]
return [ActionSuccess(data={"file_path": full_file_path})]
async def handle_null_action(
action: actions.NullAction,
page: Page,
scraped_page: ScrapedPage,
task: Task,
step: Step,
) -> list[ActionResult]:
return [ActionSuccess()]
async def handle_select_option_action(
action: actions.SelectOptionAction,
page: Page,
scraped_page: ScrapedPage,
task: Task,
step: Step,
) -> list[ActionResult]:
dom = DomUtil(scraped_page, page)
skyvern_element = await dom.get_skyvern_element_by_id(action.element_id)
xpath, frame = await validate_actions_in_dom(action, page, scraped_page)
locator = resolve_locator(scraped_page, page, frame, xpath)
tag_name = await get_tag_name_lowercase(locator)
element_dict = scraped_page.id_to_element_dict[action.element_id]
LOG.info(
"SelectOptionAction",
action=action,
tag_name=tag_name,
element_dict=element_dict,
)
# if element is not a select option, prioritize clicking the linked element if any
if tag_name != "select" and "linked_element" in element_dict:
LOG.info(
"SelectOptionAction is not on a select tag and found a linked element",
action=action,
linked_element=element_dict["linked_element"],
)
listbox_click_success = await click_listbox_option(scraped_page, page, action, element_dict["linked_element"])
if listbox_click_success:
LOG.info(
"Successfully clicked linked element",
action=action,
linked_element=element_dict["linked_element"],
)
return [ActionSuccess()]
LOG.warning(
"Failed to click linked element",
action=action,
linked_element=element_dict["linked_element"],
)
# check if the element is an a tag first. If yes, click it instead of selecting the option
if tag_name == "label":
# label pointed to select2 element
select2_element_id: str | None = None
# search anchor first and then search anchor
select2_element_id = skyvern_element.find_element_id_in_label_children(InteractiveElement.A)
if select2_element_id is None:
select2_element_id = skyvern_element.find_element_id_in_label_children(InteractiveElement.INPUT)
if select2_element_id is not None:
select2_skyvern_element = await dom.get_skyvern_element_by_id(element_id=select2_element_id)
if await select2_skyvern_element.is_select2_dropdown():
LOG.info(
"SelectOptionAction is on