general selection (#675)
This commit is contained in:
@@ -3,7 +3,7 @@ import json
|
||||
import os
|
||||
import urllib.parse
|
||||
import uuid
|
||||
from datetime import datetime, timedelta
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Any, Awaitable, Callable, List
|
||||
|
||||
import structlog
|
||||
@@ -20,12 +20,13 @@ from skyvern.exceptions import (
|
||||
FailToSelectByLabel,
|
||||
FailToSelectByValue,
|
||||
ImaginaryFileUrl,
|
||||
InputActionOnSelect2Dropdown,
|
||||
InvalidElementForTextInput,
|
||||
MissingElement,
|
||||
MissingFileUrl,
|
||||
MultipleElementsFound,
|
||||
NoSelectableElementFound,
|
||||
NoElementMatchedForTargetOption,
|
||||
NoIncrementalElementFoundForCustomSelection,
|
||||
NoLabelOrValueForCustomSelection,
|
||||
OptionIndexOutOfBound,
|
||||
WrongElementToUploadFile,
|
||||
)
|
||||
@@ -36,6 +37,7 @@ from skyvern.forge.sdk.api.files import (
|
||||
get_number_of_files_in_directory,
|
||||
get_path_for_workflow_download_directory,
|
||||
)
|
||||
from skyvern.forge.sdk.api.llm.api_handler_factory import LLMAPIHandler
|
||||
from skyvern.forge.sdk.core.aiohttp_helper import aiohttp_post
|
||||
from skyvern.forge.sdk.core.security import generate_skyvern_signature
|
||||
from skyvern.forge.sdk.db.enums import OrganizationAuthTokenType
|
||||
@@ -56,8 +58,8 @@ from skyvern.webeye.actions.actions import (
|
||||
)
|
||||
from skyvern.webeye.actions.responses import ActionFailure, ActionResult, ActionSuccess
|
||||
from skyvern.webeye.browser_factory import BrowserState, get_download_dir
|
||||
from skyvern.webeye.scraper.scraper import ElementTreeFormat, ScrapedPage
|
||||
from skyvern.webeye.utils.dom import AbstractSelectDropdown, DomUtil, SkyvernElement
|
||||
from skyvern.webeye.scraper.scraper import ElementTreeFormat, IncrementalScrapePage, ScrapedPage
|
||||
from skyvern.webeye.utils.dom import DomUtil, InteractiveElement, SkyvernElement
|
||||
from skyvern.webeye.utils.page import SkyvernFrame
|
||||
|
||||
LOG = structlog.get_logger()
|
||||
@@ -286,8 +288,6 @@ async def handle_input_text_action(
|
||||
) -> list[ActionResult]:
|
||||
dom = DomUtil(scraped_page, page)
|
||||
skyvern_element = await dom.get_skyvern_element_by_id(action.element_id)
|
||||
if await skyvern_element.is_select2_dropdown():
|
||||
return [ActionFailure(InputActionOnSelect2Dropdown(element_id=action.element_id))]
|
||||
|
||||
current_text = await get_input_value(skyvern_element.get_tag_name(), skyvern_element.get_locator())
|
||||
if current_text == action.text:
|
||||
@@ -469,122 +469,21 @@ async def handle_select_option_action(
|
||||
)
|
||||
return [ActionFailure(ErrFoundSelectableElement(action.element_id, e))]
|
||||
|
||||
if selectable_child is None:
|
||||
LOG.error(
|
||||
"No selectable element found in chidren",
|
||||
tag_name=tag_name,
|
||||
action=action,
|
||||
)
|
||||
return [ActionFailure(NoSelectableElementFound(action.element_id))]
|
||||
|
||||
LOG.info(
|
||||
"Found selectable element in the children",
|
||||
tag_name=selectable_child.get_tag_name(),
|
||||
element_id=selectable_child.get_id(),
|
||||
)
|
||||
select_action = SelectOptionAction(element_id=selectable_child.get_id(), option=action.option)
|
||||
return await handle_select_option_action(select_action, page, scraped_page, task, step)
|
||||
|
||||
select_framework: AbstractSelectDropdown | None = None
|
||||
|
||||
if await skyvern_element.is_combobox_dropdown():
|
||||
LOG.info(
|
||||
"This is a combobox dropdown",
|
||||
action=action,
|
||||
)
|
||||
select_framework = await skyvern_element.get_combobox_dropdown()
|
||||
if await skyvern_element.is_select2_dropdown():
|
||||
LOG.info(
|
||||
"This is a select2 dropdown",
|
||||
action=action,
|
||||
)
|
||||
select_framework = await skyvern_element.get_select2_dropdown()
|
||||
if await skyvern_element.is_react_select_dropdown():
|
||||
LOG.info(
|
||||
"This is a react select dropdown",
|
||||
action=action,
|
||||
)
|
||||
select_framework = await skyvern_element.get_react_select_dropdown()
|
||||
|
||||
if select_framework is not None:
|
||||
timeout = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS
|
||||
|
||||
try:
|
||||
current_value = await select_framework.get_current_value()
|
||||
if current_value == action.option.label or current_value == action.option.value:
|
||||
return [ActionSuccess()]
|
||||
except Exception:
|
||||
if selectable_child:
|
||||
LOG.info(
|
||||
"failed to confirm if the select option has been done, force to take the action again.",
|
||||
exc_info=True,
|
||||
"Found selectable element in the children",
|
||||
tag_name=selectable_child.get_tag_name(),
|
||||
element_id=selectable_child.get_id(),
|
||||
)
|
||||
select_action = SelectOptionAction(element_id=selectable_child.get_id(), option=action.option)
|
||||
return await handle_select_option_action(select_action, page, scraped_page, task, step)
|
||||
|
||||
await select_framework.open()
|
||||
options = await select_framework.get_options()
|
||||
|
||||
result: List[ActionResult] = []
|
||||
# select by label first, then by index
|
||||
if action.option.label is not None or action.option.value is not None:
|
||||
try:
|
||||
for option in options:
|
||||
option_content = option.get("text")
|
||||
option_index = option.get("optionIndex", None)
|
||||
if option_index is None:
|
||||
LOG.warning(
|
||||
f"{select_framework.name()} option index is None",
|
||||
option=option,
|
||||
)
|
||||
continue
|
||||
if action.option.label == option_content or action.option.value == option_content:
|
||||
await select_framework.select_by_index(index=option_index, timeout=timeout)
|
||||
result.append(ActionSuccess())
|
||||
return result
|
||||
LOG.info(
|
||||
f"no target {select_framework.name()} option matched by label, try to select by index",
|
||||
action=action,
|
||||
)
|
||||
except Exception as e:
|
||||
result.append(ActionFailure(e))
|
||||
LOG.info(
|
||||
f"failed to select by label in {select_framework.name()}, try to select by index",
|
||||
exc_info=True,
|
||||
action=action,
|
||||
)
|
||||
|
||||
if action.option.index is not None:
|
||||
if action.option.index >= len(options):
|
||||
result.append(ActionFailure(OptionIndexOutOfBound(action.element_id)))
|
||||
else:
|
||||
try:
|
||||
option_content = options[action.option.index].get("text")
|
||||
if option_content != action.option.label:
|
||||
LOG.warning(
|
||||
"Select option label is not consistant to the action value. Might select wrong option.",
|
||||
option_content=option_content,
|
||||
action=action,
|
||||
)
|
||||
await select_framework.select_by_index(index=action.option.index, timeout=timeout)
|
||||
result.append(ActionSuccess())
|
||||
return result
|
||||
except Exception:
|
||||
result.append(ActionFailure(FailToSelectByIndex(action.element_id)))
|
||||
LOG.info(
|
||||
f"failed to select by index in {select_framework.name()}",
|
||||
exc_info=True,
|
||||
action=action,
|
||||
)
|
||||
|
||||
if len(result) == 0:
|
||||
result.append(ActionFailure(EmptySelect(action.element_id)))
|
||||
|
||||
if isinstance(result[-1], ActionFailure):
|
||||
LOG.info(
|
||||
f"Failed to select a {select_framework.name()} option, close the dropdown",
|
||||
action=action,
|
||||
)
|
||||
await select_framework.close()
|
||||
|
||||
return result
|
||||
if tag_name == InteractiveElement.SELECT:
|
||||
LOG.info(
|
||||
"SelectOptionAction is on <select>",
|
||||
action=action,
|
||||
)
|
||||
return await normal_select(action=action, skyvern_element=skyvern_element)
|
||||
|
||||
if await skyvern_element.is_checkbox():
|
||||
LOG.info(
|
||||
@@ -602,7 +501,99 @@ async def handle_select_option_action(
|
||||
click_action = ClickAction(element_id=action.element_id)
|
||||
return await chain_click(task, scraped_page, page, click_action, skyvern_element)
|
||||
|
||||
return await normal_select(action=action, skyvern_element=skyvern_element)
|
||||
LOG.info(
|
||||
"Trigger custom select",
|
||||
action=action,
|
||||
)
|
||||
|
||||
timeout = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS
|
||||
skyvern_frame = await SkyvernFrame.create_instance(skyvern_element.get_frame())
|
||||
incremental_scraped = IncrementalScrapePage(skyvern_frame=skyvern_frame)
|
||||
llm_handler = app.SECONDARY_LLM_API_HANDLER
|
||||
is_open = False
|
||||
|
||||
try:
|
||||
await incremental_scraped.start_listen_dom_increment()
|
||||
await skyvern_element.get_locator().focus(timeout=timeout)
|
||||
|
||||
if tag_name == InteractiveElement.INPUT:
|
||||
await skyvern_element.get_locator().press("ArrowDown", timeout=timeout)
|
||||
else:
|
||||
await skyvern_element.get_locator().click(timeout=timeout)
|
||||
|
||||
# wait 5s for options to load
|
||||
await asyncio.sleep(5)
|
||||
is_open = True
|
||||
|
||||
incremental_element = await incremental_scraped.get_incremental_element_tree(
|
||||
app.AGENT_FUNCTION.cleanup_element_tree
|
||||
)
|
||||
if len(incremental_element) == 0:
|
||||
raise NoIncrementalElementFoundForCustomSelection(element_id=action.element_id)
|
||||
|
||||
dropdown_menu_element = await locate_dropdown_meanu(
|
||||
incremental_scraped=incremental_scraped,
|
||||
element_trees=incremental_element,
|
||||
llm_handler=llm_handler,
|
||||
step=step,
|
||||
task=task,
|
||||
)
|
||||
|
||||
if dropdown_menu_element and dropdown_menu_element.get_scrollable():
|
||||
await scroll_down_to_load_all_options(
|
||||
dropdown_menu_element=dropdown_menu_element,
|
||||
skyvern_frame=skyvern_frame,
|
||||
page=page,
|
||||
incremental_scraped=incremental_scraped,
|
||||
step=step,
|
||||
task=task,
|
||||
)
|
||||
|
||||
await incremental_scraped.get_incremental_element_tree(app.AGENT_FUNCTION.cleanup_element_tree)
|
||||
# TODO: maybe take a screenshot for every tree head element to figure out which is the dropdown menu
|
||||
html = incremental_scraped.build_html_tree()
|
||||
|
||||
target_value = action.option.label or action.option.value
|
||||
if target_value is None:
|
||||
raise NoLabelOrValueForCustomSelection(element_id=action.element_id)
|
||||
|
||||
prompt = prompt_engine.load_prompt(
|
||||
"custom-select", context_reasoning=action.reasoning, target_value=target_value, elements=html
|
||||
)
|
||||
|
||||
LOG.info(
|
||||
"Calling LLM to find the match element",
|
||||
target_value=target_value,
|
||||
step_id=step.step_id,
|
||||
task_id=task.task_id,
|
||||
)
|
||||
json_response = await llm_handler(prompt=prompt, step=step)
|
||||
LOG.info(
|
||||
"LLM response for the matched element",
|
||||
target_value=target_value,
|
||||
response=json_response,
|
||||
step_id=step.step_id,
|
||||
task_id=task.task_id,
|
||||
)
|
||||
|
||||
element_id: str | None = json_response.get("id", None)
|
||||
if not element_id:
|
||||
raise NoElementMatchedForTargetOption(target=target_value, reason=json_response.get("reasoning"))
|
||||
|
||||
selected_element = await SkyvernElement.create_from_incremental(incremental_scraped, element_id)
|
||||
await selected_element.scroll_into_view()
|
||||
await selected_element.get_locator().click(timeout=timeout)
|
||||
return [ActionSuccess()]
|
||||
|
||||
except Exception as e:
|
||||
if is_open:
|
||||
await skyvern_element.scroll_into_view()
|
||||
await skyvern_element.coordinate_click(page=page)
|
||||
await skyvern_element.get_locator().press("Escape", timeout=timeout)
|
||||
LOG.exception("custome select error")
|
||||
return [ActionFailure(exception=e)]
|
||||
finally:
|
||||
await incremental_scraped.stop_listen_dom_increment()
|
||||
|
||||
|
||||
async def handle_checkbox_action(
|
||||
@@ -836,6 +827,113 @@ async def chain_click(
|
||||
return [ActionFailure(WrongElementToUploadFile(action.element_id))]
|
||||
|
||||
|
||||
async def locate_dropdown_meanu(
    incremental_scraped: IncrementalScrapePage,
    element_trees: list[dict],
    llm_handler: LLMAPIHandler,
    step: Step | None = None,
    task: Task | None = None,
) -> SkyvernElement | None:
    """Find the first tree head that the LLM confirms is an opened dropdown menu.

    Each candidate with an ``id`` is screenshotted and the screenshot is sent to
    ``llm_handler`` together with the "opened-dropdown-confirm" prompt. The first
    candidate whose response has a truthy ``is_opened_dropdown_menu`` is returned
    as a ``SkyvernElement``; ``None`` is returned when no candidate is confirmed.

    Only the first 10 entries of ``element_trees`` are inspected (entries without
    an ``id`` still count toward that limit).
    """
    log_ids = {
        "step_id": step.step_id if step else "none",
        "task_id": task.task_id if task else "none",
    }

    # FIXME: confirm max to 10 nodes for now, preventing sending too many requests to LLM
    for candidate in element_trees[:10]:
        candidate_id = candidate.get("id")
        if not candidate_id:
            LOG.info(
                "Skip the non-interactable element for the dropdown menu confirm",
                **log_ids,
                element=candidate,
            )
            continue

        candidate_element = await SkyvernElement.create_from_incremental(incremental_scraped, candidate_id)
        screenshot_timeout = SettingsManager.get_settings().BROWSER_SCREENSHOT_TIMEOUT_MS
        candidate_screenshot = await candidate_element.get_locator().screenshot(timeout=screenshot_timeout)
        confirm_prompt = prompt_engine.load_prompt("opened-dropdown-confirm")
        LOG.info(
            "Confirm if it's an opened dropdown menu",
            **log_ids,
            element=candidate,
        )
        llm_answer = await llm_handler(prompt=confirm_prompt, screenshots=[candidate_screenshot], step=step)
        if not llm_answer.get("is_opened_dropdown_menu"):
            continue

        # Build a fresh element for the caller, exactly as the confirmation flow expects.
        return await SkyvernElement.create_from_incremental(incre_page=incremental_scraped, element_id=candidate_id)

    return None
|
||||
|
||||
|
||||
async def scroll_down_to_load_all_options(
    dropdown_menu_element: SkyvernElement,
    page: Page,
    skyvern_frame: SkyvernFrame,
    incremental_scraped: IncrementalScrapePage,
    step: Step | None = None,
    task: Task | None = None,
) -> None:
    """Scroll an opened dropdown menu until no new incremental elements appear.

    The menu is repeatedly scrolled to its bottom (through the element handle, or
    with the mouse wheel when no handle is available). After each scroll the
    number of recorded incremental elements is compared with the previous count;
    the loop stops when the count is unchanged or when ``OPTION_LOADING_TIMEOUT_MS``
    has elapsed. Finally the menu is scrolled back to where it started.
    """
    log_ids = {
        "step_id": step.step_id if step else "none",
        "task_id": task.task_id if task else "none",
    }
    LOG.info("Scroll down the dropdown menu to load all options", **log_ids)
    timeout = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS

    menu_handle = await dropdown_menu_element.get_locator().element_handle(timeout=timeout)
    has_handle = menu_handle is not None
    if has_handle:
        await menu_handle.scroll_into_view_if_needed(timeout=timeout)
    else:
        LOG.info("element handle is None, using focus to move the cursor", element_id=dropdown_menu_element.get_id())
        await dropdown_menu_element.get_locator().focus(timeout=timeout)

    await dropdown_menu_element.move_mouse_to(page=page)

    # Total distance scrolled with the mouse wheel, so it can be undone afterwards.
    wheel_distance = 0
    last_count = await incremental_scraped.get_incremental_elements_num()

    deadline = datetime.now(timezone.utc) + timedelta(
        milliseconds=SettingsManager.get_settings().OPTION_LOADING_TIMEOUT_MS
    )
    while True:
        if datetime.now(timezone.utc) >= deadline:
            LOG.warning("Timeout to load all options, maybe some options will be missed")
            break

        # make sure we can scroll to the bottom
        wheel_step = SettingsManager.get_settings().BROWSER_HEIGHT * 5
        if has_handle:
            await skyvern_frame.scroll_to_element_bottom(menu_handle)
        else:
            LOG.info("element handle is None, using mouse to scroll down", element_id=dropdown_menu_element.get_id())
            await page.mouse.wheel(0, wheel_step)
            wheel_distance += wheel_step

        # scroll a little back and scroll down again to trigger the loading
        await page.mouse.wheel(0, -20)
        await page.mouse.wheel(0, 20)
        # wait for a while to load new options
        await asyncio.sleep(5)

        new_count = await incremental_scraped.get_incremental_elements_num()
        LOG.info(
            "Current incremental elements count during the scrolling",
            num=new_count,
            **log_ids,
        )
        if new_count == last_count:
            # nothing new was recorded by the last scroll
            break
        last_count = new_count

    # scroll back to the start point and wait for a while to make all options invisible on the page
    if has_handle:
        await skyvern_frame.scroll_to_element_top(menu_handle)
    else:
        LOG.info("element handle is None, using mouse to scroll back", element_id=dropdown_menu_element.get_id())
        await page.mouse.wheel(0, -wheel_distance)
    await asyncio.sleep(5)
|
||||
|
||||
|
||||
async def normal_select(
|
||||
action: actions.SelectOptionAction,
|
||||
skyvern_element: SkyvernElement,
|
||||
|
||||
@@ -386,19 +386,8 @@ function isInteractable(element) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (
|
||||
tagName === "div" ||
|
||||
tagName === "img" ||
|
||||
tagName === "span" ||
|
||||
tagName === "a" ||
|
||||
tagName === "i"
|
||||
) {
|
||||
const computedStyle = window.getComputedStyle(element);
|
||||
const hasPointer = computedStyle.cursor === "pointer";
|
||||
return hasPointer;
|
||||
}
|
||||
|
||||
// support listbox and options underneath it
|
||||
// div element should be checked here before the css pointer
|
||||
if (
|
||||
(tagName === "ul" || tagName === "div") &&
|
||||
element.hasAttribute("role") &&
|
||||
@@ -414,9 +403,53 @@ function isInteractable(element) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (
|
||||
tagName === "div" &&
|
||||
element.hasAttribute("aria-disabled") &&
|
||||
element.getAttribute("aria-disabled").toLowerCase() === "false"
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (
|
||||
tagName === "div" ||
|
||||
tagName === "img" ||
|
||||
tagName === "span" ||
|
||||
tagName === "a" ||
|
||||
tagName === "i"
|
||||
) {
|
||||
const computedStyle = window.getComputedStyle(element);
|
||||
const hasPointer = computedStyle.cursor === "pointer";
|
||||
return hasPointer;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// An element is scrollable when its content is larger than its box on either
// axis AND its computed overflow style allows scrolling.
function isScrollable(element) {
  const overflowsVertically =
    (element.scrollHeight || 0) > (element.clientHeight || 0);
  const overflowsHorizontally =
    (element.scrollWidth || 0) > (element.clientWidth || 0);
  const overflowAllowsScroll = isScrollableOverflow(element);

  if (!overflowsVertically && !overflowsHorizontally) {
    return false;
  }
  return overflowAllowsScroll;
}
|
||||
|
||||
// True when any of the computed overflow / overflow-x / overflow-y values is
// "auto" or "scroll".
function isScrollableOverflow(element) {
  const { overflow, overflowX, overflowY } = window.getComputedStyle(element);
  return [overflow, overflowX, overflowY].some(
    (value) => value === "auto" || value === "scroll",
  );
}
|
||||
|
||||
const isComboboxDropdown = (element) => {
|
||||
if (element.tagName.toLowerCase() !== "input") {
|
||||
return false;
|
||||
@@ -436,8 +469,8 @@ const isComboboxDropdown = (element) => {
|
||||
|
||||
const isSelect2Dropdown = (element) => {
|
||||
return (
|
||||
element.tagName.toLowerCase() === "span" &&
|
||||
element.className.toString().includes("select2-chosen")
|
||||
element.tagName.toLowerCase() === "a" &&
|
||||
element.className.toString().includes("select2-choice")
|
||||
);
|
||||
};
|
||||
|
||||
@@ -805,6 +838,14 @@ function uniqueId() {
|
||||
}
|
||||
|
||||
// Build the element tree for the whole document by starting from <body>.
async function buildTreeFromBody(frame = "main.frame", open_select = false) {
  const starter = document.body;
  return buildElementTree(starter, frame, open_select);
}
|
||||
|
||||
async function buildElementTree(
|
||||
starter = document.body,
|
||||
frame = "main.frame",
|
||||
open_select = false,
|
||||
) {
|
||||
var elements = [];
|
||||
var resultArray = [];
|
||||
|
||||
@@ -863,6 +904,13 @@ async function buildTreeFromBody(frame = "main.frame", open_select = false) {
|
||||
// don't trim any attr of this element if keepAllAttr=True
|
||||
keepAllAttr:
|
||||
elementTagNameLower === "svg" || element.closest("svg") !== null,
|
||||
isSelectable:
|
||||
elementTagNameLower === "select" ||
|
||||
isReactSelectDropdown(element) ||
|
||||
isComboboxDropdown(element) ||
|
||||
isSelect2Dropdown(element) ||
|
||||
isSelect2MultiChoice(element),
|
||||
isScrollable: isScrollable(element),
|
||||
};
|
||||
|
||||
let isInShadowRoot = element.getRootNode() instanceof ShadowRoot;
|
||||
@@ -882,94 +930,8 @@ async function buildTreeFromBody(frame = "main.frame", open_select = false) {
|
||||
let selectedValue = "";
|
||||
if (elementTagNameLower === "select") {
|
||||
[selectOptions, selectedValue] = getSelectOptions(element);
|
||||
} else if (attrs["role"] && attrs["role"].toLowerCase() === "listbox") {
|
||||
// if "role" key is inside attrs, then get all the elements with role "option" and get their text
|
||||
selectOptions = getListboxOptions(element);
|
||||
} else if (open_select && isReactSelectDropdown(element)) {
|
||||
element.dispatchEvent(
|
||||
new MouseEvent("mouseup", {
|
||||
bubbles: true,
|
||||
view: window,
|
||||
}),
|
||||
);
|
||||
element.dispatchEvent(
|
||||
new MouseEvent("mousedown", {
|
||||
bubbles: true,
|
||||
view: window,
|
||||
}),
|
||||
);
|
||||
|
||||
selectOptions = await getReactSelectOptions(element);
|
||||
|
||||
// click again to close
|
||||
element.dispatchEvent(
|
||||
new MouseEvent("mouseup", {
|
||||
bubbles: true,
|
||||
view: window,
|
||||
}),
|
||||
);
|
||||
element.dispatchEvent(
|
||||
new MouseEvent("mousedown", {
|
||||
bubbles: true,
|
||||
view: window,
|
||||
}),
|
||||
);
|
||||
element.dispatchEvent(
|
||||
new KeyboardEvent("keydown", {
|
||||
keyCode: 27,
|
||||
bubbles: true,
|
||||
key: "Escape",
|
||||
}),
|
||||
);
|
||||
} else if (open_select && isComboboxDropdown(element)) {
|
||||
// open combobox dropdown to get options
|
||||
element.click();
|
||||
const listBox = element
|
||||
.getRootNode()
|
||||
.getElementById(element.getAttribute("aria-controls"));
|
||||
if (listBox) {
|
||||
selectOptions = getListboxOptions(listBox);
|
||||
}
|
||||
// HACK: press Tab to close the dropdown
|
||||
element.dispatchEvent(
|
||||
new KeyboardEvent("keydown", {
|
||||
keyCode: 9,
|
||||
bubbles: true,
|
||||
key: "Tab",
|
||||
}),
|
||||
);
|
||||
} else if (open_select && isSelect2Dropdown(element)) {
|
||||
// click element to show options
|
||||
element.dispatchEvent(
|
||||
new MouseEvent("mousedown", {
|
||||
bubbles: true,
|
||||
view: window,
|
||||
}),
|
||||
);
|
||||
|
||||
selectOptions = await getSelect2Options(element);
|
||||
|
||||
// HACK: click again to close the dropdown
|
||||
element.dispatchEvent(
|
||||
new MouseEvent("mousedown", {
|
||||
bubbles: true,
|
||||
view: window,
|
||||
}),
|
||||
);
|
||||
} else if (open_select && isSelect2MultiChoice(element)) {
|
||||
// click element to show options
|
||||
element.click();
|
||||
selectOptions = await getSelect2Options(element);
|
||||
|
||||
// HACK: press ESC to close the dropdown
|
||||
element.dispatchEvent(
|
||||
new KeyboardEvent("keydown", {
|
||||
keyCode: 27,
|
||||
bubbles: true,
|
||||
key: "Escape",
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
if (selectOptions) {
|
||||
elementObj.options = selectOptions;
|
||||
}
|
||||
@@ -1308,9 +1270,8 @@ async function buildTreeFromBody(frame = "main.frame", open_select = false) {
|
||||
return trimmedResults;
|
||||
};
|
||||
|
||||
// TODO: Handle iframes
|
||||
// setup before parsing the dom
|
||||
await processElement(document.body, null);
|
||||
await processElement(starter, null);
|
||||
|
||||
for (var element of elements) {
|
||||
if (
|
||||
@@ -1568,6 +1529,22 @@ async function scrollToNextPage(draw_boxes) {
|
||||
return window.scrollY;
|
||||
}
|
||||
|
||||
// Jump the element's own scroll position to the very bottom of its content.
function scrollToElementBottom(element) {
  const bottomPosition = {
    top: element.scrollHeight,
    left: 0,
    behavior: "instant",
  };
  element.scroll(bottomPosition);
}
|
||||
|
||||
// Jump the element's own scroll position back to the top-left corner.
function scrollToElementTop(element) {
  const topPosition = {
    top: 0,
    left: 0,
    behavior: "instant",
  };
  element.scroll(topPosition);
}
|
||||
|
||||
// Resolve (with no value) after roughly `ms` milliseconds.
async function sleep(ms) {
  await new Promise((done) => {
    setTimeout(done, ms);
  });
}
|
||||
@@ -1589,3 +1566,140 @@ function findNodeById(arr, targetId, path = []) {
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
// Count how many parentElement hops separate `elementNode` from the first
// element child of its root node (0 when it already is that element).
function getElementDomDepth(elementNode) {
  const rootElement = elementNode.getRootNode().firstElementChild;
  let hops = 0;
  for (
    let current = elementNode;
    current !== rootElement && current.parentElement;
    current = current.parentElement
  ) {
    hops += 1;
  }
  return hops;
}
|
||||
|
||||
// Buffer of DOM changes recorded by the observer below. Each entry has the
// shape { targetNode, newNodes }. Created only once per window.
if (window.globalOneTimeIncrementElements === undefined) {
  window.globalOneTimeIncrementElements = [];
}

// Single shared MutationObserver that records newly shown / newly added
// elements into window.globalOneTimeIncrementElements. Created only once per
// window; it does nothing until observe() is called on it.
if (window.globalObserverForDOMIncrement === undefined) {
  window.globalObserverForDOMIncrement = new MutationObserver(function (
    mutationsList,
  ) {
    for (const mutation of mutationsList) {
      if (mutation.type === "attributes") {
        if (mutation.attributeName === "style") {
          // TODO: need to confirm that element is hidden previously
          // Declared locally: an undeclared assignment here would create an
          // implicit global (and throw in strict mode / ES modules).
          const node = mutation.target;
          if (node.nodeType === Node.TEXT_NODE) continue;
          const newStyle = window.getComputedStyle(node);
          const newDisplay = newStyle.display;
          // a style change that leaves the node displayed is recorded as a
          // "new" node whose target is itself
          if (newDisplay !== "none") {
            window.globalOneTimeIncrementElements.push({
              targetNode: node,
              newNodes: [node],
            });
          }
        }

        // TODO: we maybe need to detect the visibility change from class
        // if (mutation.attributeName === "class") {
        // }
      }

      if (mutation.type === "childList") {
        let changedNode = {
          targetNode: mutation.target, // TODO: for future usage, when we want to parse new elements into a tree
        };
        let newNodes = [];
        if (mutation.addedNodes && mutation.addedNodes.length > 0) {
          for (const node of mutation.addedNodes) {
            // skip the text nodes, they won't be interactable
            if (node.nodeType === Node.TEXT_NODE) continue;
            newNodes.push(node);
          }
        }
        // only record the mutation when it added at least one non-text node
        if (newNodes.length > 0) {
          changedNode.newNodes = newNodes;
          window.globalOneTimeIncrementElements.push(changedNode);
        }
      }
    }
  });
}
|
||||
|
||||
// Reset the recorded increments, discard any pending mutation records, then
// start observing attribute / child-list / character-data changes under <body>.
function startGlobalIncrementalObserver() {
  const domObserver = window.globalObserverForDOMIncrement;
  const observeOptions = {
    attributes: true,
    attributeOldValue: true,
    childList: true,
    subtree: true,
    characterData: true,
  };

  window.globalOneTimeIncrementElements = [];
  domObserver.takeRecords(); // cleanup the older data
  domObserver.observe(document.body, observeOptions);
}
|
||||
|
||||
// Stop observing, discard any pending mutation records and clear the recorded
// increments.
function stopGlobalIncrementalObserver() {
  const domObserver = window.globalObserverForDOMIncrement;
  domObserver.disconnect();
  domObserver.takeRecords(); // cleanup the older data
  window.globalOneTimeIncrementElements = [];
}
|
||||
|
||||
// Turn the DOM changes recorded in window.globalOneTimeIncrementElements into
// element trees.
//
// Returns [flatElements, treeHeads]:
//   - treeHeads: de-duplicated list of tree head elements built from the
//     recorded new nodes;
//   - flatElements: every element reachable from those heads, each id once.
async function getIncrementElements(frame) {
  // depth of the mutation target -> trees built from the nodes added under it
  const domDepthMap = new Map();

  for (const element of window.globalOneTimeIncrementElements) {
    // calculate the depth of targetNode element for sorting
    const depth = getElementDomDepth(element.targetNode);
    const newNodesTreeList = domDepthMap.has(depth)
      ? domDepthMap.get(depth)
      : [];

    for (const child of element.newNodes) {
      const [, newNodeTree] = await buildElementTree(child, frame, false);
      if (newNodeTree.length > 0) {
        newNodesTreeList.push(...newNodeTree);
      }
    }
    domDepthMap.set(depth, newNodesTreeList);
  }

  // cleanup the children tree, remove the duplicated element
  // search starting from the shallowest node:
  // 1. if deeper, the node could only be the children of the shallower one or no related one.
  // 2. if depth is same, the node could only be duplicated one or no related one.
  const idToElement = new Map();
  const cleanedTreeList = [];
  // Depths are numbers: the default sort() compares them as strings and would
  // put depth 10 before depth 2, so an explicit numeric comparator is needed
  // for the "shallowest first" guarantee above.
  const sortedDepth = Array.from(domDepthMap.keys()).sort((a, b) => a - b);
  for (const depth of sortedDepth) {
    const treeList = domDepthMap.get(depth);

    for (const treeHeadElement of treeList) {
      // already covered by a shallower (or identical) tree
      if (idToElement.has(treeHeadElement.id)) {
        continue;
      }
      cleanedTreeList.push(treeHeadElement);

      // flatten the tree (breadth-first), registering each id once
      const pendingElements = [treeHeadElement];
      let curIndex = 0;
      while (curIndex < pendingElements.length) {
        const curElement = pendingElements[curIndex];
        curIndex++;
        if (idToElement.has(curElement.id)) {
          continue;
        }

        idToElement.set(curElement.id, curElement);
        pendingElements.push(...curElement.children);
      }
    }
  }

  return [Array.from(idToElement.values()), cleanedTreeList];
}
|
||||
|
||||
@@ -96,6 +96,9 @@ def json_to_html(element: dict) -> str:
|
||||
attributes_html = " ".join(build_attribute(key, value) for key, value in attributes.items())
|
||||
|
||||
tag = element["tagName"]
|
||||
if element.get("isSelectable", False):
|
||||
tag = "select"
|
||||
|
||||
text = element.get("text", "")
|
||||
# build children HTML
|
||||
children_html = "".join(json_to_html(child) for child in element.get("children", []))
|
||||
@@ -112,6 +115,21 @@ def json_to_html(element: dict) -> str:
|
||||
return f'<{tag}{attributes_html if not attributes_html else " "+attributes_html}>{text}{children_html+option_html}</{tag}>'
|
||||
|
||||
|
||||
def build_element_dict(elements: list[dict]) -> tuple[dict[str, str], dict[str, dict], dict[str, str]]:
    """Index scraped elements by their id.

    Returns three dicts keyed by element id (``""`` when an element has no
    ``id``): the CSS selector for the element, the element dict itself, and the
    element's ``frame`` value. Raises ``KeyError`` if an element has no
    ``frame`` key.
    """
    keyed_elements = [(element.get("id", ""), element) for element in elements]

    # get_interactable_element_tree marks each interactable element with a unique_id attribute
    id_to_css_dict: dict[str, str] = {
        element_id: f"[{SKYVERN_ID_ATTR}='{element_id}']" for element_id, _ in keyed_elements
    }
    id_to_element_dict: dict[str, dict] = dict(keyed_elements)
    id_to_frame_dict: dict[str, str] = {element_id: element["frame"] for element_id, element in keyed_elements}

    return id_to_css_dict, id_to_element_dict, id_to_frame_dict
|
||||
|
||||
|
||||
class ElementTreeFormat(StrEnum):
|
||||
JSON = "json"
|
||||
HTML = "html"
|
||||
@@ -266,16 +284,7 @@ async def scrape_web_unsafe(
|
||||
elements, element_tree = await get_interactable_element_tree(page, scrape_exclude)
|
||||
element_tree = await cleanup_element_tree(url, copy.deepcopy(element_tree))
|
||||
|
||||
id_to_css_dict = {}
|
||||
id_to_element_dict = {}
|
||||
id_to_frame_dict = {}
|
||||
|
||||
for element in elements:
|
||||
element_id = element["id"]
|
||||
# get_interactable_element_tree marks each interactable element with a unique_id attribute
|
||||
id_to_css_dict[element_id] = f"[{SKYVERN_ID_ATTR}='{element_id}']"
|
||||
id_to_element_dict[element_id] = element
|
||||
id_to_frame_dict[element_id] = element["frame"]
|
||||
id_to_css_dict, id_to_element_dict, id_to_frame_dict = build_element_dict(elements)
|
||||
|
||||
text_content = await get_frame_text(page.main_frame)
|
||||
|
||||
@@ -378,6 +387,65 @@ async def get_interactable_element_tree(
|
||||
return elements, element_tree
|
||||
|
||||
|
||||
class IncrementalScrapePage:
    """
    Holds the elements that appeared in one SkyvernFrame while the incremental DOM observer
    was running, together with the lookup tables built from them.

    All containers are created per instance in __init__ and are replaced on every call to
    get_incremental_element_tree.
    """

    id_to_element_dict: dict[str, dict]
    id_to_css_dict: dict[str, str]
    elements: list[dict]
    element_tree: list[dict]
    element_tree_trimmed: list[dict]

    def __init__(self, skyvern_frame: SkyvernFrame) -> None:
        self.skyvern_frame = skyvern_frame
        # Initialise every container here rather than as a class-level literal, so instances
        # never share a mutable dict and no attribute is missing before the first scrape.
        self.id_to_element_dict = {}
        self.id_to_css_dict = {}
        self.elements = []
        self.element_tree = []
        self.element_tree_trimmed = []

    async def get_incremental_element_tree(
        self,
        cleanup_element_tree: Callable[[str, list[dict]], Awaitable[list[dict]]],
    ) -> list[dict]:
        """
        Fetch the incremental elements from the frame, rebuild the lookup tables and trees,
        and return the trimmed element tree.

        :param cleanup_element_tree: async callable given the frame url and a copy of the
            incremental tree; its result is stored as ``element_tree``.
        """
        frame = self.skyvern_frame.get_frame()

        frame_id = "main.frame"
        if isinstance(frame, Frame):
            try:
                frame_element = await frame.frame_element()
                unique_id = await frame_element.get_attribute("unique_id")
            except Exception:
                # TODO: do we really care about the frame_id ?
                LOG.warning(
                    "Unable to get frame_element",
                    exc_info=True,
                )
            else:
                # get_attribute returns None when the attribute is absent; keep the default
                # id instead of sending the text "None" to the page.
                if unique_id:
                    frame_id = unique_id

        # Pass frame_id as an evaluate argument instead of formatting it into the script, so
        # quotes or other special characters in the id cannot break the JS expression.
        js_script = "async (frameId) => await getIncrementElements(frameId)"
        incremental_elements, incremental_tree = await frame.evaluate(js_script, frame_id)
        # we listen to the incremental elements separately per frame, so all elements will be in the same SkyvernFrame
        self.id_to_css_dict, self.id_to_element_dict, _ = build_element_dict(incremental_elements)

        self.elements = incremental_elements

        incremental_tree = await cleanup_element_tree(frame.url, copy.deepcopy(incremental_tree))
        trimmed_element_tree = trim_element_tree(copy.deepcopy(incremental_tree))

        self.element_tree = incremental_tree
        self.element_tree_trimmed = trimmed_element_tree

        return self.element_tree_trimmed

    async def start_listen_dom_increment(self) -> None:
        """Evaluate startGlobalIncrementalObserver() in the frame."""
        js_script = "() => startGlobalIncrementalObserver()"
        await self.skyvern_frame.get_frame().evaluate(js_script)

    async def stop_listen_dom_increment(self) -> None:
        """Evaluate stopGlobalIncrementalObserver() in the frame."""
        js_script = "() => stopGlobalIncrementalObserver()"
        await self.skyvern_frame.get_frame().evaluate(js_script)

    async def get_incremental_elements_num(self) -> int:
        """Return the length of window.globalOneTimeIncrementElements as read from the frame."""
        js_script = "() => window.globalOneTimeIncrementElements.length"
        return await self.skyvern_frame.get_frame().evaluate(js_script)

    def build_html_tree(self) -> str:
        """Render the trimmed element tree as one HTML string (empty before the first scrape)."""
        return "".join([json_to_html(element) for element in self.element_tree_trimmed])
|
||||
|
||||
|
||||
def trim_element_tree(elements: list[dict]) -> list[dict]:
|
||||
queue = []
|
||||
for element in elements:
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import typing
|
||||
from abc import ABC, abstractmethod
|
||||
from enum import StrEnum
|
||||
from random import uniform
|
||||
|
||||
import structlog
|
||||
from playwright.async_api import Frame, FrameLocator, Locator, Page
|
||||
@@ -21,11 +23,12 @@ from skyvern.exceptions import (
|
||||
MultipleDropdownAnchorErr,
|
||||
MultipleElementsFound,
|
||||
NoDropdownAnchorErr,
|
||||
NoElementBoudingBox,
|
||||
NoneFrameError,
|
||||
SkyvernException,
|
||||
)
|
||||
from skyvern.forge.sdk.settings_manager import SettingsManager
|
||||
from skyvern.webeye.scraper.scraper import ScrapedPage
|
||||
from skyvern.webeye.scraper.scraper import IncrementalScrapePage, ScrapedPage
|
||||
from skyvern.webeye.utils.page import SkyvernFrame
|
||||
|
||||
LOG = structlog.get_logger()
|
||||
@@ -94,6 +97,35 @@ class SkyvernElement:
|
||||
When you try to interact with these elements by python, you are supposed to use this class as an interface.
|
||||
"""
|
||||
|
||||
@classmethod
async def create_from_incremental(cls, incre_page: IncrementalScrapePage, element_id: str) -> SkyvernElement:
    """
    Build a SkyvernElement for ``element_id`` out of an incremental scrape.

    Raises MissingElementDict / MissingElementInCSSMap when the id is absent from the
    page's lookup tables, and MissingElement / MultipleElementsFound when the CSS
    selector does not resolve to exactly one element in the frame.
    """
    static_element = incre_page.id_to_element_dict.get(element_id)
    if static_element is None:
        raise MissingElementDict(element_id)

    selector = incre_page.id_to_css_dict.get(element_id)
    if not selector:
        raise MissingElementInCSSMap(element_id)

    frame = incre_page.skyvern_frame.get_frame()
    locator = frame.locator(selector)
    match_count = await locator.count()

    if match_count > 1:
        LOG.warning(
            "Multiple elements found with css. Expected 1. Validation failed.",
            num_elements=match_count,
            selector=selector,
            element_id=element_id,
        )
        raise MultipleElementsFound(num=match_count, selector=selector, element_id=element_id)

    if match_count < 1:
        LOG.warning("No elements found with css. Validation failed.", css=selector, element_id=element_id)
        raise MissingElement(selector=selector, element_id=element_id)

    # exactly one match
    return cls(locator, frame, static_element)
|
||||
|
||||
def __init__(self, locator: Locator, frame: Page | Frame, static_element: dict) -> None:
|
||||
self.__static_element = static_element
|
||||
self.__frame = frame
|
||||
@@ -147,12 +179,13 @@ class SkyvernElement:
|
||||
return self.__static_element.get("interactable", False)
|
||||
|
||||
async def is_selectable(self) -> bool:
    """
    Return True when the scraped element is flagged as selectable or its tag name is in
    SELECTABLE_ELEMENT.

    The earlier dropdown-specific return was left in front of this one, which made the
    flag-based check unreachable; only the flag-based check is kept.
    """
    return self.get_selectable() or self.get_tag_name() in SELECTABLE_ELEMENT
|
||||
|
||||
def get_scrollable(self) -> bool:
    """Return the "isScrollable" entry of the scraped element dict, False when absent."""
    static_element = self.__static_element
    return static_element.get("isScrollable", False)
|
||||
|
||||
def get_selectable(self) -> bool:
    """Return the "isSelectable" entry of the scraped element dict, False when absent."""
    static_element = self.__static_element
    return static_element.get("isSelectable", False)
|
||||
|
||||
def get_tag_name(self) -> str:
    """Return the "tagName" entry of the scraped element dict, "" when absent."""
    static_element = self.__static_element
    return static_element.get("tagName", "")
|
||||
@@ -294,6 +327,36 @@ class SkyvernElement:
|
||||
async def input_clear(self, timeout: float = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS) -> None:
    """Clear the element through its locator, forwarding ``timeout`` to the clear call."""
    locator = self.get_locator()
    await locator.clear(timeout=timeout)
|
||||
|
||||
async def move_mouse_to(
    self, page: Page, timeout: float = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS
) -> tuple[float, float]:
    """
    Move the mouse to a random point inside the element's bounding box.

    :param page: page whose mouse is moved.
    :param timeout: forwarded to the locator's bounding_box call.
    :return: the (x, y) coordinates the mouse was moved to.
    :raises NoElementBoudingBox: when the locator reports no bounding box.
    """
    bounding_box = await self.get_locator().bounding_box(timeout=timeout)
    if not bounding_box:
        raise NoElementBoudingBox(element_id=self.get_id())
    x, y, width, height = bounding_box["x"], bounding_box["y"], bounding_box["width"], bounding_box["height"]

    # calculate the click point, use open interval to avoid clicking on the border
    epsilon = 0.01
    # When the box is too small for the open interval, fall back to its centre. The centre
    # is origin + size / 2; (origin + size) / 2 would land outside the box.
    dest_x = uniform(x + epsilon, x + width - epsilon) if width > 2 * epsilon else x + width / 2
    dest_y = uniform(y + epsilon, y + height - epsilon) if height > 2 * epsilon else y + height / 2
    await page.mouse.move(dest_x, dest_y)

    return dest_x, dest_y
|
||||
|
||||
async def coordinate_click(
    self, page: Page, timeout: float = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS
) -> None:
    """Move the mouse onto the element via move_mouse_to, then click at the returned coordinates."""
    target = await self.move_mouse_to(page=page, timeout=timeout)
    target_x, target_y = target
    await page.mouse.click(target_x, target_y)
|
||||
|
||||
async def scroll_into_view(self, timeout: float = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS) -> None:
    """
    Scroll the element into view if needed, then pause for two seconds.

    Logs a warning and does nothing when no element handle is available.
    """
    handle = await self.get_locator().element_handle()
    if handle is not None:
        await handle.scroll_into_view_if_needed(timeout=timeout)
        await asyncio.sleep(2)  # wait for scrolling into the target
        return

    LOG.warning("element handler is None. ", element_id=self.get_id())
|
||||
|
||||
|
||||
class DomUtil:
|
||||
"""
|
||||
|
||||
@@ -145,6 +145,14 @@ class SkyvernFrame:
|
||||
async with asyncio.timeout(timeout):
|
||||
return await self.frame.content()
|
||||
|
||||
async def scroll_to_element_bottom(self, element: ElementHandle) -> None:
    """Evaluate scrollToElementBottom(element) in the frame and return the evaluation result."""
    result = await self.frame.evaluate("(element) => scrollToElementBottom(element)", element)
    return result
|
||||
|
||||
async def scroll_to_element_top(self, element: ElementHandle) -> None:
    """Evaluate scrollToElementTop(element) in the frame and return the evaluation result."""
    result = await self.frame.evaluate("(element) => scrollToElementTop(element)", element)
    return result
|
||||
|
||||
async def get_select2_options(self, element: ElementHandle) -> List[Dict[str, Any]]:
|
||||
await self.frame.evaluate(JS_FUNCTION_DEFS)
|
||||
js_script = "async (element) => await getSelect2Options(element)"
|
||||
|
||||
Reference in New Issue
Block a user