general selection (#675)

This commit is contained in:
LawyZheng
2024-08-06 13:30:52 +08:00
committed by GitHub
parent 845ae8d3e4
commit cba0f68a5e
10 changed files with 655 additions and 241 deletions

View File

@@ -3,7 +3,7 @@ import json
import os
import urllib.parse
import uuid
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
from typing import Any, Awaitable, Callable, List
import structlog
@@ -20,12 +20,13 @@ from skyvern.exceptions import (
FailToSelectByLabel,
FailToSelectByValue,
ImaginaryFileUrl,
InputActionOnSelect2Dropdown,
InvalidElementForTextInput,
MissingElement,
MissingFileUrl,
MultipleElementsFound,
NoSelectableElementFound,
NoElementMatchedForTargetOption,
NoIncrementalElementFoundForCustomSelection,
NoLabelOrValueForCustomSelection,
OptionIndexOutOfBound,
WrongElementToUploadFile,
)
@@ -36,6 +37,7 @@ from skyvern.forge.sdk.api.files import (
get_number_of_files_in_directory,
get_path_for_workflow_download_directory,
)
from skyvern.forge.sdk.api.llm.api_handler_factory import LLMAPIHandler
from skyvern.forge.sdk.core.aiohttp_helper import aiohttp_post
from skyvern.forge.sdk.core.security import generate_skyvern_signature
from skyvern.forge.sdk.db.enums import OrganizationAuthTokenType
@@ -56,8 +58,8 @@ from skyvern.webeye.actions.actions import (
)
from skyvern.webeye.actions.responses import ActionFailure, ActionResult, ActionSuccess
from skyvern.webeye.browser_factory import BrowserState, get_download_dir
from skyvern.webeye.scraper.scraper import ElementTreeFormat, ScrapedPage
from skyvern.webeye.utils.dom import AbstractSelectDropdown, DomUtil, SkyvernElement
from skyvern.webeye.scraper.scraper import ElementTreeFormat, IncrementalScrapePage, ScrapedPage
from skyvern.webeye.utils.dom import DomUtil, InteractiveElement, SkyvernElement
from skyvern.webeye.utils.page import SkyvernFrame
LOG = structlog.get_logger()
@@ -286,8 +288,6 @@ async def handle_input_text_action(
) -> list[ActionResult]:
dom = DomUtil(scraped_page, page)
skyvern_element = await dom.get_skyvern_element_by_id(action.element_id)
if await skyvern_element.is_select2_dropdown():
return [ActionFailure(InputActionOnSelect2Dropdown(element_id=action.element_id))]
current_text = await get_input_value(skyvern_element.get_tag_name(), skyvern_element.get_locator())
if current_text == action.text:
@@ -469,122 +469,21 @@ async def handle_select_option_action(
)
return [ActionFailure(ErrFoundSelectableElement(action.element_id, e))]
if selectable_child is None:
LOG.error(
"No selectable element found in chidren",
tag_name=tag_name,
action=action,
)
return [ActionFailure(NoSelectableElementFound(action.element_id))]
LOG.info(
"Found selectable element in the children",
tag_name=selectable_child.get_tag_name(),
element_id=selectable_child.get_id(),
)
select_action = SelectOptionAction(element_id=selectable_child.get_id(), option=action.option)
return await handle_select_option_action(select_action, page, scraped_page, task, step)
select_framework: AbstractSelectDropdown | None = None
if await skyvern_element.is_combobox_dropdown():
LOG.info(
"This is a combobox dropdown",
action=action,
)
select_framework = await skyvern_element.get_combobox_dropdown()
if await skyvern_element.is_select2_dropdown():
LOG.info(
"This is a select2 dropdown",
action=action,
)
select_framework = await skyvern_element.get_select2_dropdown()
if await skyvern_element.is_react_select_dropdown():
LOG.info(
"This is a react select dropdown",
action=action,
)
select_framework = await skyvern_element.get_react_select_dropdown()
if select_framework is not None:
timeout = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS
try:
current_value = await select_framework.get_current_value()
if current_value == action.option.label or current_value == action.option.value:
return [ActionSuccess()]
except Exception:
if selectable_child:
LOG.info(
"failed to confirm if the select option has been done, force to take the action again.",
exc_info=True,
"Found selectable element in the children",
tag_name=selectable_child.get_tag_name(),
element_id=selectable_child.get_id(),
)
select_action = SelectOptionAction(element_id=selectable_child.get_id(), option=action.option)
return await handle_select_option_action(select_action, page, scraped_page, task, step)
await select_framework.open()
options = await select_framework.get_options()
result: List[ActionResult] = []
# select by label first, then by index
if action.option.label is not None or action.option.value is not None:
try:
for option in options:
option_content = option.get("text")
option_index = option.get("optionIndex", None)
if option_index is None:
LOG.warning(
f"{select_framework.name()} option index is None",
option=option,
)
continue
if action.option.label == option_content or action.option.value == option_content:
await select_framework.select_by_index(index=option_index, timeout=timeout)
result.append(ActionSuccess())
return result
LOG.info(
f"no target {select_framework.name()} option matched by label, try to select by index",
action=action,
)
except Exception as e:
result.append(ActionFailure(e))
LOG.info(
f"failed to select by label in {select_framework.name()}, try to select by index",
exc_info=True,
action=action,
)
if action.option.index is not None:
if action.option.index >= len(options):
result.append(ActionFailure(OptionIndexOutOfBound(action.element_id)))
else:
try:
option_content = options[action.option.index].get("text")
if option_content != action.option.label:
LOG.warning(
"Select option label is not consistant to the action value. Might select wrong option.",
option_content=option_content,
action=action,
)
await select_framework.select_by_index(index=action.option.index, timeout=timeout)
result.append(ActionSuccess())
return result
except Exception:
result.append(ActionFailure(FailToSelectByIndex(action.element_id)))
LOG.info(
f"failed to select by index in {select_framework.name()}",
exc_info=True,
action=action,
)
if len(result) == 0:
result.append(ActionFailure(EmptySelect(action.element_id)))
if isinstance(result[-1], ActionFailure):
LOG.info(
f"Failed to select a {select_framework.name()} option, close the dropdown",
action=action,
)
await select_framework.close()
return result
if tag_name == InteractiveElement.SELECT:
LOG.info(
"SelectOptionAction is on <select>",
action=action,
)
return await normal_select(action=action, skyvern_element=skyvern_element)
if await skyvern_element.is_checkbox():
LOG.info(
@@ -602,7 +501,99 @@ async def handle_select_option_action(
click_action = ClickAction(element_id=action.element_id)
return await chain_click(task, scraped_page, page, click_action, skyvern_element)
return await normal_select(action=action, skyvern_element=skyvern_element)
LOG.info(
"Trigger custom select",
action=action,
)
timeout = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS
skyvern_frame = await SkyvernFrame.create_instance(skyvern_element.get_frame())
incremental_scraped = IncrementalScrapePage(skyvern_frame=skyvern_frame)
llm_handler = app.SECONDARY_LLM_API_HANDLER
is_open = False
try:
await incremental_scraped.start_listen_dom_increment()
await skyvern_element.get_locator().focus(timeout=timeout)
if tag_name == InteractiveElement.INPUT:
await skyvern_element.get_locator().press("ArrowDown", timeout=timeout)
else:
await skyvern_element.get_locator().click(timeout=timeout)
# wait 5s for options to load
await asyncio.sleep(5)
is_open = True
incremental_element = await incremental_scraped.get_incremental_element_tree(
app.AGENT_FUNCTION.cleanup_element_tree
)
if len(incremental_element) == 0:
raise NoIncrementalElementFoundForCustomSelection(element_id=action.element_id)
dropdown_menu_element = await locate_dropdown_meanu(
incremental_scraped=incremental_scraped,
element_trees=incremental_element,
llm_handler=llm_handler,
step=step,
task=task,
)
if dropdown_menu_element and dropdown_menu_element.get_scrollable():
await scroll_down_to_load_all_options(
dropdown_menu_element=dropdown_menu_element,
skyvern_frame=skyvern_frame,
page=page,
incremental_scraped=incremental_scraped,
step=step,
task=task,
)
await incremental_scraped.get_incremental_element_tree(app.AGENT_FUNCTION.cleanup_element_tree)
# TODO: maybe take a screenshot for every tree head element to figure out which is the dropdown menu
html = incremental_scraped.build_html_tree()
target_value = action.option.label or action.option.value
if target_value is None:
raise NoLabelOrValueForCustomSelection(element_id=action.element_id)
prompt = prompt_engine.load_prompt(
"custom-select", context_reasoning=action.reasoning, target_value=target_value, elements=html
)
LOG.info(
"Calling LLM to find the match element",
target_value=target_value,
step_id=step.step_id,
task_id=task.task_id,
)
json_response = await llm_handler(prompt=prompt, step=step)
LOG.info(
"LLM response for the matched element",
target_value=target_value,
response=json_response,
step_id=step.step_id,
task_id=task.task_id,
)
element_id: str | None = json_response.get("id", None)
if not element_id:
raise NoElementMatchedForTargetOption(target=target_value, reason=json_response.get("reasoning"))
selected_element = await SkyvernElement.create_from_incremental(incremental_scraped, element_id)
await selected_element.scroll_into_view()
await selected_element.get_locator().click(timeout=timeout)
return [ActionSuccess()]
except Exception as e:
if is_open:
await skyvern_element.scroll_into_view()
await skyvern_element.coordinate_click(page=page)
await skyvern_element.get_locator().press("Escape", timeout=timeout)
LOG.exception("custome select error")
return [ActionFailure(exception=e)]
finally:
await incremental_scraped.stop_listen_dom_increment()
async def handle_checkbox_action(
@@ -836,6 +827,113 @@ async def chain_click(
return [ActionFailure(WrongElementToUploadFile(action.element_id))]
async def locate_dropdown_meanu(
    incremental_scraped: IncrementalScrapePage,
    element_trees: list[dict],
    llm_handler: LLMAPIHandler,
    step: Step | None = None,
    task: Task | None = None,
) -> SkyvernElement | None:
    """Find which newly appeared tree head is the opened dropdown menu.

    Each candidate tree head is screenshotted and the screenshot is sent to
    the LLM together with the "opened-dropdown-confirm" prompt. The first
    candidate for which the response has a truthy "is_opened_dropdown_menu"
    is returned.

    Args:
        incremental_scraped: incremental scrape used to resolve element ids.
        element_trees: tree heads of the incremental element tree.
        llm_handler: LLM handler called with the prompt and the screenshot.
        step: optional step, forwarded to the LLM handler and used for logging.
        task: optional task, used for logging only.

    Returns:
        The element confirmed as the opened dropdown menu, or None when no
        inspected candidate was confirmed.
    """
    # FIXME: confirm max to 10 nodes for now, preventing sending too many requests to LLM
    max_candidates = 10
    step_id = step.step_id if step else "none"
    task_id = task.task_id if task else "none"

    for element_dict in element_trees[:max_candidates]:
        element_id = element_dict.get("id")
        if not element_id:
            LOG.info(
                "Skip the non-interactable element for the dropdown menu confirm",
                step_id=step_id,
                task_id=task_id,
                element=element_dict,
            )
            continue

        head_element = await SkyvernElement.create_from_incremental(incremental_scraped, element_id)
        screenshot = await head_element.get_locator().screenshot(
            timeout=SettingsManager.get_settings().BROWSER_SCREENSHOT_TIMEOUT_MS
        )
        dropdown_confirm_prompt = prompt_engine.load_prompt("opened-dropdown-confirm")
        LOG.info(
            "Confirm if it's an opened dropdown menu",
            step_id=step_id,
            task_id=task_id,
            element=element_dict,
        )
        json_response = await llm_handler(prompt=dropdown_confirm_prompt, screenshots=[screenshot], step=step)
        if json_response.get("is_opened_dropdown_menu"):
            # Reuse the element that was already located and screenshotted; building it
            # again would repeat the DOM lookup and could fail after the LLM confirmed it.
            return head_element

    return None
async def scroll_down_to_load_all_options(
    dropdown_menu_element: SkyvernElement,
    page: Page,
    skyvern_frame: SkyvernFrame,
    incremental_scraped: IncrementalScrapePage,
    step: Step | None = None,
    task: Task | None = None,
) -> None:
    """Scroll a dropdown menu until no more options are loaded, then scroll back.

    The menu is scrolled repeatedly; after each scroll the number of recorded
    incremental elements is compared with the previous count. The loop ends
    when the count stops changing or when OPTION_LOADING_TIMEOUT_MS elapses.
    Finally the menu is scrolled back to its starting position.

    Args:
        dropdown_menu_element: the element identified as the dropdown menu.
        page: page whose mouse is used for wheel scrolling.
        skyvern_frame: frame wrapper used to scroll the element handle.
        incremental_scraped: source of the incremental element count.
        step: optional step, used for logging only.
        task: optional task, used for logging only.
    """
    step_id = step.step_id if step else "none"
    task_id = task.task_id if task else "none"
    LOG.info(
        "Scroll down the dropdown menu to load all options",
        step_id=step_id,
        task_id=task_id,
    )

    timeout = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS
    menu_handle = await dropdown_menu_element.get_locator().element_handle(timeout=timeout)
    has_handle = menu_handle is not None

    if has_handle:
        await menu_handle.scroll_into_view_if_needed(timeout=timeout)
    else:
        LOG.info("element handle is None, using focus to move the cursor", element_id=dropdown_menu_element.get_id())
        await dropdown_menu_element.get_locator().focus(timeout=timeout)

    await dropdown_menu_element.move_mouse_to(page=page)

    # total distance scrolled with the mouse wheel, needed to scroll back later
    wheel_distance = 0
    seen_count = await incremental_scraped.get_incremental_elements_num()

    give_up_at = datetime.now(timezone.utc) + timedelta(
        milliseconds=SettingsManager.get_settings().OPTION_LOADING_TIMEOUT_MS
    )
    while datetime.now(timezone.utc) < give_up_at:
        # make sure we can scroll to the bottom
        scroll_interval = SettingsManager.get_settings().BROWSER_HEIGHT * 5
        if has_handle:
            await skyvern_frame.scroll_to_element_bottom(menu_handle)
            # scroll a little back and scroll down again to trigger the loading
            await page.mouse.wheel(0, -20)
            await page.mouse.wheel(0, 20)
        else:
            LOG.info("element handle is None, using mouse to scroll down", element_id=dropdown_menu_element.get_id())
            await page.mouse.wheel(0, scroll_interval)
            wheel_distance += scroll_interval

        # wait for a while to load new options
        await asyncio.sleep(5)

        latest_count = await incremental_scraped.get_incremental_elements_num()
        LOG.info(
            "Current incremental elements count during the scrolling",
            num=latest_count,
            step_id=step_id,
            task_id=task_id,
        )
        if latest_count == seen_count:
            # nothing new was recorded after the last scroll
            break
        seen_count = latest_count
    else:
        # the while condition became false, i.e. the deadline passed without a stable count
        LOG.warning("Timeout to load all options, maybe some options will be missed")

    # scroll back to the start point and wait for a while to make all options invisible on the page
    if has_handle:
        await skyvern_frame.scroll_to_element_top(menu_handle)
    else:
        LOG.info("element handle is None, using mouse to scroll back", element_id=dropdown_menu_element.get_id())
        await page.mouse.wheel(0, -wheel_distance)
    await asyncio.sleep(5)
async def normal_select(
action: actions.SelectOptionAction,
skyvern_element: SkyvernElement,

View File

@@ -386,19 +386,8 @@ function isInteractable(element) {
return true;
}
if (
tagName === "div" ||
tagName === "img" ||
tagName === "span" ||
tagName === "a" ||
tagName === "i"
) {
const computedStyle = window.getComputedStyle(element);
const hasPointer = computedStyle.cursor === "pointer";
return hasPointer;
}
// support listbox and options underneath it
// div element should be checked here before the css pointer
if (
(tagName === "ul" || tagName === "div") &&
element.hasAttribute("role") &&
@@ -414,9 +403,53 @@ function isInteractable(element) {
return true;
}
if (
tagName === "div" &&
element.hasAttribute("aria-disabled") &&
element.getAttribute("aria-disabled").toLowerCase() === "false"
) {
return true;
}
if (
tagName === "div" ||
tagName === "img" ||
tagName === "span" ||
tagName === "a" ||
tagName === "i"
) {
const computedStyle = window.getComputedStyle(element);
const hasPointer = computedStyle.cursor === "pointer";
return hasPointer;
}
return false;
}
/**
 * Tell whether an element can actually be scrolled: its content must be
 * larger than its client box in at least one direction AND its computed
 * overflow must allow scrolling (see isScrollableOverflow).
 */
function isScrollable(element) {
  // missing or zero dimensions are treated as 0
  const sizeOf = (value) => value || 0;

  const overflowsVertically =
    sizeOf(element.scrollHeight) > sizeOf(element.clientHeight);
  const overflowsHorizontally =
    sizeOf(element.scrollWidth) > sizeOf(element.clientWidth);
  const overflowAllowsScrolling = isScrollableOverflow(element);

  if (!overflowAllowsScrolling) {
    return false;
  }
  return overflowsVertically || overflowsHorizontally;
}
/**
 * Tell whether the computed style of an element sets `overflow`,
 * `overflowX` or `overflowY` to "auto" or "scroll".
 */
function isScrollableOverflow(element) {
  const computed = window.getComputedStyle(element);
  const scrollingValues = ["auto", "scroll"];
  const overflowValues = [
    computed.overflow,
    computed.overflowX,
    computed.overflowY,
  ];
  return overflowValues.some((value) => scrollingValues.includes(value));
}
const isComboboxDropdown = (element) => {
if (element.tagName.toLowerCase() !== "input") {
return false;
@@ -436,8 +469,8 @@ const isComboboxDropdown = (element) => {
const isSelect2Dropdown = (element) => {
return (
element.tagName.toLowerCase() === "span" &&
element.className.toString().includes("select2-chosen")
element.tagName.toLowerCase() === "a" &&
element.className.toString().includes("select2-choice")
);
};
@@ -805,6 +838,14 @@ function uniqueId() {
}
/**
 * Build the element tree for the whole document by delegating to
 * buildElementTree with document.body as the starting node.
 */
async function buildTreeFromBody(frame = "main.frame", open_select = false) {
  const starter = document.body;
  return buildElementTree(starter, frame, open_select);
}
async function buildElementTree(
starter = document.body,
frame = "main.frame",
open_select = false,
) {
var elements = [];
var resultArray = [];
@@ -863,6 +904,13 @@ async function buildTreeFromBody(frame = "main.frame", open_select = false) {
// don't trim any attr of this element if keepAllAttr=True
keepAllAttr:
elementTagNameLower === "svg" || element.closest("svg") !== null,
isSelectable:
elementTagNameLower === "select" ||
isReactSelectDropdown(element) ||
isComboboxDropdown(element) ||
isSelect2Dropdown(element) ||
isSelect2MultiChoice(element),
isScrollable: isScrollable(element),
};
let isInShadowRoot = element.getRootNode() instanceof ShadowRoot;
@@ -882,94 +930,8 @@ async function buildTreeFromBody(frame = "main.frame", open_select = false) {
let selectedValue = "";
if (elementTagNameLower === "select") {
[selectOptions, selectedValue] = getSelectOptions(element);
} else if (attrs["role"] && attrs["role"].toLowerCase() === "listbox") {
// if "role" key is inside attrs, then get all the elements with role "option" and get their text
selectOptions = getListboxOptions(element);
} else if (open_select && isReactSelectDropdown(element)) {
element.dispatchEvent(
new MouseEvent("mouseup", {
bubbles: true,
view: window,
}),
);
element.dispatchEvent(
new MouseEvent("mousedown", {
bubbles: true,
view: window,
}),
);
selectOptions = await getReactSelectOptions(element);
// click again to close
element.dispatchEvent(
new MouseEvent("mouseup", {
bubbles: true,
view: window,
}),
);
element.dispatchEvent(
new MouseEvent("mousedown", {
bubbles: true,
view: window,
}),
);
element.dispatchEvent(
new KeyboardEvent("keydown", {
keyCode: 27,
bubbles: true,
key: "Escape",
}),
);
} else if (open_select && isComboboxDropdown(element)) {
// open combobox dropdown to get options
element.click();
const listBox = element
.getRootNode()
.getElementById(element.getAttribute("aria-controls"));
if (listBox) {
selectOptions = getListboxOptions(listBox);
}
// HACK: press Tab to close the dropdown
element.dispatchEvent(
new KeyboardEvent("keydown", {
keyCode: 9,
bubbles: true,
key: "Tab",
}),
);
} else if (open_select && isSelect2Dropdown(element)) {
// click element to show options
element.dispatchEvent(
new MouseEvent("mousedown", {
bubbles: true,
view: window,
}),
);
selectOptions = await getSelect2Options(element);
// HACK: click again to close the dropdown
element.dispatchEvent(
new MouseEvent("mousedown", {
bubbles: true,
view: window,
}),
);
} else if (open_select && isSelect2MultiChoice(element)) {
// click element to show options
element.click();
selectOptions = await getSelect2Options(element);
// HACK: press ESC to close the dropdown
element.dispatchEvent(
new KeyboardEvent("keydown", {
keyCode: 27,
bubbles: true,
key: "Escape",
}),
);
}
if (selectOptions) {
elementObj.options = selectOptions;
}
@@ -1308,9 +1270,8 @@ async function buildTreeFromBody(frame = "main.frame", open_select = false) {
return trimmedResults;
};
// TODO: Handle iframes
// setup before parsing the dom
await processElement(document.body, null);
await processElement(starter, null);
for (var element of elements) {
if (
@@ -1568,6 +1529,22 @@ async function scrollToNextPage(draw_boxes) {
return window.scrollY;
}
/**
 * Scroll the element's own content to its bottom, without animation.
 */
function scrollToElementBottom(element) {
  const bottomOffset = element.scrollHeight;
  const scrollOptions = { top: bottomOffset, left: 0, behavior: "instant" };
  element.scroll(scrollOptions);
}
/**
 * Scroll the element's own content back to its top, without animation.
 */
function scrollToElementTop(element) {
  const scrollOptions = { top: 0, left: 0, behavior: "instant" };
  element.scroll(scrollOptions);
}
/**
 * Return a promise that resolves (with no value) after `ms` milliseconds.
 */
async function sleep(ms) {
  await new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
@@ -1589,3 +1566,140 @@ function findNodeById(arr, targetId, path = []) {
}
return null;
}
/**
 * Count how many parentElement hops separate a node from the first element
 * child of its root node. The walk also stops when a node has no
 * parentElement.
 */
function getElementDomDepth(elementNode) {
  const rootElement = elementNode.getRootNode().firstElementChild;
  let hops = 0;
  for (
    let current = elementNode;
    current !== rootElement && current.parentElement;
    current = current.parentElement
  ) {
    hops += 1;
  }
  return hops;
}
// Buffer of DOM changes recorded by the observer below. Each entry has the
// shape { targetNode, newNodes }. It is created only once per window.
if (window.globalOneTimeIncrementElements === undefined) {
  window.globalOneTimeIncrementElements = [];
}

// Single MutationObserver, created only once per window. It records:
//   * elements whose `style` attribute changed and whose computed display is
//     not "none" (the element itself is recorded as the new node), and
//   * non-text nodes added to the DOM (recorded under the mutated parent).
// Creating the observer does not start it; `observe` is called in
// startGlobalIncrementalObserver.
if (window.globalObserverForDOMIncrement === undefined) {
  window.globalObserverForDOMIncrement = new MutationObserver(function (
    mutationsList,
    observer,
  ) {
    for (const mutation of mutationsList) {
      if (mutation.type === "attributes") {
        if (mutation.attributeName === "style") {
          // TODO: need to confirm that the element was hidden previously
          // Declared with const: the bare assignment used before created an
          // implicit global and throws in strict mode.
          const node = mutation.target;
          if (node.nodeType === Node.TEXT_NODE) continue;
          const newStyle = window.getComputedStyle(node);
          const newDisplay = newStyle.display;
          if (newDisplay !== "none") {
            window.globalOneTimeIncrementElements.push({
              targetNode: node,
              newNodes: [node],
            });
          }
        }
        // TODO: we maybe need to detect the visibility change from class
        // if (mutation.attributeName === "class") {
        // }
      }

      if (mutation.type === "childList") {
        const changedNode = {
          targetNode: mutation.target, // TODO: for future usage, when we want to parse new elements into a tree
        };
        const newNodes = [];
        if (mutation.addedNodes && mutation.addedNodes.length > 0) {
          for (const node of mutation.addedNodes) {
            // skip the text nodes, they won't be interactable
            if (node.nodeType === Node.TEXT_NODE) continue;
            newNodes.push(node);
          }
        }
        // only record the mutation when it actually added non-text nodes
        if (newNodes.length > 0) {
          changedNode.newNodes = newNodes;
          window.globalOneTimeIncrementElements.push(changedNode);
        }
      }
    }
  });
}
/**
 * Reset the recorded increments, drop any pending mutation records and start
 * observing document.body (attributes, child list, character data, subtree).
 */
function startGlobalIncrementalObserver() {
  window.globalOneTimeIncrementElements = [];

  const observer = window.globalObserverForDOMIncrement;
  observer.takeRecords(); // cleanup the older data

  const observeOptions = {
    attributes: true,
    attributeOldValue: true,
    childList: true,
    subtree: true,
    characterData: true,
  };
  observer.observe(document.body, observeOptions);
}
/**
 * Stop observing, drop any pending mutation records and clear the recorded
 * increments.
 */
function stopGlobalIncrementalObserver() {
  const observer = window.globalObserverForDOMIncrement;
  observer.disconnect();
  observer.takeRecords(); // cleanup the older data
  window.globalOneTimeIncrementElements = [];
}
/**
 * Turn the DOM changes recorded in window.globalOneTimeIncrementElements
 * into element trees.
 *
 * Every recorded new node is parsed with buildElementTree and the resulting
 * tree heads are grouped by the DOM depth of the mutation target. The groups
 * are then visited from the shallowest depth to the deepest so that a tree
 * already covered by a shallower one (or a duplicate at the same depth) is
 * dropped.
 *
 * @param frame frame identifier forwarded to buildElementTree
 * @returns [flat list of all unique elements, list of de-duplicated tree heads]
 */
async function getIncrementElements(frame) {
  const domDepthMap = new Map();

  for (const element of window.globalOneTimeIncrementElements) {
    // calculate the depth of targetNode element for sorting
    const depth = getElementDomDepth(element.targetNode);
    const newNodesTreeList = domDepthMap.has(depth)
      ? domDepthMap.get(depth)
      : [];

    for (const child of element.newNodes) {
      const [, newNodeTree] = await buildElementTree(child, frame, false);
      if (newNodeTree.length > 0) {
        newNodesTreeList.push(...newNodeTree);
      }
    }
    domDepthMap.set(depth, newNodesTreeList);
  }

  // cleanup the children tree, remove the duplicated element
  // search starting from the shallowest node:
  // 1. if deeper, the node could only be the children of the shallower one or no related one.
  // 2. if depth is same, the node could only be duplicated one or no related one.
  const idToElement = new Map();
  const cleanedTreeList = [];

  // Depths are numbers: the default sort() compares them as strings, which
  // would place depth 10 before depth 2, so an explicit numeric comparator
  // is required to really start from the shallowest node.
  const sortedDepth = Array.from(domDepthMap.keys()).sort((a, b) => a - b);

  for (const depth of sortedDepth) {
    const treeList = domDepthMap.get(depth);

    for (const treeHeadElement of treeList) {
      // check if the element is existed
      if (idToElement.has(treeHeadElement.id)) {
        continue;
      }
      cleanedTreeList.push(treeHeadElement);

      // flatten the tree (breadth-first), registering every unseen element
      const pendingElements = [treeHeadElement];
      let curIndex = 0;
      while (curIndex < pendingElements.length) {
        const curElement = pendingElements[curIndex];
        curIndex++;
        if (idToElement.has(curElement.id)) {
          continue;
        }
        idToElement.set(curElement.id, curElement);
        pendingElements.push(...curElement.children);
      }
    }
  }

  return [Array.from(idToElement.values()), cleanedTreeList];
}

View File

@@ -96,6 +96,9 @@ def json_to_html(element: dict) -> str:
attributes_html = " ".join(build_attribute(key, value) for key, value in attributes.items())
tag = element["tagName"]
if element.get("isSelectable", False):
tag = "select"
text = element.get("text", "")
# build children HTML
children_html = "".join(json_to_html(child) for child in element.get("children", []))
@@ -112,6 +115,21 @@ def json_to_html(element: dict) -> str:
return f'<{tag}{attributes_html if not attributes_html else " "+attributes_html}>{text}{children_html+option_html}</{tag}>'
def build_element_dict(elements: list[dict]) -> tuple[dict[str, str], dict[str, dict], dict[str, str]]:
    """Index scraped elements by their id.

    Args:
        elements: flat list of element dicts; each must contain a "frame" key.
            A missing "id" is treated as the empty string.

    Returns:
        A tuple of three dicts keyed by element id: the CSS selector of the
        element, the element dict itself, and the element's frame.

    Raises:
        KeyError: if an element has no "frame" key.
    """
    keyed_elements = [(element.get("id", ""), element) for element in elements]

    # get_interactable_element_tree marks each interactable element with a unique_id attribute
    id_to_css_dict: dict[str, str] = {
        element_id: f"[{SKYVERN_ID_ATTR}='{element_id}']" for element_id, _ in keyed_elements
    }
    id_to_element_dict: dict[str, dict] = dict(keyed_elements)
    id_to_frame_dict: dict[str, str] = {element_id: element["frame"] for element_id, element in keyed_elements}

    return id_to_css_dict, id_to_element_dict, id_to_frame_dict
class ElementTreeFormat(StrEnum):
JSON = "json"
HTML = "html"
@@ -266,16 +284,7 @@ async def scrape_web_unsafe(
elements, element_tree = await get_interactable_element_tree(page, scrape_exclude)
element_tree = await cleanup_element_tree(url, copy.deepcopy(element_tree))
id_to_css_dict = {}
id_to_element_dict = {}
id_to_frame_dict = {}
for element in elements:
element_id = element["id"]
# get_interactable_element_tree marks each interactable element with a unique_id attribute
id_to_css_dict[element_id] = f"[{SKYVERN_ID_ATTR}='{element_id}']"
id_to_element_dict[element_id] = element
id_to_frame_dict[element_id] = element["frame"]
id_to_css_dict, id_to_element_dict, id_to_frame_dict = build_element_dict(elements)
text_content = await get_frame_text(page.main_frame)
@@ -378,6 +387,65 @@ async def get_interactable_element_tree(
return elements, element_tree
class IncrementalScrapePage:
    """Scrape only the elements that appeared in a frame since listening started.

    The page-side observer is started and stopped with
    start_listen_dom_increment / stop_listen_dom_increment, and
    get_incremental_element_tree collects what was recorded in between.
    """

    # All state is per instance and is initialised in __init__. A mutable
    # class-level default would be shared between every instance.
    id_to_element_dict: dict[str, dict]
    id_to_css_dict: dict[str, str]
    elements: list[dict]
    element_tree: list[dict]
    element_tree_trimmed: list[dict]

    def __init__(self, skyvern_frame: SkyvernFrame) -> None:
        self.skyvern_frame = skyvern_frame
        # Start empty so lookups and build_html_tree are safe before the first scrape.
        self.id_to_element_dict = {}
        self.id_to_css_dict = {}
        self.elements = []
        self.element_tree = []
        self.element_tree_trimmed = []

    async def get_incremental_element_tree(
        self,
        cleanup_element_tree: Callable[[str, list[dict]], Awaitable[list[dict]]],
    ) -> list[dict]:
        """Collect the recorded increments and return the trimmed element tree.

        Args:
            cleanup_element_tree: async callable given the frame url and a copy
                of the raw incremental tree; its result becomes element_tree.

        Returns:
            The trimmed incremental element tree (also stored on the instance).
        """
        frame = self.skyvern_frame.get_frame()

        frame_id = "main.frame"
        if isinstance(frame, Frame):
            try:
                frame_element = await frame.frame_element()
                frame_id = await frame_element.get_attribute("unique_id")
            except Exception:
                # TODO: do we really care about the frame_id ?
                LOG.warning(
                    "Unable to get frame_element",
                    exc_info=True,
                )

        js_script = f"async () => await getIncrementElements('{frame_id}')"
        incremental_elements, incremental_tree = await frame.evaluate(js_script)
        # we listen the incremental elements separated by frames, so all elements will be in the same SkyvernFrame
        self.id_to_css_dict, self.id_to_element_dict, _ = build_element_dict(incremental_elements)

        self.elements = incremental_elements

        # work on copies so the raw tree returned by the page is never mutated
        incremental_tree = await cleanup_element_tree(frame.url, copy.deepcopy(incremental_tree))
        trimmed_element_tree = trim_element_tree(copy.deepcopy(incremental_tree))

        self.element_tree = incremental_tree
        self.element_tree_trimmed = trimmed_element_tree

        return self.element_tree_trimmed

    async def start_listen_dom_increment(self) -> None:
        """Start the page-side observer in this frame."""
        js_script = "() => startGlobalIncrementalObserver()"
        await self.skyvern_frame.get_frame().evaluate(js_script)

    async def stop_listen_dom_increment(self) -> None:
        """Stop the page-side observer in this frame."""
        js_script = "() => stopGlobalIncrementalObserver()"
        await self.skyvern_frame.get_frame().evaluate(js_script)

    async def get_incremental_elements_num(self) -> int:
        """Return how many increments the page-side observer has recorded so far."""
        js_script = "() => window.globalOneTimeIncrementElements.length"
        return await self.skyvern_frame.get_frame().evaluate(js_script)

    def build_html_tree(self) -> str:
        """Render the trimmed incremental tree as HTML ("" when nothing was scraped yet)."""
        return "".join([json_to_html(element) for element in self.element_tree_trimmed])
def trim_element_tree(elements: list[dict]) -> list[dict]:
queue = []
for element in elements:

View File

@@ -1,8 +1,10 @@
from __future__ import annotations
import asyncio
import typing
from abc import ABC, abstractmethod
from enum import StrEnum
from random import uniform
import structlog
from playwright.async_api import Frame, FrameLocator, Locator, Page
@@ -21,11 +23,12 @@ from skyvern.exceptions import (
MultipleDropdownAnchorErr,
MultipleElementsFound,
NoDropdownAnchorErr,
NoElementBoudingBox,
NoneFrameError,
SkyvernException,
)
from skyvern.forge.sdk.settings_manager import SettingsManager
from skyvern.webeye.scraper.scraper import ScrapedPage
from skyvern.webeye.scraper.scraper import IncrementalScrapePage, ScrapedPage
from skyvern.webeye.utils.page import SkyvernFrame
LOG = structlog.get_logger()
@@ -94,6 +97,35 @@ class SkyvernElement:
When you try to interact with these elements by python, you are supposed to use this class as an interface.
"""
@classmethod
async def create_from_incremental(cls, incre_page: IncrementalScrapePage, element_id: str) -> SkyvernElement:
    """Build a SkyvernElement for an id found by an incremental scrape.

    Args:
        incre_page: incremental scrape holding the element and CSS maps.
        element_id: id of the element to wrap.

    Raises:
        MissingElementDict: the id is not in the scraped element map.
        MissingElementInCSSMap: the id has no CSS selector.
        MissingElement: the selector matches nothing in the frame.
        MultipleElementsFound: the selector matches more than one element.
    """
    static_element = incre_page.id_to_element_dict.get(element_id)
    if static_element is None:
        raise MissingElementDict(element_id)

    selector = incre_page.id_to_css_dict.get(element_id)
    if not selector:
        raise MissingElementInCSSMap(element_id)

    owning_frame = incre_page.skyvern_frame.get_frame()
    candidate = owning_frame.locator(selector)

    # the selector must identify exactly one element in the frame
    match_count = await candidate.count()
    if match_count == 1:
        return cls(candidate, owning_frame, static_element)

    if match_count < 1:
        LOG.warning("No elements found with css. Validation failed.", css=selector, element_id=element_id)
        raise MissingElement(selector=selector, element_id=element_id)

    LOG.warning(
        "Multiple elements found with css. Expected 1. Validation failed.",
        num_elements=match_count,
        selector=selector,
        element_id=element_id,
    )
    raise MultipleElementsFound(num=match_count, selector=selector, element_id=element_id)
def __init__(self, locator: Locator, frame: Page | Frame, static_element: dict) -> None:
self.__static_element = static_element
self.__frame = frame
@@ -147,12 +179,13 @@ class SkyvernElement:
return self.__static_element.get("interactable", False)
async def is_selectable(self) -> bool:
return (
await self.is_select2_dropdown()
or await self.is_react_select_dropdown()
or await self.is_combobox_dropdown()
or self.get_tag_name() in SELECTABLE_ELEMENT
)
return self.get_selectable() or self.get_tag_name() in SELECTABLE_ELEMENT
def get_scrollable(self) -> bool:
    """Return the scraped "isScrollable" flag of this element (False when absent)."""
    static_info = self.__static_element
    return static_info.get("isScrollable", False)
def get_selectable(self) -> bool:
    """Return the scraped "isSelectable" flag of this element (False when absent)."""
    static_info = self.__static_element
    return static_info.get("isSelectable", False)
def get_tag_name(self) -> str:
    """Return the scraped "tagName" of this element ("" when absent)."""
    static_info = self.__static_element
    return static_info.get("tagName", "")
@@ -294,6 +327,36 @@ class SkyvernElement:
async def input_clear(self, timeout: float = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS) -> None:
    """Clear the element's input through its locator, bounded by `timeout` (ms)."""
    target = self.get_locator()
    await target.clear(timeout=timeout)
async def move_mouse_to(
    self, page: Page, timeout: float = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS
) -> tuple[float, float]:
    """Move the mouse to a random point inside the element's bounding box.

    Args:
        page: page whose mouse is moved.
        timeout: timeout in milliseconds for resolving the bounding box.

    Returns:
        The (x, y) coordinates the mouse was moved to.

    Raises:
        NoElementBoudingBox: the element has no bounding box.
    """
    bounding_box = await self.get_locator().bounding_box(timeout=timeout)
    if not bounding_box:
        raise NoElementBoudingBox(element_id=self.get_id())
    x, y, width, height = bounding_box["x"], bounding_box["y"], bounding_box["width"], bounding_box["height"]

    # calculate the click point, use open interval to avoid clicking on the border
    epsilon = 0.01
    # When the box is too thin for the open interval, fall back to its midpoint.
    # The midpoint of [x, x + width] is x + width / 2; (x + width) / 2 would land
    # outside the element whenever the box does not start at the origin.
    dest_x = uniform(x + epsilon, x + width - epsilon) if width > 2 * epsilon else x + width / 2
    dest_y = uniform(y + epsilon, y + height - epsilon) if height > 2 * epsilon else y + height / 2
    await page.mouse.move(dest_x, dest_y)

    return dest_x, dest_y
async def coordinate_click(
    self, page: Page, timeout: float = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS
) -> None:
    """Move the mouse onto the element and click at that exact coordinate."""
    target_point = await self.move_mouse_to(page=page, timeout=timeout)
    await page.mouse.click(*target_point)
async def scroll_into_view(self, timeout: float = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS) -> None:
    """Scroll the element into view if needed, then pause briefly.

    Args:
        timeout: timeout in milliseconds applied both to resolving the
            element handle and to the scroll itself.
    """
    # Pass the timeout here too; otherwise resolving the handle would use
    # Playwright's default instead of the caller's bound.
    element_handler = await self.get_locator().element_handle(timeout=timeout)
    if element_handler is None:
        LOG.warning("element handler is None. ", element_id=self.get_id())
        return
    await element_handler.scroll_into_view_if_needed(timeout=timeout)
    await asyncio.sleep(2)  # wait for scrolling into the target
class DomUtil:
"""

View File

@@ -145,6 +145,14 @@ class SkyvernFrame:
async with asyncio.timeout(timeout):
return await self.frame.content()
async def scroll_to_element_bottom(self, element: ElementHandle) -> None:
    """Run the page-side scrollToElementBottom helper on `element` in this frame."""
    return await self.frame.evaluate("(element) => scrollToElementBottom(element)", element)
async def scroll_to_element_top(self, element: ElementHandle) -> None:
    """Run the page-side scrollToElementTop helper on `element` in this frame."""
    return await self.frame.evaluate("(element) => scrollToElementTop(element)", element)
async def get_select2_options(self, element: ElementHandle) -> List[Dict[str, Any]]:
await self.frame.evaluate(JS_FUNCTION_DEFS)
js_script = "async (element) => await getSelect2Options(element)"