general selection (#675)

This commit is contained in:
LawyZheng
2024-08-06 13:30:52 +08:00
committed by GitHub
parent 845ae8d3e4
commit cba0f68a5e
10 changed files with 655 additions and 241 deletions

View File

@@ -386,19 +386,8 @@ function isInteractable(element) {
return true;
}
if (
tagName === "div" ||
tagName === "img" ||
tagName === "span" ||
tagName === "a" ||
tagName === "i"
) {
const computedStyle = window.getComputedStyle(element);
const hasPointer = computedStyle.cursor === "pointer";
return hasPointer;
}
// support listbox and options underneath it
// div element should be checked here before the css pointer
if (
(tagName === "ul" || tagName === "div") &&
element.hasAttribute("role") &&
@@ -414,9 +403,53 @@ function isInteractable(element) {
return true;
}
if (
tagName === "div" &&
element.hasAttribute("aria-disabled") &&
element.getAttribute("aria-disabled").toLowerCase() === "false"
) {
return true;
}
if (
tagName === "div" ||
tagName === "img" ||
tagName === "span" ||
tagName === "a" ||
tagName === "i"
) {
const computedStyle = window.getComputedStyle(element);
const hasPointer = computedStyle.cursor === "pointer";
return hasPointer;
}
return false;
}
/**
 * Reports whether an element can actually be scrolled.
 *
 * Two conditions must both hold: the content is larger than the client box on
 * at least one axis, and the computed overflow style permits scrolling
 * (delegated to isScrollableOverflow).
 *
 * @param element - DOM element to inspect.
 * @returns true when the element has overflowing content and a scrollable overflow style.
 */
function isScrollable(element) {
  // Missing or zero dimensions are normalised to 0 before comparing.
  const contentHeight = element.scrollHeight || 0;
  const visibleHeight = element.clientHeight || 0;
  const contentWidth = element.scrollWidth || 0;
  const visibleWidth = element.clientWidth || 0;

  const overflowsVertically = contentHeight > visibleHeight;
  const overflowsHorizontally = contentWidth > visibleWidth;

  // The style check is always evaluated, regardless of the size comparison.
  const overflowAllowsScroll = isScrollableOverflow(element);

  return (overflowsVertically || overflowsHorizontally) && overflowAllowsScroll;
}
/**
 * Checks whether the element's computed overflow style allows scrolling.
 *
 * @param element - DOM element whose computed style is read.
 * @returns true when `overflow`, `overflowX` or `overflowY` is "auto" or "scroll".
 */
function isScrollableOverflow(element) {
  const computed = window.getComputedStyle(element);
  const scrollingValues = ["auto", "scroll"];
  const overflowSettings = [
    computed.overflow,
    computed.overflowX,
    computed.overflowY,
  ];
  return overflowSettings.some((setting) => scrollingValues.includes(setting));
}
const isComboboxDropdown = (element) => {
if (element.tagName.toLowerCase() !== "input") {
return false;
@@ -436,8 +469,8 @@ const isComboboxDropdown = (element) => {
const isSelect2Dropdown = (element) => {
return (
element.tagName.toLowerCase() === "span" &&
element.className.toString().includes("select2-chosen")
element.tagName.toLowerCase() === "a" &&
element.className.toString().includes("select2-choice")
);
};
@@ -805,6 +838,14 @@ function uniqueId() {
}
/**
 * Builds the element tree for the whole document by delegating to
 * buildElementTree with `document.body` as the starting node.
 *
 * @param frame - frame identifier forwarded to buildElementTree (default "main.frame").
 * @param open_select - flag forwarded unchanged to buildElementTree.
 * @returns whatever buildElementTree resolves to.
 */
async function buildTreeFromBody(frame = "main.frame", open_select = false) {
  const root = document.body;
  const treeResult = buildElementTree(root, frame, open_select);
  return treeResult;
}
async function buildElementTree(
starter = document.body,
frame = "main.frame",
open_select = false,
) {
var elements = [];
var resultArray = [];
@@ -863,6 +904,13 @@ async function buildTreeFromBody(frame = "main.frame", open_select = false) {
// don't trim any attr of this element if keepAllAttr=True
keepAllAttr:
elementTagNameLower === "svg" || element.closest("svg") !== null,
isSelectable:
elementTagNameLower === "select" ||
isReactSelectDropdown(element) ||
isComboboxDropdown(element) ||
isSelect2Dropdown(element) ||
isSelect2MultiChoice(element),
isScrollable: isScrollable(element),
};
let isInShadowRoot = element.getRootNode() instanceof ShadowRoot;
@@ -882,94 +930,8 @@ async function buildTreeFromBody(frame = "main.frame", open_select = false) {
let selectedValue = "";
if (elementTagNameLower === "select") {
[selectOptions, selectedValue] = getSelectOptions(element);
} else if (attrs["role"] && attrs["role"].toLowerCase() === "listbox") {
// if "role" key is inside attrs, then get all the elements with role "option" and get their text
selectOptions = getListboxOptions(element);
} else if (open_select && isReactSelectDropdown(element)) {
element.dispatchEvent(
new MouseEvent("mouseup", {
bubbles: true,
view: window,
}),
);
element.dispatchEvent(
new MouseEvent("mousedown", {
bubbles: true,
view: window,
}),
);
selectOptions = await getReactSelectOptions(element);
// click again to close
element.dispatchEvent(
new MouseEvent("mouseup", {
bubbles: true,
view: window,
}),
);
element.dispatchEvent(
new MouseEvent("mousedown", {
bubbles: true,
view: window,
}),
);
element.dispatchEvent(
new KeyboardEvent("keydown", {
keyCode: 27,
bubbles: true,
key: "Escape",
}),
);
} else if (open_select && isComboboxDropdown(element)) {
// open combobox dropdown to get options
element.click();
const listBox = element
.getRootNode()
.getElementById(element.getAttribute("aria-controls"));
if (listBox) {
selectOptions = getListboxOptions(listBox);
}
// HACK: press Tab to close the dropdown
element.dispatchEvent(
new KeyboardEvent("keydown", {
keyCode: 9,
bubbles: true,
key: "Tab",
}),
);
} else if (open_select && isSelect2Dropdown(element)) {
// click element to show options
element.dispatchEvent(
new MouseEvent("mousedown", {
bubbles: true,
view: window,
}),
);
selectOptions = await getSelect2Options(element);
// HACK: click again to close the dropdown
element.dispatchEvent(
new MouseEvent("mousedown", {
bubbles: true,
view: window,
}),
);
} else if (open_select && isSelect2MultiChoice(element)) {
// click element to show options
element.click();
selectOptions = await getSelect2Options(element);
// HACK: press ESC to close the dropdown
element.dispatchEvent(
new KeyboardEvent("keydown", {
keyCode: 27,
bubbles: true,
key: "Escape",
}),
);
}
if (selectOptions) {
elementObj.options = selectOptions;
}
@@ -1308,9 +1270,8 @@ async function buildTreeFromBody(frame = "main.frame", open_select = false) {
return trimmedResults;
};
// TODO: Handle iframes
// setup before parsing the dom
await processElement(document.body, null);
await processElement(starter, null);
for (var element of elements) {
if (
@@ -1568,6 +1529,22 @@ async function scrollToNextPage(draw_boxes) {
return window.scrollY;
}
/**
 * Jumps the element's scroll position to its bottom without animation.
 *
 * @param element - scrollable element; its `scrollHeight` is used as the target offset.
 */
function scrollToElementBottom(element) {
  const bottomOffset = element.scrollHeight;
  const scrollOptions = { top: bottomOffset, left: 0, behavior: "instant" };
  element.scroll(scrollOptions);
}
/**
 * Jumps the element's scroll position back to the top-left corner without animation.
 *
 * @param element - scrollable element to reset.
 */
function scrollToElementTop(element) {
  const scrollOptions = { top: 0, left: 0, behavior: "instant" };
  element.scroll(scrollOptions);
}
/**
 * Resolves after roughly `ms` milliseconds, using setTimeout.
 *
 * @param ms - delay in milliseconds.
 * @returns a promise that resolves (with no value) once the timer fires.
 */
async function sleep(ms) {
  const timer = new Promise((wake) => {
    setTimeout(wake, ms);
  });
  return timer;
}
@@ -1589,3 +1566,140 @@ function findNodeById(arr, targetId, path = []) {
}
return null;
}
/**
 * Counts how many parentElement hops separate a node from the first element
 * child of its root node (as returned by getRootNode()).
 *
 * The walk also stops when a node has no parentElement, so a node that is not
 * below that first element child yields the number of ancestors it has.
 *
 * @param elementNode - node whose depth is measured.
 * @returns number of parentElement steps taken.
 */
function getElementDomDepth(elementNode) {
  const topElement = elementNode.getRootNode().firstElementChild;
  let levels = 0;
  for (
    let cursor = elementNode;
    cursor !== topElement && cursor.parentElement;
    cursor = cursor.parentElement
  ) {
    levels += 1;
  }
  return levels;
}
// Buffer of DOM changes captured by the observer below. Each entry has the
// shape { targetNode, newNodes }. It is read by getIncrementElements() and
// reset by startGlobalIncrementalObserver() / stopGlobalIncrementalObserver().
if (window.globalOneTimeIncrementElements === undefined) {
  window.globalOneTimeIncrementElements = [];
}

// The observer is created only once per window (guarded by the undefined
// check) so re-evaluating this script does not stack multiple observers.
if (window.globalObserverForDOMIncrement === undefined) {
  window.globalObserverForDOMIncrement = new MutationObserver(function (
    mutationsList,
    observer,
  ) {
    for (const mutation of mutationsList) {
      if (mutation.type === "attributes") {
        if (mutation.attributeName === "style") {
          // TODO: need to confirm that the element was hidden previously
          // Declared with const: the previous bare assignment leaked an
          // implicit global named `node` (and would throw in strict mode).
          const styledNode = mutation.target;
          if (styledNode.nodeType === Node.TEXT_NODE) continue;
          const newStyle = window.getComputedStyle(styledNode);
          const newDisplay = newStyle.display;
          // A style change that leaves the node displayed is recorded as the
          // node itself being "new".
          if (newDisplay !== "none") {
            window.globalOneTimeIncrementElements.push({
              targetNode: styledNode,
              newNodes: [styledNode],
            });
          }
        }
        // TODO: we may need to detect the visibility change from class
        // if (mutation.attributeName === "class") {
        // }
      }

      if (mutation.type === "childList") {
        let changedNode = {
          targetNode: mutation.target, // TODO: for future usage, when we want to parse new elements into a tree
        };
        let newNodes = [];
        if (mutation.addedNodes && mutation.addedNodes.length > 0) {
          for (const addedNode of mutation.addedNodes) {
            // Only element nodes are kept: text, comment and other
            // non-element nodes won't be interactable.
            if (addedNode.nodeType !== Node.ELEMENT_NODE) continue;
            newNodes.push(addedNode);
          }
        }
        if (newNodes.length > 0) {
          changedNode.newNodes = newNodes;
          window.globalOneTimeIncrementElements.push(changedNode);
        }
      }
    }
  });
}
/**
 * Starts a fresh incremental-DOM recording session.
 *
 * Clears the recorded-changes buffer, drains any mutation records still queued
 * on the global observer, then begins observing `document.body` and its whole
 * subtree for attribute, child-list and character-data changes.
 */
function startGlobalIncrementalObserver() {
  window.globalOneTimeIncrementElements = [];

  const domObserver = window.globalObserverForDOMIncrement;
  domObserver.takeRecords(); // cleanup the older data

  const observeOptions = {
    attributes: true,
    attributeOldValue: true,
    childList: true,
    subtree: true,
    characterData: true,
  };
  domObserver.observe(document.body, observeOptions);
}
/**
 * Ends the incremental-DOM recording session.
 *
 * Disconnects the global observer, drains any records it still holds, and
 * empties the recorded-changes buffer.
 */
function stopGlobalIncrementalObserver() {
  const domObserver = window.globalObserverForDOMIncrement;
  domObserver.disconnect();
  domObserver.takeRecords(); // cleanup the older data
  window.globalOneTimeIncrementElements = [];
}
/**
 * Converts the DOM changes recorded in window.globalOneTimeIncrementElements
 * into element trees and removes duplicates.
 *
 * Every recorded new node is parsed with buildElementTree(); the resulting
 * tree heads are grouped by the DOM depth of the mutation's target node.
 * Groups are then visited from the shallowest depth to the deepest, so that a
 * subtree already reachable from a shallower tree head is not reported again.
 *
 * @param frame - frame identifier forwarded to buildElementTree.
 * @returns a two-item array: [flat list of all unique elements, list of de-duplicated tree heads].
 */
async function getIncrementElements(frame) {
  const domDepthMap = new Map();

  for (const element of window.globalOneTimeIncrementElements) {
    // calculate the depth of targetNode element for sorting
    const depth = getElementDomDepth(element.targetNode);
    const newNodesTreeList = domDepthMap.has(depth)
      ? domDepthMap.get(depth)
      : [];

    for (const child of element.newNodes) {
      // Only the tree (second item) is needed here; the flat list is rebuilt
      // below while de-duplicating.
      const [, newNodeTree] = await buildElementTree(child, frame, false);
      if (newNodeTree.length > 0) {
        newNodesTreeList.push(...newNodeTree);
      }
    }
    domDepthMap.set(depth, newNodesTreeList);
  }

  // cleanup the children tree, remove the duplicated element
  // search starting from the shallowest node:
  //  1. if deeper, the node could only be the children of the shallower one or no related one.
  //  2. if depth is same, the node could only be duplicated one or no related one.
  const idToElement = new Map();
  const cleanedTreeList = [];
  // Depths are numbers: an explicit numeric comparator is required, because
  // the default sort() compares string representations ("10" < "2") and would
  // visit deep nodes before shallow ones.
  const sortedDepth = Array.from(domDepthMap.keys()).sort((a, b) => a - b);

  for (const depth of sortedDepth) {
    const treeList = domDepthMap.get(depth);

    for (const treeHeadElement of treeList) {
      // skip tree heads already covered by a previously visited tree
      if (idToElement.has(treeHeadElement.id)) {
        continue;
      }

      cleanedTreeList.push(treeHeadElement);

      // flatten the tree (breadth-first) and register every element by id
      const pendingElements = [treeHeadElement];
      let curIndex = 0;
      while (curIndex < pendingElements.length) {
        const curElement = pendingElements[curIndex];
        curIndex++;
        if (idToElement.has(curElement.id)) {
          continue;
        }
        idToElement.set(curElement.id, curElement);
        pendingElements.push(...curElement.children);
      }
    }
  }

  return [Array.from(idToElement.values()), cleanedTreeList];
}

View File

@@ -96,6 +96,9 @@ def json_to_html(element: dict) -> str:
attributes_html = " ".join(build_attribute(key, value) for key, value in attributes.items())
tag = element["tagName"]
if element.get("isSelectable", False):
tag = "select"
text = element.get("text", "")
# build children HTML
children_html = "".join(json_to_html(child) for child in element.get("children", []))
@@ -112,6 +115,21 @@ def json_to_html(element: dict) -> str:
return f'<{tag}{attributes_html if not attributes_html else " "+attributes_html}>{text}{children_html+option_html}</{tag}>'
def build_element_dict(elements: list[dict]) -> tuple[dict[str, str], dict[str, dict], dict[str, str]]:
    """Index a flat list of scraped elements by their id.

    Returns three dicts keyed by element id, in this order:
    the CSS attribute selector for the element, the element dict itself, and
    the value of the element's "frame" key. An element without an "id" key is
    stored under the empty string; an element without "frame" raises KeyError.
    """
    # get_interactable_element_tree marks each interactable element with a unique_id attribute
    keyed_elements: list[tuple[str, dict]] = [(item.get("id", ""), item) for item in elements]

    css_by_id: dict[str, str] = {}
    element_by_id: dict[str, dict] = {}
    frame_by_id: dict[str, str] = {}
    for item_id, item in keyed_elements:
        css_by_id[item_id] = f"[{SKYVERN_ID_ATTR}='{item_id}']"
        element_by_id[item_id] = item
        frame_by_id[item_id] = item["frame"]

    return css_by_id, element_by_id, frame_by_id
class ElementTreeFormat(StrEnum):
JSON = "json"
HTML = "html"
@@ -266,16 +284,7 @@ async def scrape_web_unsafe(
elements, element_tree = await get_interactable_element_tree(page, scrape_exclude)
element_tree = await cleanup_element_tree(url, copy.deepcopy(element_tree))
id_to_css_dict = {}
id_to_element_dict = {}
id_to_frame_dict = {}
for element in elements:
element_id = element["id"]
# get_interactable_element_tree marks each interactable element with a unique_id attribute
id_to_css_dict[element_id] = f"[{SKYVERN_ID_ATTR}='{element_id}']"
id_to_element_dict[element_id] = element
id_to_frame_dict[element_id] = element["frame"]
id_to_css_dict, id_to_element_dict, id_to_frame_dict = build_element_dict(elements)
text_content = await get_frame_text(page.main_frame)
@@ -378,6 +387,65 @@ async def get_interactable_element_tree(
return elements, element_tree
class IncrementalScrapePage:
    """Collects the elements that appeared in a frame since DOM observation started.

    The in-page helpers (startGlobalIncrementalObserver, stopGlobalIncrementalObserver,
    getIncrementElements) are invoked through the wrapped frame's ``evaluate``.
    """

    # Populated by get_incremental_element_tree(); empty until the first call.
    id_to_element_dict: dict[str, dict]
    id_to_css_dict: dict[str, str]
    elements: list[dict]
    element_tree: list[dict]
    element_tree_trimmed: list[dict]

    def __init__(self, skyvern_frame: SkyvernFrame) -> None:
        self.skyvern_frame = skyvern_frame
        # Per-instance containers: a mutable class-level default would be shared
        # by every instance, and unset attributes would raise AttributeError in
        # build_html_tree() before the first scrape.
        self.id_to_element_dict = {}
        self.id_to_css_dict = {}
        self.elements = []
        self.element_tree = []
        self.element_tree_trimmed = []

    async def get_incremental_element_tree(
        self,
        cleanup_element_tree: Callable[[str, list[dict]], Awaitable[list[dict]]],
    ) -> list[dict]:
        """Fetch the recorded incremental elements and return the trimmed tree.

        ``cleanup_element_tree`` is awaited with the frame URL and a deep copy of
        the raw incremental tree. Updates ``elements``, ``id_to_css_dict``,
        ``id_to_element_dict``, ``element_tree`` and ``element_tree_trimmed``.
        """
        frame = self.skyvern_frame.get_frame()

        frame_id = "main.frame"
        if isinstance(frame, Frame):
            try:
                frame_element = await frame.frame_element()
                unique_id = await frame_element.get_attribute("unique_id")
                # keep the default when the attribute is missing, so frame_id is always a str
                if unique_id:
                    frame_id = unique_id
            except Exception:
                # TODO: do we really care about the frame_id ?
                LOG.warning(
                    "Unable to get frame_element",
                    exc_info=True,
                )

        # Pass frame_id as an evaluate argument rather than formatting it into
        # the script source, so quotes in the id cannot break the expression.
        js_script = "async (frameId) => await getIncrementElements(frameId)"
        incremental_elements, incremental_tree = await frame.evaluate(js_script, frame_id)
        # we listen to the incremental elements separated by frames, so all elements will be in the same SkyvernFrame
        self.id_to_css_dict, self.id_to_element_dict, _ = build_element_dict(incremental_elements)

        self.elements = incremental_elements

        incremental_tree = await cleanup_element_tree(frame.url, copy.deepcopy(incremental_tree))
        trimmed_element_tree = trim_element_tree(copy.deepcopy(incremental_tree))

        self.element_tree = incremental_tree
        self.element_tree_trimmed = trimmed_element_tree

        return self.element_tree_trimmed

    async def start_listen_dom_increment(self) -> None:
        """Start the in-page observer that records DOM changes."""
        js_script = "() => startGlobalIncrementalObserver()"
        await self.skyvern_frame.get_frame().evaluate(js_script)

    async def stop_listen_dom_increment(self) -> None:
        """Stop the in-page observer and clear its recorded changes."""
        js_script = "() => stopGlobalIncrementalObserver()"
        await self.skyvern_frame.get_frame().evaluate(js_script)

    async def get_incremental_elements_num(self) -> int:
        """Return how many change records the in-page observer has collected."""
        js_script = "() => window.globalOneTimeIncrementElements.length"
        return await self.skyvern_frame.get_frame().evaluate(js_script)

    def build_html_tree(self) -> str:
        """Render the trimmed incremental tree as one HTML string via json_to_html."""
        return "".join([json_to_html(element) for element in self.element_tree_trimmed])
def trim_element_tree(elements: list[dict]) -> list[dict]:
queue = []
for element in elements: