refactor select2 (#485)
This commit is contained in:
@@ -216,6 +216,10 @@ function isElementVisible(element) {
|
||||
if (element.tagName.toLowerCase() === "option")
|
||||
return element.parentElement && isElementVisible(element.parentElement);
|
||||
|
||||
if (element.className.toString().includes("select2-offscreen")) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const style = getElementComputedStyle(element);
|
||||
if (!style) return true;
|
||||
if (style.display === "contents") {
|
||||
@@ -414,6 +418,20 @@ const isComboboxDropdown = (element) => {
|
||||
return role && haspopup && controls && readonly;
|
||||
};
|
||||
|
||||
const isSelect2Dropdown = (element) => {
|
||||
return (
|
||||
element.tagName.toLowerCase() === "span" &&
|
||||
element.className.toString().includes("select2-chosen")
|
||||
);
|
||||
};
|
||||
|
||||
const isSelect2MultiChoice = (element) => {
|
||||
return (
|
||||
element.tagName.toLowerCase() === "input" &&
|
||||
element.className.toString().includes("select2-input")
|
||||
);
|
||||
};
|
||||
|
||||
const checkParentClass = (className) => {
|
||||
const targetParentClasses = ["field", "entry"];
|
||||
for (let i = 0; i < targetParentClasses.length; i++) {
|
||||
@@ -594,6 +612,58 @@ function getListboxOptions(element) {
|
||||
return selectOptions;
|
||||
}
|
||||
|
||||
async function getSelect2OptionElements() {
|
||||
let optionList = [];
|
||||
|
||||
while (true) {
|
||||
oldOptionCount = optionList.length;
|
||||
let newOptionList = document.querySelectorAll(
|
||||
"#select2-drop li[role='option']",
|
||||
);
|
||||
if (newOptionList.length === oldOptionCount) {
|
||||
console.log("no more options loaded, wait 5s to query again");
|
||||
// sometimes need more time to load the options, so sleep 10s and try again
|
||||
await sleep(5000); // wait 5s
|
||||
newOptionList = document.querySelectorAll(
|
||||
"#select2-drop li[role='option']",
|
||||
);
|
||||
console.log(newOptionList.length, " options found, after 5s");
|
||||
}
|
||||
|
||||
optionList = newOptionList;
|
||||
if (optionList.length === 0 || optionList.length === oldOptionCount) {
|
||||
break;
|
||||
}
|
||||
|
||||
lastOption = optionList[optionList.length - 1];
|
||||
if (!lastOption.className.toString().includes("select2-more-results")) {
|
||||
break;
|
||||
}
|
||||
lastOption.scrollIntoView();
|
||||
}
|
||||
|
||||
return optionList;
|
||||
}
|
||||
|
||||
async function getSelect2Options() {
|
||||
const optionList = await getSelect2OptionElements();
|
||||
|
||||
let selectOptions = [];
|
||||
for (let i = 0; i < optionList.length; i++) {
|
||||
let ele = optionList[i];
|
||||
if (ele.className.toString().includes("select2-more-results")) {
|
||||
continue;
|
||||
}
|
||||
|
||||
selectOptions.push({
|
||||
optionIndex: i,
|
||||
text: removeMultipleSpaces(ele.textContent),
|
||||
});
|
||||
}
|
||||
|
||||
return selectOptions;
|
||||
}
|
||||
|
||||
function uniqueId() {
|
||||
const characters =
|
||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
|
||||
@@ -605,63 +675,11 @@ function uniqueId() {
|
||||
return result;
|
||||
}
|
||||
|
||||
function buildTreeFromBody(frame = "main.frame") {
|
||||
async function buildTreeFromBody(frame = "main.frame", open_select = false) {
|
||||
var elements = [];
|
||||
var resultArray = [];
|
||||
|
||||
const checkSelect2 = () => {
|
||||
const showInvisible = (element) => {
|
||||
if (element.style.display === "none") {
|
||||
element.style.removeProperty("display");
|
||||
return true;
|
||||
}
|
||||
|
||||
const removedClass = [];
|
||||
for (let i = 0; i < element.classList.length; i++) {
|
||||
const className = element.classList[i];
|
||||
if (className.includes("hidden")) {
|
||||
removedClass.push(className);
|
||||
}
|
||||
}
|
||||
if (removedClass.length !== 0) {
|
||||
removedClass.forEach((className) => {
|
||||
element.classList.remove(className);
|
||||
});
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
// according to select2(https://select2.org/getting-started/basic-usage)
|
||||
// select2-container seems to be the most common class in select2,
|
||||
// and the invisible select seems to be the sibling to the "select2-container" element.
|
||||
const selectContainers = document.querySelectorAll(".select2-container");
|
||||
|
||||
selectContainers.forEach((element) => {
|
||||
// search select in previous
|
||||
let _pre = element.previousElementSibling;
|
||||
while (_pre) {
|
||||
if (_pre.tagName.toLowerCase() === "select" && showInvisible(_pre)) {
|
||||
// only hide the select2 container when an alternative select found
|
||||
element.style.display = "none";
|
||||
return;
|
||||
}
|
||||
_pre = _pre.previousElementSibling;
|
||||
}
|
||||
|
||||
// search select in next
|
||||
let _next = element.nextElementSibling;
|
||||
while (_next) {
|
||||
if (_next.tagName.toLowerCase() === "select" && showInvisible(_next)) {
|
||||
// only hide the select2 container when an alternative select found
|
||||
element.style.display = "none";
|
||||
return;
|
||||
}
|
||||
_next = _next.nextElementSibling;
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
function buildElementObject(element, interactable) {
|
||||
async function buildElementObject(element, interactable) {
|
||||
var element_id = element.getAttribute("unique_id") ?? uniqueId();
|
||||
var elementTagNameLower = element.tagName.toLowerCase();
|
||||
element.setAttribute("unique_id", element_id);
|
||||
@@ -718,7 +736,7 @@ function buildTreeFromBody(frame = "main.frame") {
|
||||
} else if (attrs["role"] && attrs["role"].toLowerCase() === "listbox") {
|
||||
// if "role" key is inside attrs, then get all the elements with role "option" and get their text
|
||||
selectOptions = getListboxOptions(element);
|
||||
} else if (isComboboxDropdown(element)) {
|
||||
} else if (open_select && isComboboxDropdown(element)) {
|
||||
// open combobox dropdown to get options
|
||||
element.click();
|
||||
const listBox = document.getElementById(
|
||||
@@ -735,6 +753,37 @@ function buildTreeFromBody(frame = "main.frame") {
|
||||
key: "Tab",
|
||||
}),
|
||||
);
|
||||
} else if (open_select && isSelect2Dropdown(element)) {
|
||||
// click element to show options
|
||||
element.dispatchEvent(
|
||||
new MouseEvent("mousedown", {
|
||||
bubbles: true,
|
||||
view: window,
|
||||
}),
|
||||
);
|
||||
|
||||
selectOptions = await getSelect2Options();
|
||||
|
||||
// HACK: click again to close the dropdown
|
||||
element.dispatchEvent(
|
||||
new MouseEvent("mousedown", {
|
||||
bubbles: true,
|
||||
view: window,
|
||||
}),
|
||||
);
|
||||
} else if (open_select && isSelect2MultiChoice(element)) {
|
||||
// click element to show options
|
||||
element.click();
|
||||
selectOptions = await getSelect2Options();
|
||||
|
||||
// HACK: press ESC to close the dropdown
|
||||
element.dispatchEvent(
|
||||
new KeyboardEvent("keydown", {
|
||||
keyCode: 27,
|
||||
bubbles: true,
|
||||
key: "Escape",
|
||||
}),
|
||||
);
|
||||
}
|
||||
if (selectOptions) {
|
||||
elementObj.options = selectOptions;
|
||||
@@ -750,7 +799,7 @@ function buildTreeFromBody(frame = "main.frame") {
|
||||
return [];
|
||||
}
|
||||
}
|
||||
function processElement(element, parentId) {
|
||||
async function processElement(element, parentId) {
|
||||
if (element === null) {
|
||||
console.log("get a null element");
|
||||
return;
|
||||
@@ -766,7 +815,7 @@ function buildTreeFromBody(frame = "main.frame") {
|
||||
|
||||
// Check if the element is interactable
|
||||
if (isInteractable(element)) {
|
||||
var elementObj = buildElementObject(element, true);
|
||||
var elementObj = await buildElementObject(element, true);
|
||||
elements.push(elementObj);
|
||||
// If the element is interactable but has no interactable parent,
|
||||
// then it starts a new tree, so add it to the result array
|
||||
@@ -788,12 +837,14 @@ function buildTreeFromBody(frame = "main.frame") {
|
||||
return elementObj;
|
||||
}
|
||||
// Recursively process the children of the element
|
||||
getChildElements(element).forEach((child) => {
|
||||
processElement(child, elementObj.id);
|
||||
});
|
||||
const children = getChildElements(element);
|
||||
for (let i = 0; i < children.length; i++) {
|
||||
const childElement = children[i];
|
||||
await processElement(childElement, elementObj.id);
|
||||
}
|
||||
return elementObj;
|
||||
} else if (element.tagName.toLowerCase() === "iframe") {
|
||||
let iframeElementObject = buildElementObject(element, false);
|
||||
let iframeElementObject = await buildElementObject(element, false);
|
||||
|
||||
elements.push(iframeElementObject);
|
||||
resultArray.push(iframeElementObject);
|
||||
@@ -820,7 +871,7 @@ function buildTreeFromBody(frame = "main.frame") {
|
||||
// we don't use element context in HTML format,
|
||||
// so we need to make sure we parse all text node to avoid missing text in HTML.
|
||||
if (textContent && textContent.length <= 5000) {
|
||||
var elementObj = buildElementObject(element, false);
|
||||
var elementObj = await buildElementObject(element, false);
|
||||
elements.push(elementObj);
|
||||
if (parentId === null) {
|
||||
resultArray.push(elementObj);
|
||||
@@ -833,9 +884,12 @@ function buildTreeFromBody(frame = "main.frame") {
|
||||
parentId = elementObj.id;
|
||||
}
|
||||
}
|
||||
getChildElements(element).forEach((child) => {
|
||||
processElement(child, parentId);
|
||||
});
|
||||
|
||||
const children = getChildElements(element);
|
||||
for (let i = 0; i < children.length; i++) {
|
||||
const childElement = children[i];
|
||||
await processElement(childElement, parentId);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1030,8 +1084,7 @@ function buildTreeFromBody(frame = "main.frame") {
|
||||
|
||||
// TODO: Handle iframes
|
||||
// setup before parsing the dom
|
||||
checkSelect2();
|
||||
processElement(document.body, null);
|
||||
await processElement(document.body, null);
|
||||
|
||||
for (var element of elements) {
|
||||
if (
|
||||
@@ -1247,17 +1300,17 @@ function removeBoundingBoxes() {
|
||||
}
|
||||
}
|
||||
|
||||
function scrollToTop(draw_boxes) {
|
||||
async function scrollToTop(draw_boxes) {
|
||||
removeBoundingBoxes();
|
||||
window.scroll({ left: 0, top: 0, behavior: "instant" });
|
||||
if (draw_boxes) {
|
||||
var elementsAndResultArray = buildTreeFromBody();
|
||||
var elementsAndResultArray = await buildTreeFromBody();
|
||||
drawBoundingBoxes(elementsAndResultArray[0]);
|
||||
}
|
||||
return window.scrollY;
|
||||
}
|
||||
|
||||
function scrollToNextPage(draw_boxes) {
|
||||
async function scrollToNextPage(draw_boxes) {
|
||||
// remove bounding boxes, scroll to next page with 200px overlap, then draw bounding boxes again
|
||||
// return true if there is a next page, false otherwise
|
||||
removeBoundingBoxes();
|
||||
@@ -1267,8 +1320,12 @@ function scrollToNextPage(draw_boxes) {
|
||||
behavior: "instant",
|
||||
});
|
||||
if (draw_boxes) {
|
||||
var elementsAndResultArray = buildTreeFromBody();
|
||||
var elementsAndResultArray = await buildTreeFromBody();
|
||||
drawBoundingBoxes(elementsAndResultArray[0]);
|
||||
}
|
||||
return window.scrollY;
|
||||
}
|
||||
|
||||
async function sleep(ms) {
|
||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
@@ -297,6 +297,12 @@ async def scrape_web_unsafe(
|
||||
)
|
||||
|
||||
|
||||
async def get_select2_options(page: Page) -> list[dict[str, Any]]:
|
||||
await page.evaluate(JS_FUNCTION_DEFS)
|
||||
js_script = "async () => await getSelect2Options()"
|
||||
return await page.evaluate(js_script)
|
||||
|
||||
|
||||
async def get_interactable_element_tree_in_frame(
|
||||
frames: list[Frame], elements: list[dict], element_tree: list[dict]
|
||||
) -> tuple[list[dict], list[dict]]:
|
||||
@@ -315,7 +321,7 @@ async def get_interactable_element_tree_in_frame(
|
||||
|
||||
unique_id = await frame_element.get_attribute("unique_id")
|
||||
|
||||
frame_js_script = f"() => buildTreeFromBody('{unique_id}')"
|
||||
frame_js_script = f"async () => await buildTreeFromBody('{unique_id}', true)"
|
||||
|
||||
await frame.evaluate(JS_FUNCTION_DEFS)
|
||||
frame_elements, frame_element_tree = await frame.evaluate(frame_js_script)
|
||||
@@ -345,7 +351,7 @@ async def get_interactable_element_tree(page: Page) -> tuple[list[dict], list[di
|
||||
:return: Tuple containing the element tree and a map of element IDs to elements.
|
||||
"""
|
||||
await page.evaluate(JS_FUNCTION_DEFS)
|
||||
main_frame_js_script = "() => buildTreeFromBody('main.frame')"
|
||||
main_frame_js_script = "async () => await buildTreeFromBody('main.frame', true)"
|
||||
elements, element_tree = await page.evaluate(main_frame_js_script)
|
||||
|
||||
# FIXME: some unexpected exception in iframe. turn off temporarily
|
||||
@@ -365,7 +371,7 @@ async def scroll_to_top(page: Page, drow_boxes: bool) -> float:
|
||||
:return: Screenshot of the page.
|
||||
"""
|
||||
await page.evaluate(JS_FUNCTION_DEFS)
|
||||
js_script = f"() => scrollToTop({str(drow_boxes).lower()})"
|
||||
js_script = f"async () => await scrollToTop({str(drow_boxes).lower()})"
|
||||
scroll_y_px = await page.evaluate(js_script)
|
||||
return scroll_y_px
|
||||
|
||||
@@ -378,7 +384,7 @@ async def scroll_to_next_page(page: Page, drow_boxes: bool) -> bool:
|
||||
:return: Screenshot of the page.
|
||||
"""
|
||||
await page.evaluate(JS_FUNCTION_DEFS)
|
||||
js_script = f"() => scrollToNextPage({str(drow_boxes).lower()})"
|
||||
js_script = f"async () => await scrollToNextPage({str(drow_boxes).lower()})"
|
||||
scroll_y_px = await page.evaluate(js_script)
|
||||
return scroll_y_px
|
||||
|
||||
|
||||
Reference in New Issue
Block a user