remove the old context tree (#268)
This commit is contained in:
@@ -544,7 +544,7 @@ function getListboxOptions(element) {
|
||||
return selectOptions;
|
||||
}
|
||||
|
||||
function buildTreeFromBody(new_ctx = false) {
|
||||
function buildTreeFromBody() {
|
||||
var elements = [];
|
||||
var resultArray = [];
|
||||
|
||||
@@ -634,7 +634,6 @@ function buildTreeFromBody(new_ctx = false) {
|
||||
}
|
||||
|
||||
if (
|
||||
new_ctx &&
|
||||
checkRequiredFromStyle(element) &&
|
||||
!attrs["required"] &&
|
||||
!attrs["aria-required"]
|
||||
@@ -712,7 +711,7 @@ function buildTreeFromBody(new_ctx = false) {
|
||||
elements[interactableParentId].children.push(elementObj);
|
||||
}
|
||||
// options already added to the select.options, no need to add options anymore
|
||||
if (new_ctx && elementObj.options && elementObj.options.length > 0) {
|
||||
if (elementObj.options && elementObj.options.length > 0) {
|
||||
return elementObj;
|
||||
}
|
||||
// Recursively process the children of the element
|
||||
@@ -744,7 +743,7 @@ function buildTreeFromBody(new_ctx = false) {
|
||||
if (parentEle) {
|
||||
if (
|
||||
targetParentElements.has(parentEle.tagName.toLowerCase()) ||
|
||||
(new_ctx && checkParentClass(parentEle.className.toLowerCase()))
|
||||
checkParentClass(parentEle.className.toLowerCase())
|
||||
) {
|
||||
targetContextualParent = parentEle;
|
||||
}
|
||||
@@ -939,7 +938,7 @@ function buildTreeFromBody(new_ctx = false) {
|
||||
element.context = context;
|
||||
}
|
||||
|
||||
if (new_ctx && checkStringIncludeRequire(context)) {
|
||||
if (checkStringIncludeRequire(context)) {
|
||||
if (
|
||||
!element.attributes["required"] &&
|
||||
!element.attributes["aria-required"]
|
||||
@@ -949,10 +948,6 @@ function buildTreeFromBody(new_ctx = false) {
|
||||
}
|
||||
}
|
||||
|
||||
if (!new_ctx) {
|
||||
return [elements, resultArray];
|
||||
}
|
||||
|
||||
resultArray = removeOrphanNode(resultArray);
|
||||
resultArray.forEach((root) => {
|
||||
trimDuplicatedText(root);
|
||||
|
||||
@@ -184,7 +184,7 @@ async def scrape_web_unsafe(
|
||||
await remove_bounding_boxes(page)
|
||||
await scroll_to_top(page, drow_boxes=False)
|
||||
|
||||
elements, element_tree = await get_interactable_element_tree(page, browser_state.new_context_tree)
|
||||
elements, element_tree = await get_interactable_element_tree(page)
|
||||
element_tree = cleanup_elements(copy.deepcopy(element_tree))
|
||||
|
||||
_build_element_links(elements)
|
||||
@@ -211,15 +211,15 @@ async def scrape_web_unsafe(
|
||||
)
|
||||
|
||||
|
||||
async def get_interactable_element_tree(page: Page, new_context_tree: bool) -> tuple[list[dict], list[dict]]:
|
||||
async def get_interactable_element_tree(page: Page) -> tuple[list[dict], list[dict]]:
|
||||
"""
|
||||
Get the element tree of the page, including all the elements that are interactable.
|
||||
:param page: Page instance to get the element tree from.
|
||||
:return: Tuple of (elements, element_tree), in that order, as returned by buildTreeFromBody: the list of elements and the element tree.
|
||||
"""
|
||||
await page.evaluate(JS_FUNCTION_DEFS)
|
||||
js_script = "(new_ctx) => buildTreeFromBody(new_ctx)"
|
||||
elements, element_tree = await page.evaluate(js_script, new_context_tree)
|
||||
js_script = "() => buildTreeFromBody()"
|
||||
elements, element_tree = await page.evaluate(js_script)
|
||||
return elements, element_tree
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user