Use 4-char element ids instead of sequential integers (#361)
Co-authored-by: LawyZheng <lawyzheng1106@gmail.com>
This commit is contained in:
@@ -47,7 +47,7 @@ class ScriptNotFound(SkyvernException):
|
|||||||
|
|
||||||
|
|
||||||
class MissingElement(SkyvernException):
|
class MissingElement(SkyvernException):
|
||||||
def __init__(self, xpath: str | None = None, element_id: int | None = None):
|
def __init__(self, xpath: str | None = None, element_id: str | None = None):
|
||||||
super().__init__(
|
super().__init__(
|
||||||
f"Found no elements. Might be due to previous actions which removed this element."
|
f"Found no elements. Might be due to previous actions which removed this element."
|
||||||
f" xpath={xpath} element_id={element_id}",
|
f" xpath={xpath} element_id={element_id}",
|
||||||
@@ -55,7 +55,7 @@ class MissingElement(SkyvernException):
|
|||||||
|
|
||||||
|
|
||||||
class MultipleElementsFound(SkyvernException):
|
class MultipleElementsFound(SkyvernException):
|
||||||
def __init__(self, num: int, xpath: str | None = None, element_id: int | None = None):
|
def __init__(self, num: int, xpath: str | None = None, element_id: str | None = None):
|
||||||
super().__init__(
|
super().__init__(
|
||||||
f"Found {num} elements. Expected 1. num_elements={num} xpath={xpath} element_id={element_id}",
|
f"Found {num} elements. Expected 1. num_elements={num} xpath={xpath} element_id={element_id}",
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -493,7 +493,7 @@ class ForgeAgent:
|
|||||||
|
|
||||||
# build a linked action chain by the action_idx
|
# build a linked action chain by the action_idx
|
||||||
action_linked_list: list[ActionLinkedNode] = []
|
action_linked_list: list[ActionLinkedNode] = []
|
||||||
element_id_to_action_index: dict[int, int] = dict()
|
element_id_to_action_index: dict[str, int] = dict()
|
||||||
for action_idx, action in enumerate(actions):
|
for action_idx, action in enumerate(actions):
|
||||||
node = ActionLinkedNode(action=action)
|
node = ActionLinkedNode(action=action)
|
||||||
action_linked_list.append(node)
|
action_linked_list.append(node)
|
||||||
@@ -508,7 +508,7 @@ class ForgeAgent:
|
|||||||
|
|
||||||
element_id_to_action_index[action.element_id] = action_idx
|
element_id_to_action_index[action.element_id] = action_idx
|
||||||
|
|
||||||
element_id_to_last_action: dict[int, int] = dict()
|
element_id_to_last_action: dict[str, int] = dict()
|
||||||
for action_idx, action_node in enumerate(action_linked_list):
|
for action_idx, action_node in enumerate(action_linked_list):
|
||||||
action = action_node.action
|
action = action_node.action
|
||||||
if isinstance(action, WebAction):
|
if isinstance(action, WebAction):
|
||||||
|
|||||||
@@ -13,12 +13,12 @@ Reply in JSON format with the following keys:
|
|||||||
"reasoning": str, // The reasoning behind the action. Be specific, referencing any user information and their fields and element ids in your reasoning. Mention why you chose the action type, and why you chose the element id. Keep the reasoning short and to the point.
|
"reasoning": str, // The reasoning behind the action. Be specific, referencing any user information and their fields and element ids in your reasoning. Mention why you chose the action type, and why you chose the element id. Keep the reasoning short and to the point.
|
||||||
"confidence_float": float, // The confidence of the action. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence
|
"confidence_float": float, // The confidence of the action. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence
|
||||||
"action_type": str, // It's a string enum: "CLICK", "INPUT_TEXT", "UPLOAD_FILE", "SELECT_OPTION", "WAIT", "SOLVE_CAPTCHA", "COMPLETE", "TERMINATE". "CLICK" is an element you'd like to click. "INPUT_TEXT" is an element you'd like to input text into. "UPLOAD_FILE" is an element you'd like to upload a file into. "SELECT_OPTION" is an element you'd like to select an option from. "WAIT" action should be used if there are no actions to take and there is some indication on screen that waiting could yield more actions. "WAIT" should not be used if there are actions to take. "SOLVE_CAPTCHA" should be used if there's a captcha to solve on the screen. "COMPLETE" is used when the user goal has been achieved AND if there's any data extraction goal, you should be able to get data from the page. Never return a COMPLETE action unless you confirm user goal is achieved through the elements or the screenshots. "TERMINATE" is used to terminate the whole task with a failure when it doesn't seem like the user goal can be achieved. Do not use "TERMINATE" if waiting could lead the user towards the goal. Only return "TERMINATE" if you are on a page where the user goal cannot be achieved. If you are returning "COMPLETE" or "TERMINATE", never return any other action in the same response. The "COMPLETE" and "TERMINATE" actions can only be returned once in the whole task. When they are returned, they have to be the only action in the response.
|
"action_type": str, // It's a string enum: "CLICK", "INPUT_TEXT", "UPLOAD_FILE", "SELECT_OPTION", "WAIT", "SOLVE_CAPTCHA", "COMPLETE", "TERMINATE". "CLICK" is an element you'd like to click. "INPUT_TEXT" is an element you'd like to input text into. "UPLOAD_FILE" is an element you'd like to upload a file into. "SELECT_OPTION" is an element you'd like to select an option from. "WAIT" action should be used if there are no actions to take and there is some indication on screen that waiting could yield more actions. "WAIT" should not be used if there are actions to take. "SOLVE_CAPTCHA" should be used if there's a captcha to solve on the screen. "COMPLETE" is used when the user goal has been achieved AND if there's any data extraction goal, you should be able to get data from the page. Never return a COMPLETE action unless you confirm user goal is achieved through the elements or the screenshots. "TERMINATE" is used to terminate the whole task with a failure when it doesn't seem like the user goal can be achieved. Do not use "TERMINATE" if waiting could lead the user towards the goal. Only return "TERMINATE" if you are on a page where the user goal cannot be achieved. If you are returning "COMPLETE" or "TERMINATE", never return any other action in the same response. The "COMPLETE" and "TERMINATE" actions can only be returned once in the whole task. When they are returned, they have to be the only action in the response.
|
||||||
"id": int, // The id of the element to take action on. The id has to be one from the elements list
|
"id": str, // The id of the element to take action on. The id has to be one from the elements list
|
||||||
"text": str, // Text for INPUT_TEXT action only
|
"text": str, // Text for INPUT_TEXT action only
|
||||||
"file_url": str, // The url of the file to upload if applicable. This field must be present for UPLOAD_FILE but can also be present for CLICK only if the click is to upload the file. It should be null otherwise.
|
"file_url": str, // The url of the file to upload if applicable. This field must be present for UPLOAD_FILE but can also be present for CLICK only if the click is to upload the file. It should be null otherwise.
|
||||||
"option": { // The option to select for SELECT_OPTION action only. null if not SELECT_OPTION action
|
"option": { // The option to select for SELECT_OPTION action only. null if not SELECT_OPTION action
|
||||||
"label": str, // the label of the option if any. MAKE SURE YOU USE THIS LABEL TO SELECT THE OPTION. DO NOT PUT ANYTHING OTHER THAN A VALID OPTION LABEL HERE
|
"label": str, // the label of the option if any. MAKE SURE YOU USE THIS LABEL TO SELECT THE OPTION. DO NOT PUT ANYTHING OTHER THAN A VALID OPTION LABEL HERE
|
||||||
"index": int, // the id corresponding to the optionIndex under the the select element.
|
"index": int, // the index corresponding to the option index under the select element.
|
||||||
"value": str // the value of the option. MAKE SURE YOU USE THIS VALUE TO SELECT THE OPTION. DO NOT PUT ANYTHING OTHER THAN A VALID OPTION VALUE HERE
|
"value": str // the value of the option. MAKE SURE YOU USE THIS VALUE TO SELECT THE OPTION. DO NOT PUT ANYTHING OTHER THAN A VALID OPTION VALUE HERE
|
||||||
},
|
},
|
||||||
{% if error_code_mapping_str %}
|
{% if error_code_mapping_str %}
|
||||||
|
|||||||
@@ -13,13 +13,13 @@ Reply in JSON format with the following keys:
|
|||||||
"reasoning": str, // The reasoning behind the action. Be specific, referencing any user information and their fields and element ids in your reasoning. Mention why you chose the action type, and why you chose the element id. Keep the reasoning short and to the point.
|
"reasoning": str, // The reasoning behind the action. Be specific, referencing any user information and their fields and element ids in your reasoning. Mention why you chose the action type, and why you chose the element id. Keep the reasoning short and to the point.
|
||||||
"confidence_float": float, // The confidence of the action. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence
|
"confidence_float": float, // The confidence of the action. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence
|
||||||
"action_type": str, // It's a string enum: "CLICK", "INPUT_TEXT", "UPLOAD_FILE", "SELECT_OPTION", "WAIT", "SOLVE_CAPTCHA", "COMPLETE", "TERMINATE". "CLICK" is an element you'd like to click. "INPUT_TEXT" is an element you'd like to input text into. "UPLOAD_FILE" is an element you'd like to upload a file into. "SELECT_OPTION" is an element you'd like to select an option from. "WAIT" action should be used if there are no actions to take and there is some indication on screen that waiting could yield more actions. "WAIT" should not be used if there are actions to take. "SOLVE_CAPTCHA" should be used if there's a captcha to solve on the screen. "COMPLETE" is used when the user goal has been achieved AND if there's any data extraction goal, you should be able to get data from the page. Never return a COMPLETE action unless the user goal is achieved. "TERMINATE" is used to terminate the whole task with a failure when it doesn't seem like the user goal can be achieved. Do not use "TERMINATE" if waiting could lead the user towards the goal. Only return "TERMINATE" if you are on a page where the user goal cannot be achieved. All other actions are ignored when "TERMINATE" is returned.
|
"action_type": str, // It's a string enum: "CLICK", "INPUT_TEXT", "UPLOAD_FILE", "SELECT_OPTION", "WAIT", "SOLVE_CAPTCHA", "COMPLETE", "TERMINATE". "CLICK" is an element you'd like to click. "INPUT_TEXT" is an element you'd like to input text into. "UPLOAD_FILE" is an element you'd like to upload a file into. "SELECT_OPTION" is an element you'd like to select an option from. "WAIT" action should be used if there are no actions to take and there is some indication on screen that waiting could yield more actions. "WAIT" should not be used if there are actions to take. "SOLVE_CAPTCHA" should be used if there's a captcha to solve on the screen. "COMPLETE" is used when the user goal has been achieved AND if there's any data extraction goal, you should be able to get data from the page. Never return a COMPLETE action unless the user goal is achieved. "TERMINATE" is used to terminate the whole task with a failure when it doesn't seem like the user goal can be achieved. Do not use "TERMINATE" if waiting could lead the user towards the goal. Only return "TERMINATE" if you are on a page where the user goal cannot be achieved. All other actions are ignored when "TERMINATE" is returned.
|
||||||
"id": int, // The id of the element to take action on. The id has to be one from the elements list
|
"id": str, // The id of the element to take action on. The id has to be one from the elements list
|
||||||
"text": str, // Text for INPUT_TEXT action only
|
"text": str, // Text for INPUT_TEXT action only
|
||||||
"file_url": str, // The url of the file to upload if applicable. This field must be present for UPLOAD_FILE but can also be present for CLICK only if the click is to upload the file. It should be null otherwise.
|
"file_url": str, // The url of the file to upload if applicable. This field must be present for UPLOAD_FILE but can also be present for CLICK only if the click is to upload the file. It should be null otherwise.
|
||||||
"download": bool, // Can only be true for CLICK actions. If true, the browser will trigger a download by clicking the element. If false, the browser will click the element without triggering a download.
|
"download": bool, // Can only be true for CLICK actions. If true, the browser will trigger a download by clicking the element. If false, the browser will click the element without triggering a download.
|
||||||
"option": { // The option to select for SELECT_OPTION action only. null if not SELECT_OPTION action
|
"option": { // The option to select for SELECT_OPTION action only. null if not SELECT_OPTION action
|
||||||
"label": str, // the label of the option if any. MAKE SURE YOU USE THIS LABEL TO SELECT THE OPTION. DO NOT PUT ANYTHING OTHER THAN A VALID OPTION LABEL HERE
|
"label": str, // the label of the option if any. MAKE SURE YOU USE THIS LABEL TO SELECT THE OPTION. DO NOT PUT ANYTHING OTHER THAN A VALID OPTION LABEL HERE
|
||||||
"index": int, // the id corresponding to the optionIndex under the the select element.
|
"index": int, // the index corresponding to the option index under the select element.
|
||||||
"value": str // the value of the option. MAKE SURE YOU USE THIS VALUE TO SELECT THE OPTION. DO NOT PUT ANYTHING OTHER THAN A VALID OPTION VALUE HERE
|
"value": str // the value of the option. MAKE SURE YOU USE THIS VALUE TO SELECT THE OPTION. DO NOT PUT ANYTHING OTHER THAN A VALID OPTION VALUE HERE
|
||||||
},
|
},
|
||||||
{% if error_code_mapping_str %}
|
{% if error_code_mapping_str %}
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ class Action(BaseModel):
|
|||||||
|
|
||||||
|
|
||||||
class WebAction(Action, abc.ABC):
|
class WebAction(Action, abc.ABC):
|
||||||
element_id: int
|
element_id: str
|
||||||
|
|
||||||
|
|
||||||
class UserDefinedError(BaseModel):
|
class UserDefinedError(BaseModel):
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
|
||||||
import uuid
|
import uuid
|
||||||
from typing import Any, Awaitable, Callable, List
|
from typing import Any, Awaitable, Callable, List
|
||||||
|
|
||||||
@@ -516,40 +515,20 @@ async def handle_select_option_action(
|
|||||||
return [ActionFailure(e)]
|
return [ActionFailure(e)]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
option_xpath = scraped_page.id_to_xpath_dict[action.option.index]
|
# This means the supplied index was for the select element, not a reference to the xpath dict
|
||||||
match = re.search(r"option\[(\d+)]$", option_xpath)
|
await page.click(
|
||||||
if match:
|
f"xpath={xpath}",
|
||||||
# This means we were trying to select an option xpath, click the option
|
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
|
||||||
option_index = int(match.group(1))
|
)
|
||||||
await page.click(
|
await page.select_option(
|
||||||
f"xpath={xpath}",
|
xpath,
|
||||||
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
|
index=action.option.index,
|
||||||
)
|
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
|
||||||
await page.select_option(
|
)
|
||||||
xpath,
|
await page.click(
|
||||||
index=option_index,
|
f"xpath={xpath}",
|
||||||
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
|
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
|
||||||
)
|
)
|
||||||
await page.click(
|
|
||||||
f"xpath={xpath}",
|
|
||||||
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
|
|
||||||
)
|
|
||||||
return [ActionSuccess()]
|
|
||||||
else:
|
|
||||||
# This means the supplied index was for the select element, not a reference to the xpath dict
|
|
||||||
await page.click(
|
|
||||||
f"xpath={xpath}",
|
|
||||||
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
|
|
||||||
)
|
|
||||||
await page.select_option(
|
|
||||||
xpath,
|
|
||||||
index=action.option.index,
|
|
||||||
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
|
|
||||||
)
|
|
||||||
await page.click(
|
|
||||||
f"xpath={xpath}",
|
|
||||||
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
|
|
||||||
)
|
|
||||||
return [ActionSuccess()]
|
return [ActionSuccess()]
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
LOG.warning("Failed to click on the option by index", action=action, exc_info=True)
|
LOG.warning("Failed to click on the option by index", action=action, exc_info=True)
|
||||||
@@ -782,12 +761,11 @@ async def chain_click(
|
|||||||
page.remove_listener("filechooser", fc_func)
|
page.remove_listener("filechooser", fc_func)
|
||||||
|
|
||||||
|
|
||||||
def get_anchor_to_click(scraped_page: ScrapedPage, element_id: int) -> str | None:
|
def get_anchor_to_click(scraped_page: ScrapedPage, element_id: str) -> str | None:
|
||||||
"""
|
"""
|
||||||
Get the anchor tag under the label to click
|
Get the anchor tag under the label to click
|
||||||
"""
|
"""
|
||||||
LOG.info("Getting anchor tag to click", element_id=element_id)
|
LOG.info("Getting anchor tag to click", element_id=element_id)
|
||||||
element_id = int(element_id)
|
|
||||||
for ele in scraped_page.elements:
|
for ele in scraped_page.elements:
|
||||||
if "id" in ele and ele["id"] == element_id:
|
if "id" in ele and ele["id"] == element_id:
|
||||||
for child in ele["children"]:
|
for child in ele["children"]:
|
||||||
@@ -796,7 +774,7 @@ def get_anchor_to_click(scraped_page: ScrapedPage, element_id: int) -> str | Non
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def get_select_id_in_label_children(scraped_page: ScrapedPage, element_id: int) -> int | None:
|
def get_select_id_in_label_children(scraped_page: ScrapedPage, element_id: str) -> str | None:
|
||||||
"""
|
"""
|
||||||
search <select> in the children of <label>
|
search <select> in the children of <label>
|
||||||
"""
|
"""
|
||||||
@@ -812,7 +790,7 @@ def get_select_id_in_label_children(scraped_page: ScrapedPage, element_id: int)
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def get_checkbox_id_in_label_children(scraped_page: ScrapedPage, element_id: int) -> int | None:
|
def get_checkbox_id_in_label_children(scraped_page: ScrapedPage, element_id: str) -> str | None:
|
||||||
"""
|
"""
|
||||||
search checkbox/radio in the children of <label>
|
search checkbox/radio in the children of <label>
|
||||||
"""
|
"""
|
||||||
@@ -933,7 +911,7 @@ async def click_listbox_option(
|
|||||||
scraped_page: ScrapedPage,
|
scraped_page: ScrapedPage,
|
||||||
page: Page,
|
page: Page,
|
||||||
action: actions.SelectOptionAction,
|
action: actions.SelectOptionAction,
|
||||||
listbox_element_id: int,
|
listbox_element_id: str,
|
||||||
) -> bool:
|
) -> bool:
|
||||||
listbox_element = scraped_page.id_to_element_dict[listbox_element_id]
|
listbox_element = scraped_page.id_to_element_dict[listbox_element_id]
|
||||||
# this is a listbox element, get all the children
|
# this is a listbox element, get all the children
|
||||||
|
|||||||
@@ -540,6 +540,7 @@ function getElementContent(element, skipped_element = null) {
|
|||||||
function getSelectOptions(element) {
|
function getSelectOptions(element) {
|
||||||
const options = Array.from(element.options);
|
const options = Array.from(element.options);
|
||||||
const selectOptions = [];
|
const selectOptions = [];
|
||||||
|
|
||||||
for (const option of options) {
|
for (const option of options) {
|
||||||
selectOptions.push({
|
selectOptions.push({
|
||||||
optionIndex: option.index,
|
optionIndex: option.index,
|
||||||
@@ -554,7 +555,8 @@ function getListboxOptions(element) {
|
|||||||
var optionElements = element.querySelectorAll('[role="option"]');
|
var optionElements = element.querySelectorAll('[role="option"]');
|
||||||
let selectOptions = [];
|
let selectOptions = [];
|
||||||
for (var i = 0; i < optionElements.length; i++) {
|
for (var i = 0; i < optionElements.length; i++) {
|
||||||
var ele = optionElements[i];
|
let ele = optionElements[i];
|
||||||
|
|
||||||
selectOptions.push({
|
selectOptions.push({
|
||||||
optionIndex: i,
|
optionIndex: i,
|
||||||
text: removeMultipleSpaces(ele.textContent),
|
text: removeMultipleSpaces(ele.textContent),
|
||||||
@@ -563,6 +565,17 @@ function getListboxOptions(element) {
|
|||||||
return selectOptions;
|
return selectOptions;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function uniqueId() {
|
||||||
|
const characters =
|
||||||
|
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
|
||||||
|
let result = "";
|
||||||
|
for (let i = 0; i < 4; i++) {
|
||||||
|
const randomIndex = Math.floor(Math.random() * characters.length);
|
||||||
|
result += characters[randomIndex];
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
function buildTreeFromBody() {
|
function buildTreeFromBody() {
|
||||||
var elements = [];
|
var elements = [];
|
||||||
var resultArray = [];
|
var resultArray = [];
|
||||||
@@ -620,7 +633,7 @@ function buildTreeFromBody() {
|
|||||||
};
|
};
|
||||||
|
|
||||||
function buildElementObject(element, interactable) {
|
function buildElementObject(element, interactable) {
|
||||||
var element_id = elements.length;
|
var element_id = element.getAttribute("unique_id") ?? uniqueId();
|
||||||
var elementTagNameLower = element.tagName.toLowerCase();
|
var elementTagNameLower = element.tagName.toLowerCase();
|
||||||
element.setAttribute("unique_id", element_id);
|
element.setAttribute("unique_id", element_id);
|
||||||
// if element is an "a" tag and has a target="_blank" attribute, remove the target attribute
|
// if element is an "a" tag and has a target="_blank" attribute, remove the target attribute
|
||||||
@@ -733,7 +746,10 @@ function buildTreeFromBody() {
|
|||||||
// If the element is interactable and has an interactable parent,
|
// If the element is interactable and has an interactable parent,
|
||||||
// then add it to the children of the parent
|
// then add it to the children of the parent
|
||||||
else {
|
else {
|
||||||
elements[parentId].children.push(elementObj);
|
// TODO: use dict/object so that we access these in O(1) instead
|
||||||
|
elements
|
||||||
|
.find((element) => element.id === parentId)
|
||||||
|
.children.push(elementObj);
|
||||||
}
|
}
|
||||||
// options already added to the select.options, no need to add options anymore
|
// options already added to the select.options, no need to add options anymore
|
||||||
if (elementObj.options && elementObj.options.length > 0) {
|
if (elementObj.options && elementObj.options.length > 0) {
|
||||||
@@ -772,13 +788,16 @@ function buildTreeFromBody() {
|
|||||||
if (parentId === null) {
|
if (parentId === null) {
|
||||||
resultArray.push(elementObj);
|
resultArray.push(elementObj);
|
||||||
} else {
|
} else {
|
||||||
elements[parentId].children.push(elementObj);
|
// TODO: use dict/object so that we access these in O(1) instead
|
||||||
|
elements
|
||||||
|
.find((element) => element.id === parentId)
|
||||||
|
.children.push(elementObj);
|
||||||
}
|
}
|
||||||
parentId = elementObj.id;
|
parentId = elementObj.id;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
getChildElements(element).forEach((child) => {
|
getChildElements(element).forEach((child) => {
|
||||||
let children = processElement(child, parentId);
|
processElement(child, parentId);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -975,8 +994,6 @@ function buildTreeFromBody() {
|
|||||||
// TODO: Handle iframes
|
// TODO: Handle iframes
|
||||||
// setup before parsing the dom
|
// setup before parsing the dom
|
||||||
checkSelect2();
|
checkSelect2();
|
||||||
// Clear all the unique_id attributes so that there are no conflicts
|
|
||||||
removeAllUniqueIdAttributes();
|
|
||||||
processElement(document.body, null);
|
processElement(document.body, null);
|
||||||
|
|
||||||
for (var element of elements) {
|
for (var element of elements) {
|
||||||
@@ -1029,14 +1046,6 @@ function drawBoundingBoxes(elements) {
|
|||||||
addHintMarkersToPage(hintMarkers);
|
addHintMarkersToPage(hintMarkers);
|
||||||
}
|
}
|
||||||
|
|
||||||
function removeAllUniqueIdAttributes() {
|
|
||||||
var elementsWithUniqueId = document.querySelectorAll("[unique_id]");
|
|
||||||
|
|
||||||
elementsWithUniqueId.forEach(function (element) {
|
|
||||||
element.removeAttribute("unique_id");
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
function captchaSolvedCallback() {
|
function captchaSolvedCallback() {
|
||||||
console.log("captcha solved");
|
console.log("captcha solved");
|
||||||
if (!window["captchaSolvedCounter"]) {
|
if (!window["captchaSolvedCounter"]) {
|
||||||
|
|||||||
@@ -121,8 +121,8 @@ class ScrapedPage(BaseModel):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
elements: list[dict]
|
elements: list[dict]
|
||||||
id_to_element_dict: dict[int, dict] = {}
|
id_to_element_dict: dict[str, dict] = {}
|
||||||
id_to_xpath_dict: dict[int, str]
|
id_to_xpath_dict: dict[str, str]
|
||||||
element_tree: list[dict]
|
element_tree: list[dict]
|
||||||
element_tree_trimmed: list[dict]
|
element_tree_trimmed: list[dict]
|
||||||
screenshots: list[bytes]
|
screenshots: list[bytes]
|
||||||
|
|||||||
Reference in New Issue
Block a user