Decorate bounding boxes with element_ids to improve Skyvern accuracy (+ a few more changes) (#536)
This commit is contained in:
@@ -213,7 +213,10 @@ function isElementStyleVisibilityVisible(element, style) {
|
||||
function isElementVisible(element) {
|
||||
// TODO: This is a hack to not check visibility for option elements
|
||||
// because they are not visible by default. We check their parent instead for visibility.
|
||||
if (element.tagName.toLowerCase() === "option")
|
||||
if (
|
||||
element.tagName.toLowerCase() === "option" ||
|
||||
(element.tagName.toLowerCase() === "input" && element.type === "radio")
|
||||
)
|
||||
return element.parentElement && isElementVisible(element.parentElement);
|
||||
|
||||
if (element.className.toString().includes("select2-offscreen")) {
|
||||
@@ -1088,7 +1091,11 @@ async function buildTreeFromBody(frame = "main.frame", open_select = false) {
|
||||
const labelElement = document.querySelector(
|
||||
element.tagName + '[unique_id="' + element.id + '"]',
|
||||
);
|
||||
if (labelElement && labelElement.childElementCount === 0) {
|
||||
if (
|
||||
labelElement &&
|
||||
labelElement.childElementCount === 0 &&
|
||||
!labelElement.getAttribute("for")
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@@ -1234,15 +1241,30 @@ function createHintMarkersForGroups(groups) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const hintMarkers = groups.map((group) => createHintMarkerForGroup(group));
|
||||
|
||||
const hintMarkers = groups
|
||||
.filter((group) => group.elements.some((element) => element.interactable))
|
||||
.map((group) => createHintMarkerForGroup(group));
|
||||
// fill in marker text
|
||||
const hintStrings = generateHintStrings(hintMarkers.length);
|
||||
// const hintStrings = generateHintStrings(hintMarkers.length);
|
||||
for (let i = 0; i < hintMarkers.length; i++) {
|
||||
const hintMarker = hintMarkers[i];
|
||||
hintMarker.hintString = hintStrings[i];
|
||||
|
||||
let interactableElementFound = false;
|
||||
|
||||
for (let i = 0; i < hintMarker.group.elements.length; i++) {
|
||||
if (hintMarker.group.elements[i].interactable) {
|
||||
hintMarker.hintString = hintMarker.group.elements[i].id;
|
||||
interactableElementFound = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!interactableElementFound) {
|
||||
hintMarker.hintString = "";
|
||||
}
|
||||
|
||||
try {
|
||||
hintMarker.element.innerHTML = hintMarker.hintString.toUpperCase();
|
||||
hintMarker.element.innerHTML = hintMarker.hintString;
|
||||
} catch (e) {
|
||||
// Ensure trustedTypes is available
|
||||
if (typeof trustedTypes !== "undefined") {
|
||||
@@ -1262,11 +1284,16 @@ function createHintMarkersForGroups(groups) {
|
||||
}
|
||||
|
||||
function createHintMarkerForGroup(group) {
|
||||
// Calculate the position of the element relative to the document
|
||||
var scrollTop = window.pageYOffset || document.documentElement.scrollTop;
|
||||
var scrollLeft = window.pageXOffset || document.documentElement.scrollLeft;
|
||||
|
||||
const marker = {};
|
||||
// yellow annotation box with string
|
||||
const el = document.createElement("div");
|
||||
el.style.left = group.rect.left + "px";
|
||||
el.style.top = group.rect.top + "px";
|
||||
el.style.position = "absolute";
|
||||
el.style.left = group.rect.left + scrollLeft + "px";
|
||||
el.style.top = group.rect.top + scrollTop + "px";
|
||||
// Each group is assigned a different incremental z-index, we use the same z-index for the
|
||||
// bounding box and the hint marker
|
||||
el.style.zIndex = this.currentZIndex;
|
||||
@@ -1274,10 +1301,6 @@ function createHintMarkerForGroup(group) {
|
||||
// The bounding box around the group of hints.
|
||||
const boundingBox = document.createElement("div");
|
||||
|
||||
// Calculate the position of the element relative to the document
|
||||
var scrollTop = window.pageYOffset || document.documentElement.scrollTop;
|
||||
var scrollLeft = window.pageXOffset || document.documentElement.scrollLeft;
|
||||
|
||||
// Set styles for the bounding box
|
||||
boundingBox.style.position = "absolute";
|
||||
boundingBox.style.display = "display";
|
||||
@@ -1302,7 +1325,7 @@ function addHintMarkersToPage(hintMarkers) {
|
||||
const parent = document.createElement("div");
|
||||
parent.id = "boundingBoxContainer";
|
||||
for (const hintMarker of hintMarkers) {
|
||||
// parent.appendChild(hintMarker.element);
|
||||
parent.appendChild(hintMarker.element);
|
||||
parent.appendChild(hintMarker.boundingBox);
|
||||
}
|
||||
document.documentElement.appendChild(parent);
|
||||
|
||||
@@ -476,6 +476,7 @@ def trim_element_tree(elements: list[dict]) -> list[dict]:
|
||||
|
||||
def _trimmed_attributes(tag_name: str, attributes: dict) -> dict:
|
||||
new_attributes: dict = {}
|
||||
|
||||
for key in attributes:
|
||||
if key == "id" and tag_name in ["input", "textarea", "select"]:
|
||||
# We don't want to remove the id attribute from any of these elements in case there's a label for it
|
||||
@@ -484,6 +485,7 @@ def _trimmed_attributes(tag_name: str, attributes: dict) -> dict:
|
||||
new_attributes[key] = attributes[key]
|
||||
if key in RESERVED_ATTRIBUTES and attributes[key]:
|
||||
new_attributes[key] = attributes[key]
|
||||
|
||||
return new_attributes
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user