Decorate bounding boxes with element_ids to improve Skyvern accuracy (+ a few more changes) (#536)
This commit is contained in:
@@ -11,6 +11,7 @@ from playwright.async_api import Locator, Page, TimeoutError
|
||||
from skyvern.constants import INPUT_TEXT_TIMEOUT, REPO_ROOT_DIR
|
||||
from skyvern.exceptions import (
|
||||
EmptySelect,
|
||||
FailToClick,
|
||||
FailToSelectByIndex,
|
||||
FailToSelectByLabel,
|
||||
FailToSelectByValue,
|
||||
@@ -789,10 +790,11 @@ async def chain_click(
|
||||
javascript_triggered=javascript_triggered,
|
||||
)
|
||||
]
|
||||
except Exception as e:
|
||||
|
||||
except Exception:
|
||||
action_results: list[ActionResult] = [
|
||||
ActionFailure(
|
||||
e,
|
||||
FailToClick(action.element_id),
|
||||
javascript_triggered=javascript_triggered,
|
||||
)
|
||||
]
|
||||
@@ -826,7 +828,7 @@ async def chain_click(
|
||||
interacted_with_parent=True,
|
||||
)
|
||||
)
|
||||
except Exception as pe:
|
||||
except Exception:
|
||||
LOG.warning(
|
||||
"Failed to click parent element",
|
||||
action=action,
|
||||
@@ -835,7 +837,7 @@ async def chain_click(
|
||||
)
|
||||
action_results.append(
|
||||
ActionFailure(
|
||||
pe,
|
||||
FailToClick(action.element_id),
|
||||
javascript_triggered=javascript_triggered,
|
||||
interacted_with_parent=True,
|
||||
)
|
||||
@@ -1073,9 +1075,12 @@ async def click_sibling_of_input(
|
||||
javascript_triggered=javascript_triggered,
|
||||
interacted_with_sibling=True,
|
||||
)
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
LOG.warning("Failed to click sibling label of input element", exc_info=True)
|
||||
return ActionFailure(exception=e, javascript_triggered=javascript_triggered)
|
||||
return ActionFailure(
|
||||
exception=Exception("Failed while trying to click sibling of input element"),
|
||||
javascript_triggered=javascript_triggered,
|
||||
)
|
||||
|
||||
|
||||
async def extract_information_for_navigation_goal(
|
||||
|
||||
@@ -213,7 +213,10 @@ function isElementStyleVisibilityVisible(element, style) {
|
||||
function isElementVisible(element) {
|
||||
// TODO: This is a hack to not check visibility for option elements
|
||||
// because they are not visible by default. We check their parent instead for visibility.
|
||||
if (element.tagName.toLowerCase() === "option")
|
||||
if (
|
||||
element.tagName.toLowerCase() === "option" ||
|
||||
(element.tagName.toLowerCase() === "input" && element.type === "radio")
|
||||
)
|
||||
return element.parentElement && isElementVisible(element.parentElement);
|
||||
|
||||
if (element.className.toString().includes("select2-offscreen")) {
|
||||
@@ -1088,7 +1091,11 @@ async function buildTreeFromBody(frame = "main.frame", open_select = false) {
|
||||
const labelElement = document.querySelector(
|
||||
element.tagName + '[unique_id="' + element.id + '"]',
|
||||
);
|
||||
if (labelElement && labelElement.childElementCount === 0) {
|
||||
if (
|
||||
labelElement &&
|
||||
labelElement.childElementCount === 0 &&
|
||||
!labelElement.getAttribute("for")
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@@ -1234,15 +1241,30 @@ function createHintMarkersForGroups(groups) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const hintMarkers = groups.map((group) => createHintMarkerForGroup(group));
|
||||
|
||||
const hintMarkers = groups
|
||||
.filter((group) => group.elements.some((element) => element.interactable))
|
||||
.map((group) => createHintMarkerForGroup(group));
|
||||
// fill in marker text
|
||||
const hintStrings = generateHintStrings(hintMarkers.length);
|
||||
// const hintStrings = generateHintStrings(hintMarkers.length);
|
||||
for (let i = 0; i < hintMarkers.length; i++) {
|
||||
const hintMarker = hintMarkers[i];
|
||||
hintMarker.hintString = hintStrings[i];
|
||||
|
||||
let interactableElementFound = false;
|
||||
|
||||
for (let i = 0; i < hintMarker.group.elements.length; i++) {
|
||||
if (hintMarker.group.elements[i].interactable) {
|
||||
hintMarker.hintString = hintMarker.group.elements[i].id;
|
||||
interactableElementFound = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!interactableElementFound) {
|
||||
hintMarker.hintString = "";
|
||||
}
|
||||
|
||||
try {
|
||||
hintMarker.element.innerHTML = hintMarker.hintString.toUpperCase();
|
||||
hintMarker.element.innerHTML = hintMarker.hintString;
|
||||
} catch (e) {
|
||||
// Ensure trustedTypes is available
|
||||
if (typeof trustedTypes !== "undefined") {
|
||||
@@ -1262,11 +1284,16 @@ function createHintMarkersForGroups(groups) {
|
||||
}
|
||||
|
||||
function createHintMarkerForGroup(group) {
|
||||
// Calculate the position of the element relative to the document
|
||||
var scrollTop = window.pageYOffset || document.documentElement.scrollTop;
|
||||
var scrollLeft = window.pageXOffset || document.documentElement.scrollLeft;
|
||||
|
||||
const marker = {};
|
||||
// yellow annotation box with string
|
||||
const el = document.createElement("div");
|
||||
el.style.left = group.rect.left + "px";
|
||||
el.style.top = group.rect.top + "px";
|
||||
el.style.position = "absolute";
|
||||
el.style.left = group.rect.left + scrollLeft + "px";
|
||||
el.style.top = group.rect.top + scrollTop + "px";
|
||||
// Each group is assigned a different incremental z-index, we use the same z-index for the
|
||||
// bounding box and the hint marker
|
||||
el.style.zIndex = this.currentZIndex;
|
||||
@@ -1274,10 +1301,6 @@ function createHintMarkerForGroup(group) {
|
||||
// The bounding box around the group of hints.
|
||||
const boundingBox = document.createElement("div");
|
||||
|
||||
// Calculate the position of the element relative to the document
|
||||
var scrollTop = window.pageYOffset || document.documentElement.scrollTop;
|
||||
var scrollLeft = window.pageXOffset || document.documentElement.scrollLeft;
|
||||
|
||||
// Set styles for the bounding box
|
||||
boundingBox.style.position = "absolute";
|
||||
boundingBox.style.display = "display";
|
||||
@@ -1302,7 +1325,7 @@ function addHintMarkersToPage(hintMarkers) {
|
||||
const parent = document.createElement("div");
|
||||
parent.id = "boundingBoxContainer";
|
||||
for (const hintMarker of hintMarkers) {
|
||||
// parent.appendChild(hintMarker.element);
|
||||
parent.appendChild(hintMarker.element);
|
||||
parent.appendChild(hintMarker.boundingBox);
|
||||
}
|
||||
document.documentElement.appendChild(parent);
|
||||
|
||||
@@ -476,6 +476,7 @@ def trim_element_tree(elements: list[dict]) -> list[dict]:
|
||||
|
||||
def _trimmed_attributes(tag_name: str, attributes: dict) -> dict:
|
||||
new_attributes: dict = {}
|
||||
|
||||
for key in attributes:
|
||||
if key == "id" and tag_name in ["input", "textarea", "select"]:
|
||||
# We don't want to remove the id attribute from any of these elements in case there's a label for it
|
||||
@@ -484,6 +485,7 @@ def _trimmed_attributes(tag_name: str, attributes: dict) -> dict:
|
||||
new_attributes[key] = attributes[key]
|
||||
if key in RESERVED_ATTRIBUTES and attributes[key]:
|
||||
new_attributes[key] = attributes[key]
|
||||
|
||||
return new_attributes
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user