general autocomplete solution (#713)

This commit is contained in:
LawyZheng
2024-08-21 10:54:32 +08:00
committed by GitHub
parent ef95dc6eca
commit 8baa8de032
9 changed files with 610 additions and 128 deletions

View File

@@ -1,4 +1,5 @@
import asyncio
import copy
import json
import os
import urllib.parse
@@ -9,13 +10,16 @@ from typing import Any, Awaitable, Callable, List
import structlog
from deprecation import deprecated
from playwright.async_api import FileChooser, Locator, Page, TimeoutError
from pydantic import BaseModel
from skyvern.constants import REPO_ROOT_DIR, VERIFICATION_CODE_POLLING_TIMEOUT_MINS
from skyvern.constants import REPO_ROOT_DIR, SKYVERN_ID_ATTR, VERIFICATION_CODE_POLLING_TIMEOUT_MINS
from skyvern.exceptions import (
EmptySelect,
ErrEmptyTweakValue,
ErrFoundSelectableElement,
FailedToFetchSecret,
FailToClick,
FailToFindAutocompleteOption,
FailToSelectByIndex,
FailToSelectByLabel,
FailToSelectByValue,
@@ -24,9 +28,12 @@ from skyvern.exceptions import (
MissingElement,
MissingFileUrl,
MultipleElementsFound,
NoAutoCompleteOptionMeetCondition,
NoElementMatchedForTargetOption,
NoIncrementalElementFoundForAutoCompletion,
NoIncrementalElementFoundForCustomSelection,
NoLabelOrValueForCustomSelection,
NoSuitableAutoCompleteOption,
OptionIndexOutOfBound,
WrongElementToUploadFile,
)
@@ -59,7 +66,13 @@ from skyvern.webeye.actions.actions import (
)
from skyvern.webeye.actions.responses import ActionFailure, ActionResult, ActionSuccess
from skyvern.webeye.browser_factory import BrowserState, get_download_dir
from skyvern.webeye.scraper.scraper import ElementTreeFormat, IncrementalScrapePage, ScrapedPage
from skyvern.webeye.scraper.scraper import (
ElementTreeFormat,
IncrementalScrapePage,
ScrapedPage,
json_to_html,
trim_element_tree,
)
from skyvern.webeye.utils.dom import DomUtil, InteractiveElement, SkyvernElement
from skyvern.webeye.utils.page import SkyvernFrame
@@ -67,6 +80,12 @@ LOG = structlog.get_logger()
COMMON_INPUT_TAGS = {"input", "textarea", "select"}
class AutoCompletionResult(BaseModel):
    """Outcome of a single attempt to pick an option from an auto-completion dropdown."""

    # NOTE(review): this flag is never assigned by the auto-completion helpers in this
    # module (it keeps its default) -- confirm whether a caller elsewhere relies on it.
    auto_completion_attempt: bool = False
    # Element dicts collected for the attempt (newly appeared elements plus any
    # still-present elements carried over from an earlier attempt).
    incremental_elements: list[dict] = []
    # Defaults to success; replaced with an ActionFailure when the attempt fails.
    action_result: ActionResult = ActionSuccess()
class ActionHandler:
_handled_action_types: dict[
ActionType,
@@ -290,6 +309,7 @@ async def handle_input_text_action(
dom = DomUtil(scraped_page, page)
skyvern_element = await dom.get_skyvern_element_by_id(action.element_id)
skyvern_frame = await SkyvernFrame.create_instance(skyvern_element.get_frame())
incremental_scraped = IncrementalScrapePage(skyvern_frame=skyvern_frame)
timeout = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS
current_text = await get_input_value(skyvern_element.get_tag_name(), skyvern_element.get_locator())
@@ -319,7 +339,6 @@ async def handle_input_text_action(
return await handle_select_option_action(select_action, page, scraped_page, task, step)
# press arrowdown to watch if there's any options popping up
incremental_scraped = IncrementalScrapePage(skyvern_frame=skyvern_frame)
await incremental_scraped.start_listen_dom_increment()
await skyvern_element.get_locator().focus(timeout=timeout)
await skyvern_element.get_locator().press("ArrowDown", timeout=timeout)
@@ -376,12 +395,26 @@ async def handle_input_text_action(
LOG.warning("Failed to clear the input field", action=action, exc_info=True)
return [ActionFailure(InvalidElementForTextInput(element_id=action.element_id, tag_name=tag_name))]
# TODO: not sure if this case will trigger auto-completion
if tag_name not in COMMON_INPUT_TAGS:
await skyvern_element.input_fill(text)
return [ActionSuccess()]
# If the input is a text input, we type the text character by character
# 3 times the time it takes to type the text so it has time to finish typing
if len(text) == 0:
return [ActionSuccess()]
if await skyvern_element.is_auto_completion_input():
result = await input_or_auto_complete_input(
action=action,
page=page,
dom=dom,
text=text,
skyvern_element=skyvern_element,
step=step,
task=task,
)
return [result]
await skyvern_element.input_sequentially(text=text)
return [ActionSuccess()]
@@ -848,6 +881,282 @@ async def chain_click(
return [ActionFailure(WrongElementToUploadFile(action.element_id))]
def remove_exist_elements(dom: DomUtil, element_tree: list[dict]) -> list[dict]:
    """Return the tree with every element already known to ``dom`` removed.

    The tree is walked depth-first. An element whose id is already present in
    ``dom`` is dropped and its (already filtered) children are hoisted into its
    place; any other element is kept with its ``children`` replaced by the
    filtered list. Kept element dicts are updated in place.
    """
    kept: list[dict] = []
    for node in element_tree:
        descendants = node.get("children", [])
        if len(descendants) > 0:
            descendants = remove_exist_elements(dom=dom, element_tree=descendants)

        if not dom.check_id_in_dom(node.get("id", "")):
            # brand-new element: keep it, with only its brand-new descendants
            node["children"] = descendants
            kept.append(node)
            continue

        # element was already on the page: only its new descendants survive
        kept.extend(descendants)
    return kept
async def choose_auto_completion_dropdown(
    action: actions.InputTextAction,
    page: Page,
    dom: DomUtil,
    text: str,
    skyvern_element: SkyvernElement,
    step: Step,
    task: Task,
    preserved_elements: list[dict] | None = None,
    relevance_threshold: float = 0.8,
) -> AutoCompletionResult:
    """Type ``text`` into the input and try to click a matching option that pops up.

    The function listens for DOM increments in the element's frame, types the
    text, waits, collects the newly appeared elements (minus anything already
    in ``dom``), adds the ``preserved_elements`` that are still on the page,
    and asks the secondary LLM to choose one element. The chosen element is
    clicked only when the reported relevance reaches ``relevance_threshold``.

    Args:
        action: Only ``action.reasoning`` is read, as context for the prompt.
        page: Not used in the body of this function.
        dom: Used to filter out elements that already existed before typing.
        text: The value typed into the input for this attempt.
        skyvern_element: The input element being filled.
        step: Current step, used for logging and passed to the LLM handler.
        task: Current task, used for logging and element-tree cleanup.
        preserved_elements: Element dicts from an earlier attempt; those still
            found in the frame are re-parsed and appended to the candidates.
        relevance_threshold: Minimum ``relevance_float`` the LLM must report.

    Returns:
        An ``AutoCompletionResult``. ``incremental_elements`` is a deep copy of
        the candidate elements (if that point was reached). ``action_result``
        keeps its default on success and is an ``ActionFailure`` wrapping the
        exception when anything inside the ``try`` body fails.

    NOTE(review): ``result.auto_completion_attempt`` is never set here.
    """
    preserved_elements = preserved_elements or []
    # consumed in ``finally``: the input is cleared unless an option was clicked
    clear_input = True
    result = AutoCompletionResult()

    current_frame = skyvern_element.get_frame()
    skyvern_frame = await SkyvernFrame.create_instance(current_frame)
    incremental_scraped = IncrementalScrapePage(skyvern_frame=skyvern_frame)
    await incremental_scraped.start_listen_dom_increment()

    try:
        await skyvern_element.press_fill(text)
        # wait for new elements to load
        await asyncio.sleep(5)
        incremental_element = await incremental_scraped.get_incremental_element_tree(
            app.AGENT_FUNCTION.cleanup_element_tree_factory(task=task, step=step)
        )
        # drop elements that were already part of the scraped page before typing
        incremental_element = remove_exist_elements(dom=dom, element_tree=incremental_element)

        # check if elements in preserve list are still on the page
        confirmed_preserved_list: list[dict] = []
        for element in preserved_elements:
            element_id = element.get("id")
            if not element_id:
                continue
            locator = current_frame.locator(f'[{SKYVERN_ID_ATTR}="{element_id}"]')
            cnt = await locator.count()
            if cnt == 0:
                continue

            element_handler = await locator.element_handle(
                timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS
            )
            if not element_handler:
                continue

            # NOTE(review): the frame id and interactable flag passed here come from the
            # input element, not from the preserved element being re-parsed -- confirm intended.
            current_element = await skyvern_frame.parse_element_from_html(
                skyvern_element.get_frame_id(), element_handler, skyvern_element.is_interactable()
            )
            confirmed_preserved_list.append(current_element)

        if len(confirmed_preserved_list) > 0:
            # run the same cleanup and trimming on the re-parsed elements before merging them in
            confirmed_preserved_list = await app.AGENT_FUNCTION.cleanup_element_tree_factory(task=task, step=step)(
                skyvern_frame.get_frame().url, copy.deepcopy(confirmed_preserved_list)
            )
            confirmed_preserved_list = trim_element_tree(copy.deepcopy(confirmed_preserved_list))

            incremental_element.extend(confirmed_preserved_list)

        # recorded before the emptiness check, so it is set even when the attempt fails below
        result.incremental_elements = copy.deepcopy(incremental_element)
        if len(incremental_element) == 0:
            raise NoIncrementalElementFoundForAutoCompletion(element_id=skyvern_element.get_id(), text=text)

        html = incremental_scraped.build_html_tree(incremental_element)
        auto_completion_confirm_prompt = prompt_engine.load_prompt(
            "auto-completion-choose-option",
            context_reasoning=action.reasoning,
            filled_value=text,
            elements=html,
        )
        LOG.info(
            "Confirm if it's an auto completion dropdown",
            step_id=step.step_id,
            task_id=task.task_id,
        )
        json_response = await app.SECONDARY_LLM_API_HANDLER(prompt=auto_completion_confirm_prompt, step=step)
        element_id = json_response.get("id", "")
        relevance_float = json_response.get("relevance_float", 0)
        if not element_id:
            reasoning = json_response.get("reasoning")
            raise NoSuitableAutoCompleteOption(reasoning=reasoning, target_value=text)

        if relevance_float < relevance_threshold:
            LOG.info(
                f"The closest option doesn't meet the condition(relevance_float>={relevance_threshold})",
                element_id=element_id,
                relevance_float=relevance_float,
            )
            reasoning = json_response.get("reasoning")
            raise NoAutoCompleteOptionMeetCondition(
                reasoning=reasoning,
                required_relevance=relevance_threshold,
                target_value=text,
                closest_relevance=relevance_float,
            )

        LOG.info(
            "Find a suitable option to choose",
            element_id=element_id,
            step_id=step.step_id,
            task_id=task.task_id,
        )

        locator = current_frame.locator(f'[{SKYVERN_ID_ATTR}="{element_id}"]')
        if await locator.count() == 0:
            raise MissingElement(element_id=element_id)

        await locator.click(timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS)
        # an option was clicked: keep the typed/selected value in the input
        clear_input = False
        return result
    except Exception as e:
        # every failure inside the try body is reported through the result, not raised
        LOG.info(
            "Failed to choose the auto completion dropdown",
            exc_info=True,
            input_value=text,
            task_id=task.task_id,
            step_id=step.step_id,
        )
        result.action_result = ActionFailure(exception=e)
        return result
    finally:
        await incremental_scraped.stop_listen_dom_increment()
        if clear_input:
            await skyvern_element.input_clear()
async def input_or_auto_complete_input(
    action: actions.InputTextAction,
    page: Page,
    dom: DomUtil,
    text: str,
    skyvern_element: SkyvernElement,
    step: Step,
    task: Task,
) -> ActionResult:
    """Fill an auto-completion input, retrying with LLM-suggested alternative values.

    Each round first tries the current value, then every value proposed by the
    "auto-completion-potential-answers" prompt. If none of them leads to a
    selected option and another round is allowed, the
    "auto-completion-tweak-value" prompt is used to produce the next current
    value. Elements collected by one try are handed to the next try as
    ``preserved_elements``.

    Args:
        action: The input action; its ``reasoning`` seeds the prompt context.
        page: Forwarded to ``choose_auto_completion_dropdown``.
        dom: Forwarded to ``choose_auto_completion_dropdown``.
        text: The original value to type.
        skyvern_element: The input element being filled.
        step: Current step, used for logging and LLM calls.
        task: Current task, used for logging.

    Returns:
        ``ActionSuccess`` as soon as any try selects an option;
        ``ActionFailure(ErrEmptyTweakValue)`` when the tweak prompt returns no
        value; ``ActionFailure(FailToFindAutocompleteOption)`` once all rounds
        are used up.
    """
    LOG.info(
        "Trigger auto completion",
        task_id=task.task_id,
        step_id=step.step_id,
        element_id=skyvern_element.get_id(),
    )

    # 1. press the original text to see if there's a match
    # 2. call LLM to find potential values based on the original text
    #    NOTE(review): this comment used to say 5 values while the log message below says 10;
    #    the actual number is decided by the prompt template -- confirm and align.
    # 3. try each potential value from #2
    # 4. call LLM to tweak the original text according to the information from #3, then start #1 again

    # FIXME: try the whole loop for twice now, to prevent too many LLM calls
    MAX_AUTO_COMPLETE_ATTEMP = 2
    current_attemp = 0
    context_reasoning = action.reasoning
    current_value = text
    result = AutoCompletionResult()

    while current_attemp < MAX_AUTO_COMPLETE_ATTEMP:
        current_attemp += 1
        # both accumulators are reset at the start of every round
        whole_new_elements: list[dict] = []
        tried_values: list[str] = []

        LOG.info(
            "Try the potential value for auto completion",
            step_id=step.step_id,
            task_id=task.task_id,
            input_value=current_value,
        )
        result = await choose_auto_completion_dropdown(
            action=action,
            page=page,
            dom=dom,
            text=current_value,
            preserved_elements=result.incremental_elements,
            skyvern_element=skyvern_element,
            step=step,
            task=task,
        )
        if isinstance(result.action_result, ActionSuccess):
            return ActionSuccess()

        tried_values.append(current_value)
        whole_new_elements.extend(result.incremental_elements)

        prompt = prompt_engine.load_prompt(
            "auto-completion-potential-answers",
            context_reasoning=context_reasoning,
            current_value=current_value,
        )

        LOG.info(
            "Ask LLM to give 10 potential values based on the current value",
            current_value=current_value,
            step_id=step.step_id,
            task_id=task.task_id,
        )
        json_respone = await app.SECONDARY_LLM_API_HANDLER(prompt=prompt, step=step)
        values: list[dict] = json_respone.get("potential_values", [])

        for each_value in values:
            value: str = each_value.get("value", "")
            if not value:
                LOG.info(
                    "Empty potential value, skip this attempt",
                    step_id=step.step_id,
                    task_id=task.task_id,
                    value=each_value,
                )
                continue
            LOG.info(
                "Try the potential value for auto completion",
                step_id=step.step_id,
                task_id=task.task_id,
                input_value=value,
            )
            result = await choose_auto_completion_dropdown(
                action=action,
                page=page,
                dom=dom,
                text=value,
                preserved_elements=result.incremental_elements,
                skyvern_element=skyvern_element,
                step=step,
                task=task,
            )
            if isinstance(result.action_result, ActionSuccess):
                return ActionSuccess()

            tried_values.append(value)
            whole_new_elements.extend(result.incremental_elements)

        # only ask for a tweaked value when another round will actually run
        if current_attemp < MAX_AUTO_COMPLETE_ATTEMP:
            LOG.info(
                "Ask LLM to tweak the current value based on tried input values",
                step_id=step.step_id,
                task_id=task.task_id,
                current_value=current_value,
                current_attemp=current_attemp,
            )
            prompt = prompt_engine.load_prompt(
                "auto-completion-tweak-value",
                context_reasoning=context_reasoning,
                current_value=current_value,
                tried_values=json.dumps(tried_values),
                popped_up_elements="".join([json_to_html(element) for element in whole_new_elements]),
            )
            json_respone = await app.SECONDARY_LLM_API_HANDLER(prompt=prompt, step=step)
            # the tweak response's reasoning becomes the context for the next round
            context_reasoning = json_respone.get("reasoning")
            new_current_value = json_respone.get("tweaked_value", "")
            if not new_current_value:
                return ActionFailure(ErrEmptyTweakValue(reasoning=context_reasoning, current_value=current_value))
            LOG.info(
                "Ask LLM tweaked the current value with a new value",
                step_id=step.step_id,
                task_id=task.task_id,
                reasoning=context_reasoning,
                current_value=current_value,
                new_value=new_current_value,
            )
            current_value = new_current_value

    else:
        # reached when every round finished without selecting an option;
        # the failure reports the original text, not the last tweaked value
        return ActionFailure(FailToFindAutocompleteOption(current_value=text))
async def select_from_dropdown(
action: SelectOptionAction,
page: Page,

View File

@@ -877,109 +877,105 @@ function uniqueId() {
return result;
}
async function buildTreeFromBody(frame = "main.frame", open_select = false) {
/**
 * Serialize a single DOM element into the plain object used in the element tree.
 *
 * The element is tagged with a "unique_id" attribute if it does not have one yet.
 * Boolean-like attributes are normalized to true/false, the live value of
 * input/textarea elements is captured, and select options are attached for
 * <select> elements. Elements inside a shadow root also record the id of their
 * shadow host (assigning one to the host when it is missing).
 *
 * @param {string} frame - identifier of the frame the element belongs to
 * @param {Element} element - the DOM element to describe
 * @param {boolean} interactable - stored as-is on the resulting object
 * @returns {object} the element description (with an empty `children` array)
 */
function buildElementObject(frame, element, interactable) {
  const elementId = element.getAttribute("unique_id") ?? uniqueId();
  const tagName = element.tagName.toLowerCase();
  element.setAttribute("unique_id", elementId);

  // attributes whose presence means "true" unless the value is literally "false"
  const booleanAttrNames = new Set([
    "required",
    "aria-required",
    "checked",
    "aria-checked",
    "selected",
    "aria-selected",
    "readonly",
    "aria-readonly",
  ]);

  const attrs = {};
  for (const attr of element.attributes) {
    const rawValue = attr.value;
    attrs[attr.name] = booleanAttrNames.has(attr.name)
      ? !(rawValue && rawValue.toLowerCase() === "false")
      : rawValue;
  }

  // styling may mark the element as required even without an explicit attribute
  if (checkRequiredFromStyle(element)) {
    const alreadyRequired = attrs["required"] || attrs["aria-required"];
    if (!alreadyRequired) {
      attrs["required"] = true;
    }
  }

  if (tagName === "input" || tagName === "textarea") {
    // radios report their checked state (as a string); everything else its current value
    attrs["value"] =
      element.type === "radio" ? `${element.checked}` : element.value;
  }

  const isSvgRelated = tagName === "svg" || element.closest("svg") !== null;
  const elementObj = {
    id: elementId,
    frame: frame,
    interactable: interactable,
    tagName: tagName,
    attributes: attrs,
    text: getElementContent(element),
    children: [],
    rect: DomUtils.getVisibleClientRect(element, true),
    // don't trim any attr of this element if keepAllAttr=True
    keepAllAttr: isSvgRelated,
    isSelectable:
      tagName === "select" ||
      isSelect2Dropdown(element) ||
      isSelect2MultiChoice(element),
    isScrollable: isScrollable(element),
  };

  const rootNode = element.getRootNode();
  if (rootNode instanceof ShadowRoot) {
    const hostElement = rootNode.host;
    let hostId = hostElement.getAttribute("unique_id");
    // assign an id to the shadow host element if it doesn't have unique_id
    if (!hostId) {
      hostId = uniqueId();
      hostElement.setAttribute("unique_id", hostId);
    }
    elementObj.shadowHost = hostId;
  }

  // options are only collected for select elements
  if (tagName === "select") {
    const [selectOptions, selectedValue] = getSelectOptions(element);
    if (selectOptions) {
      elementObj.options = selectOptions;
    }
    if (selectedValue) {
      elementObj.attributes["selected"] = selectedValue;
    }
  }

  return elementObj;
}
// Build the element tree for the whole document by delegating to
// buildElementTree with document.body as the starting node.
// NOTE(review): `open_select` is forwarded as a third argument; confirm that
// buildElementTree still declares/uses it, otherwise it is silently ignored.
function buildTreeFromBody(frame = "main.frame", open_select = false) {
  return buildElementTree(document.body, frame, open_select);
}
async function buildElementTree(
starter = document.body,
frame = "main.frame",
open_select = false,
) {
function buildElementTree(starter = document.body, frame = "main.frame") {
var elements = [];
var resultArray = [];
async function buildElementObject(element, interactable) {
var element_id = element.getAttribute("unique_id") ?? uniqueId();
var elementTagNameLower = element.tagName.toLowerCase();
element.setAttribute("unique_id", element_id);
const attrs = {};
for (const attr of element.attributes) {
var attrValue = attr.value;
if (
attr.name === "required" ||
attr.name === "aria-required" ||
attr.name === "checked" ||
attr.name === "aria-checked" ||
attr.name === "selected" ||
attr.name === "aria-selected" ||
attr.name === "readonly" ||
attr.name === "aria-readonly"
) {
if (attrValue && attrValue.toLowerCase() === "false") {
attrValue = false;
} else {
attrValue = true;
}
}
attrs[attr.name] = attrValue;
}
if (
checkRequiredFromStyle(element) &&
!attrs["required"] &&
!attrs["aria-required"]
) {
attrs["required"] = true;
}
if (elementTagNameLower === "input" || elementTagNameLower === "textarea") {
if (element.type === "radio") {
attrs["value"] = "" + element.checked + "";
} else {
attrs["value"] = element.value;
}
}
let elementObj = {
id: element_id,
frame: frame,
interactable: interactable,
tagName: elementTagNameLower,
attributes: attrs,
text: getElementContent(element),
children: [],
rect: DomUtils.getVisibleClientRect(element, true),
// don't trim any attr of this element if keepAllAttr=True
keepAllAttr:
elementTagNameLower === "svg" || element.closest("svg") !== null,
isSelectable:
elementTagNameLower === "select" ||
isSelect2Dropdown(element) ||
isSelect2MultiChoice(element),
isScrollable: isScrollable(element),
};
let isInShadowRoot = element.getRootNode() instanceof ShadowRoot;
if (isInShadowRoot) {
let shadowHostEle = element.getRootNode().host;
let shadowHostId = shadowHostEle.getAttribute("unique_id");
// assign shadowHostId to the shadowHost element if it doesn't have unique_id
if (!shadowHostId) {
shadowHostId = uniqueId();
shadowHostEle.setAttribute("unique_id", shadowHostId);
}
elementObj.shadowHost = shadowHostId;
}
// get options for select element or for listbox element
let selectOptions = null;
let selectedValue = "";
if (elementTagNameLower === "select") {
[selectOptions, selectedValue] = getSelectOptions(element);
}
if (selectOptions) {
elementObj.options = selectOptions;
}
if (selectedValue) {
elementObj.attributes["selected"] = selectedValue;
}
return elementObj;
}
function getChildElements(element) {
if (element.childElementCount !== 0) {
return Array.from(element.children);
@@ -987,7 +983,7 @@ async function buildElementTree(
return [];
}
}
async function processElement(element, parentId) {
function processElement(element, parentId) {
if (element === null) {
console.log("get a null element");
return;
@@ -1008,7 +1004,7 @@ async function buildElementTree(
// Check if the element is interactable
if (isInteractable(element)) {
var elementObj = await buildElementObject(element, true);
var elementObj = buildElementObject(frame, element, true);
elements.push(elementObj);
// If the element is interactable but has no interactable parent,
// then it starts a new tree, so add it to the result array
@@ -1029,24 +1025,24 @@ async function buildElementTree(
const children = getChildElements(element);
for (let i = 0; i < children.length; i++) {
const childElement = children[i];
await processElement(childElement, elementObj.id);
processElement(childElement, elementObj.id);
}
return elementObj;
} else if (element.tagName.toLowerCase() === "iframe") {
let iframeElementObject = await buildElementObject(element, false);
let iframeElementObject = buildElementObject(frame, element, false);
elements.push(iframeElementObject);
resultArray.push(iframeElementObject);
} else if (element.shadowRoot) {
// shadow host element
let shadowHostElement = await buildElementObject(element, false);
let shadowHostElement = buildElementObject(frame, element, false);
elements.push(shadowHostElement);
resultArray.push(shadowHostElement);
const children = getChildElements(element.shadowRoot);
for (let i = 0; i < children.length; i++) {
const childElement = children[i];
await processElement(childElement, shadowHostElement.id);
processElement(childElement, shadowHostElement.id);
}
} else {
// For a non-interactable element, if it has direct text, we also tagged
@@ -1063,14 +1059,14 @@ async function buildElementTree(
let isParentSVG = element.closest("svg");
if (element.tagName.toLowerCase() === "svg") {
// if element is <svg> we save all attributes and its children
elementObj = await buildElementObject(element, false);
elementObj = buildElementObject(frame, element, false);
} else if (isParentSVG && isParentSVG.getAttribute("unique_id")) {
// if element is a descendant of an <svg> that has a unique_id
elementObj = await buildElementObject(element, false);
elementObj = buildElementObject(frame, element, false);
} else if (isTableRelatedElement(element)) {
// build all table related elements into skyvern element
// we need these elements to preserve the DOM structure
elementObj = await buildElementObject(element, false);
elementObj = buildElementObject(frame, element, false);
} else {
// character length limit for non-interactable elements should be 5000
// we don't use element context in HTML format,
@@ -1083,7 +1079,7 @@ async function buildElementTree(
}
}
if (textContent && textContent.length <= 5000) {
elementObj = await buildElementObject(element, false);
elementObj = buildElementObject(frame, element, false);
}
}
@@ -1104,7 +1100,7 @@ async function buildElementTree(
const children = getChildElements(element);
for (let i = 0; i < children.length; i++) {
const childElement = children[i];
await processElement(childElement, parentId);
processElement(childElement, parentId);
}
}
}
@@ -1313,7 +1309,7 @@ async function buildElementTree(
};
// setup before parsing the dom
await processElement(starter, null);
processElement(starter, null);
for (var element of elements) {
if (
@@ -1545,17 +1541,17 @@ function removeBoundingBoxes() {
}
}
async function scrollToTop(draw_boxes) {
function scrollToTop(draw_boxes) {
removeBoundingBoxes();
window.scroll({ left: 0, top: 0, behavior: "instant" });
if (draw_boxes) {
var elementsAndResultArray = await buildTreeFromBody();
var elementsAndResultArray = buildTreeFromBody();
drawBoundingBoxes(elementsAndResultArray[0]);
}
return window.scrollY;
}
async function scrollToNextPage(draw_boxes) {
function scrollToNextPage(draw_boxes) {
// remove bounding boxes, scroll to next page with 200px overlap, then draw bounding boxes again
// return true if there is a next page, false otherwise
removeBoundingBoxes();
@@ -1565,7 +1561,7 @@ async function scrollToNextPage(draw_boxes) {
behavior: "instant",
});
if (draw_boxes) {
var elementsAndResultArray = await buildTreeFromBody();
var elementsAndResultArray = buildTreeFromBody();
drawBoundingBoxes(elementsAndResultArray[0]);
}
return window.scrollY;
@@ -1688,7 +1684,7 @@ function stopGlobalIncrementalObserver() {
window.globalOneTimeIncrementElements = [];
}
async function getIncrementElements(frame) {
function getIncrementElements(frame) {
const domDepthMap = new Map();
for (const element of window.globalOneTimeIncrementElements) {
@@ -1700,7 +1696,7 @@ async function getIncrementElements(frame) {
}
for (const child of element.newNodes) {
const [_, newNodeTree] = await buildElementTree(child, frame, false);
const [_, newNodeTree] = buildElementTree(child, frame, false);
if (newNodeTree.length > 0) {
newNodesTreeList.push(...newNodeTree);
}

View File

@@ -337,7 +337,7 @@ async def get_interactable_element_tree_in_frame(
unique_id = await frame_element.get_attribute("unique_id")
frame_js_script = f"async () => await buildTreeFromBody('{unique_id}', true)"
frame_js_script = f"() => buildTreeFromBody('{unique_id}', true)"
await frame.evaluate(JS_FUNCTION_DEFS)
frame_elements, frame_element_tree = await frame.evaluate(frame_js_script)
@@ -373,7 +373,7 @@ async def get_interactable_element_tree(
:return: Tuple containing the element tree and a map of element IDs to elements.
"""
await page.evaluate(JS_FUNCTION_DEFS)
main_frame_js_script = "async () => await buildTreeFromBody('main.frame', true)"
main_frame_js_script = "() => buildTreeFromBody('main.frame', true)"
elements, element_tree = await page.evaluate(main_frame_js_script)
if len(page.main_frame.child_frames) > 0:
@@ -415,7 +415,7 @@ class IncrementalScrapePage:
exc_info=True,
)
js_script = f"async () => await getIncrementElements('{frame_id}')"
js_script = f"() => getIncrementElements('{frame_id}')"
incremental_elements, incremental_tree = await frame.evaluate(js_script)
# we listen to incremental elements separately per frame, so all elements will be in the same SkyvernFrame
self.id_to_css_dict, self.id_to_element_dict, _ = build_element_dict(incremental_elements)
@@ -473,7 +473,8 @@ def trim_element_tree(elements: list[dict]) -> list[dict]:
else:
del queue_ele["attributes"]
# remove the tag, don't need it in the HTML tree
del queue_ele["keepAllAttr"]
if "keepAllAttr" in queue_ele:
del queue_ele["keepAllAttr"]
if "children" in queue_ele:
queue.extend(queue_ele["children"])

View File

@@ -159,6 +159,22 @@ class SkyvernElement:
haspopup = await self.get_attr("aria-haspopup")
return tag_name == InteractiveElement.INPUT and role == "combobox" and haspopup == "listbox"
async def is_auto_completion_input(self) -> bool:
    """Tell whether this element should be filled through the auto-completion flow.

    Only ``input`` tags qualify. Such an input counts as auto-completing when it
    carries both ``aria-haspopup`` and ``aria-autocomplete``, or when its ``id``
    attribute is exactly ``"location-input"``.
    """
    if self.get_tag_name() != InteractiveElement.INPUT:
        return False

    popup_attr = await self.get_attr("aria-haspopup")
    autocomplete_attr = await self.get_attr("aria-autocomplete")
    if popup_attr and autocomplete_attr:
        return True

    # special-cased element id that is treated as auto-completing regardless of aria attributes
    html_id = await self.get_attr("id")
    return html_id == "location-input"
async def is_checkbox(self) -> bool:
tag_name = self.get_tag_name()
if tag_name != "input":
@@ -181,6 +197,9 @@ class SkyvernElement:
async def is_selectable(self) -> bool:
return self.get_selectable() or self.get_tag_name() in SELECTABLE_ELEMENT
def get_element_dict(self) -> dict:
    """Return the static element dict backing this instance (the same object, not a copy)."""
    return self.__static_element
def get_scrollable(self) -> bool:
return self.__static_element.get("isScrollable", False)
@@ -193,6 +212,9 @@ class SkyvernElement:
def get_id(self) -> str:
return self.__static_element.get("id", "")
def get_frame_id(self) -> str:
    """Return the ``frame`` entry of the static element dict, or ``""`` when it is absent."""
    static_info = self.__static_element
    return static_info.get("frame", "")
def get_attributes(self) -> typing.Dict:
return self.__static_element.get("attributes", {})
@@ -314,10 +336,15 @@ class SkyvernElement:
if length > TEXT_PRESS_MAX_LENGTH:
# if the text is longer than TEXT_PRESS_MAX_LENGTH characters, we will locator.fill in initial texts until the last TEXT_PRESS_MAX_LENGTH characters
# and then type the last TEXT_PRESS_MAX_LENGTH characters with locator.press_sequentially
await self.get_locator().fill(text[: length - TEXT_PRESS_MAX_LENGTH])
await self.input_fill(text[: length - TEXT_PRESS_MAX_LENGTH])
text = text[length - TEXT_PRESS_MAX_LENGTH :]
await self.get_locator().press_sequentially(text, delay=TEXT_INPUT_DELAY, timeout=default_timeout)
await self.press_fill(text, timeout=default_timeout)
async def press_fill(
    self, text: str, timeout: float = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS
) -> None:
    """Type ``text`` into this element key by key using the locator's ``press_sequentially``.

    Args:
        text: The characters to type.
        timeout: Timeout forwarded to ``press_sequentially``. The default is read
            from the settings once, when this function is defined.
    """
    # TEXT_INPUT_DELAY is passed as the delay between key presses
    await self.get_locator().press_sequentially(text, delay=TEXT_INPUT_DELAY, timeout=timeout)
async def input_fill(
self, text: str, timeout: float = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS
@@ -377,6 +404,12 @@ class DomUtil:
self.scraped_page = scraped_page
self.page = page
def check_id_in_dom(self, element_id: str) -> bool:
    """Report whether the scraped page has a non-empty CSS selector for ``element_id``."""
    return bool(self.scraped_page.id_to_css_dict.get(element_id, ""))
async def get_skyvern_element_by_id(self, element_id: str) -> SkyvernElement:
element = self.scraped_page.id_to_element_dict.get(element_id)
if not element:

View File

@@ -168,6 +168,10 @@ class SkyvernFrame:
js_script = "async (element) => await getListboxOptions(element)"
return await self.frame.evaluate(js_script, element)
async def parse_element_from_html(self, frame: str, element: ElementHandle, interactable: bool) -> Dict:
    """Evaluate the in-page ``buildElementObject`` for ``element`` and return its result.

    Args:
        frame: Frame identifier passed through to ``buildElementObject``.
        element: Handle of the element to describe.
        interactable: Flag passed through to ``buildElementObject``.
    """
    # the three values travel as one array argument and are destructured on the JS side
    return await self.frame.evaluate(
        "([frame, element, interactable]) => buildElementObject(frame, element, interactable)",
        [frame, element, interactable],
    )
async def scroll_to_top(self, draw_boxes: bool) -> float:
"""
Scroll to the top of the page and take a screenshot.
@@ -175,7 +179,7 @@ class SkyvernFrame:
:param page: Page instance to take the screenshot from.
:return: Screenshot of the page.
"""
js_script = f"async () => await scrollToTop({str(draw_boxes).lower()})"
js_script = f"() => scrollToTop({str(draw_boxes).lower()})"
scroll_y_px = await self.frame.evaluate(js_script)
return scroll_y_px
@@ -186,7 +190,7 @@ class SkyvernFrame:
:param page: Page instance to take the screenshot from.
:return: Screenshot of the page.
"""
js_script = f"async () => await scrollToNextPage({str(draw_boxes).lower()})"
js_script = f"() => scrollToNextPage({str(draw_boxes).lower()})"
scroll_y_px = await self.frame.evaluate(js_script)
return scroll_y_px