general autocomplete solution (#713)
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import asyncio
|
||||
import copy
|
||||
import json
|
||||
import os
|
||||
import urllib.parse
|
||||
@@ -9,13 +10,16 @@ from typing import Any, Awaitable, Callable, List
|
||||
import structlog
|
||||
from deprecation import deprecated
|
||||
from playwright.async_api import FileChooser, Locator, Page, TimeoutError
|
||||
from pydantic import BaseModel
|
||||
|
||||
from skyvern.constants import REPO_ROOT_DIR, VERIFICATION_CODE_POLLING_TIMEOUT_MINS
|
||||
from skyvern.constants import REPO_ROOT_DIR, SKYVERN_ID_ATTR, VERIFICATION_CODE_POLLING_TIMEOUT_MINS
|
||||
from skyvern.exceptions import (
|
||||
EmptySelect,
|
||||
ErrEmptyTweakValue,
|
||||
ErrFoundSelectableElement,
|
||||
FailedToFetchSecret,
|
||||
FailToClick,
|
||||
FailToFindAutocompleteOption,
|
||||
FailToSelectByIndex,
|
||||
FailToSelectByLabel,
|
||||
FailToSelectByValue,
|
||||
@@ -24,9 +28,12 @@ from skyvern.exceptions import (
|
||||
MissingElement,
|
||||
MissingFileUrl,
|
||||
MultipleElementsFound,
|
||||
NoAutoCompleteOptionMeetCondition,
|
||||
NoElementMatchedForTargetOption,
|
||||
NoIncrementalElementFoundForAutoCompletion,
|
||||
NoIncrementalElementFoundForCustomSelection,
|
||||
NoLabelOrValueForCustomSelection,
|
||||
NoSuitableAutoCompleteOption,
|
||||
OptionIndexOutOfBound,
|
||||
WrongElementToUploadFile,
|
||||
)
|
||||
@@ -59,7 +66,13 @@ from skyvern.webeye.actions.actions import (
|
||||
)
|
||||
from skyvern.webeye.actions.responses import ActionFailure, ActionResult, ActionSuccess
|
||||
from skyvern.webeye.browser_factory import BrowserState, get_download_dir
|
||||
from skyvern.webeye.scraper.scraper import ElementTreeFormat, IncrementalScrapePage, ScrapedPage
|
||||
from skyvern.webeye.scraper.scraper import (
|
||||
ElementTreeFormat,
|
||||
IncrementalScrapePage,
|
||||
ScrapedPage,
|
||||
json_to_html,
|
||||
trim_element_tree,
|
||||
)
|
||||
from skyvern.webeye.utils.dom import DomUtil, InteractiveElement, SkyvernElement
|
||||
from skyvern.webeye.utils.page import SkyvernFrame
|
||||
|
||||
@@ -67,6 +80,12 @@ LOG = structlog.get_logger()
|
||||
COMMON_INPUT_TAGS = {"input", "textarea", "select"}
|
||||
|
||||
|
||||
class AutoCompletionResult(BaseModel):
    """Outcome of one attempt to pick an option from an auto-completion dropdown."""

    # NOTE(review): never assigned in the visible code; presumably meant to flag
    # that an auto-completion attempt was actually made -- confirm.
    auto_completion_attempt: bool = False
    # Element trees collected after typing; the caller passes them to the next
    # attempt as `preserved_elements`.
    incremental_elements: list[dict] = []
    # Starts as success; choose_auto_completion_dropdown replaces it with an
    # ActionFailure when the attempt raises.
    action_result: ActionResult = ActionSuccess()
|
||||
|
||||
|
||||
class ActionHandler:
|
||||
_handled_action_types: dict[
|
||||
ActionType,
|
||||
@@ -290,6 +309,7 @@ async def handle_input_text_action(
|
||||
dom = DomUtil(scraped_page, page)
|
||||
skyvern_element = await dom.get_skyvern_element_by_id(action.element_id)
|
||||
skyvern_frame = await SkyvernFrame.create_instance(skyvern_element.get_frame())
|
||||
incremental_scraped = IncrementalScrapePage(skyvern_frame=skyvern_frame)
|
||||
timeout = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS
|
||||
|
||||
current_text = await get_input_value(skyvern_element.get_tag_name(), skyvern_element.get_locator())
|
||||
@@ -319,7 +339,6 @@ async def handle_input_text_action(
|
||||
return await handle_select_option_action(select_action, page, scraped_page, task, step)
|
||||
|
||||
# press arrowdown to watch if there's any options popping up
|
||||
incremental_scraped = IncrementalScrapePage(skyvern_frame=skyvern_frame)
|
||||
await incremental_scraped.start_listen_dom_increment()
|
||||
await skyvern_element.get_locator().focus(timeout=timeout)
|
||||
await skyvern_element.get_locator().press("ArrowDown", timeout=timeout)
|
||||
@@ -376,12 +395,26 @@ async def handle_input_text_action(
|
||||
LOG.warning("Failed to clear the input field", action=action, exc_info=True)
|
||||
return [ActionFailure(InvalidElementForTextInput(element_id=action.element_id, tag_name=tag_name))]
|
||||
|
||||
# TODO: not sure if this case will trigger auto-completion
|
||||
if tag_name not in COMMON_INPUT_TAGS:
|
||||
await skyvern_element.input_fill(text)
|
||||
return [ActionSuccess()]
|
||||
|
||||
# If the input is a text input, we type the text character by character
|
||||
# 3 times the time it takes to type the text so it has time to finish typing
|
||||
if len(text) == 0:
|
||||
return [ActionSuccess()]
|
||||
|
||||
if await skyvern_element.is_auto_completion_input():
|
||||
result = await input_or_auto_complete_input(
|
||||
action=action,
|
||||
page=page,
|
||||
dom=dom,
|
||||
text=text,
|
||||
skyvern_element=skyvern_element,
|
||||
step=step,
|
||||
task=task,
|
||||
)
|
||||
return [result]
|
||||
|
||||
await skyvern_element.input_sequentially(text=text)
|
||||
return [ActionSuccess()]
|
||||
|
||||
@@ -848,6 +881,282 @@ async def chain_click(
|
||||
return [ActionFailure(WrongElementToUploadFile(action.element_id))]
|
||||
|
||||
|
||||
def remove_exist_elements(dom: DomUtil, element_tree: list[dict]) -> list[dict]:
    """Return a copy of ``element_tree`` without the elements already known to ``dom``.

    An element whose id is reported by ``dom.check_id_in_dom`` is dropped and its
    (already filtered) children are promoted into its place. Every other element
    is kept with its children filtered recursively.

    The input tree is never modified: kept elements are shallow copies carrying a
    fresh ``children`` list, so callers can keep using the tree they passed in.

    :param dom: object exposing ``check_id_in_dom(element_id) -> bool``.
    :param element_tree: list of element dicts, each optionally holding ``id``
        and ``children`` keys.
    :return: a new list of element dicts.
    """
    new_element_tree: list[dict] = []
    for element in element_tree:
        # `or []` also covers an explicit `"children": None`
        kept_children = remove_exist_elements(dom=dom, element_tree=element.get("children") or [])
        if dom.check_id_in_dom(element.get("id", "")):
            # the element itself is old news; only its new descendants survive
            new_element_tree.extend(kept_children)
        else:
            new_element_tree.append({**element, "children": kept_children})
    return new_element_tree
|
||||
|
||||
|
||||
async def choose_auto_completion_dropdown(
    action: actions.InputTextAction,
    page: Page,
    dom: DomUtil,
    text: str,
    skyvern_element: SkyvernElement,
    step: Step,
    task: Task,
    preserved_elements: list[dict] | None = None,
    relevance_threshold: float = 0.8,
) -> AutoCompletionResult:
    """Type ``text`` into the input and click the dropdown option chosen by the LLM.

    Steps:
      1. start listening for DOM increments in the element's frame;
      2. type ``text`` key by key and wait 5 seconds for options to appear;
      3. collect the newly appeared elements (minus those ``dom`` already knows)
         plus every entry of ``preserved_elements`` still attached to the frame;
      4. ask the secondary LLM which element to pick and click it when its
         ``relevance_float`` reaches ``relevance_threshold``.

    :param action: the input action; only ``action.reasoning`` is read here.
    :param page: unused in this function, kept for signature compatibility.
    :param dom: used to filter out elements that existed before typing.
    :param text: the value to type.
    :param skyvern_element: the input element to type into.
    :param preserved_elements: element dicts from a previous attempt that should
        be offered again if they are still on the page.
    :param relevance_threshold: minimum relevance required to click an option.
    :return: an ``AutoCompletionResult``. ``incremental_elements`` holds the
        elements offered to the LLM. ``action_result`` keeps its default on
        success and becomes ``ActionFailure`` wrapping the exception otherwise;
        exceptions from the attempt itself are captured, not propagated.
        Unless an option was clicked, the input is cleared before returning.
    """
    preserved_elements = preserved_elements or []
    clear_input = True
    result = AutoCompletionResult()

    current_frame = skyvern_element.get_frame()
    skyvern_frame = await SkyvernFrame.create_instance(current_frame)
    incremental_scraped = IncrementalScrapePage(skyvern_frame=skyvern_frame)
    await incremental_scraped.start_listen_dom_increment()

    try:
        await skyvern_element.press_fill(text)
        # wait for new elements to load
        await asyncio.sleep(5)
        incremental_element = await incremental_scraped.get_incremental_element_tree(
            app.AGENT_FUNCTION.cleanup_element_tree_factory(task=task, step=step)
        )
        incremental_element = remove_exist_elements(dom=dom, element_tree=incremental_element)

        # check if elements in preserve list are still on the page
        confirmed_preserved_list: list[dict] = []
        for element in preserved_elements:
            element_id = element.get("id")
            if not element_id:
                continue
            locator = current_frame.locator(f'[{SKYVERN_ID_ATTR}="{element_id}"]')
            cnt = await locator.count()
            if cnt == 0:
                continue

            element_handler = await locator.element_handle(
                timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS
            )
            if not element_handler:
                continue

            # NOTE(review): the frame id and interactable flag come from the input
            # element, not from the preserved element itself -- confirm intended.
            current_element = await skyvern_frame.parse_element_from_html(
                skyvern_element.get_frame_id(), element_handler, skyvern_element.is_interactable()
            )
            confirmed_preserved_list.append(current_element)

        if len(confirmed_preserved_list) > 0:
            confirmed_preserved_list = await app.AGENT_FUNCTION.cleanup_element_tree_factory(task=task, step=step)(
                skyvern_frame.get_frame().url, copy.deepcopy(confirmed_preserved_list)
            )
            confirmed_preserved_list = trim_element_tree(copy.deepcopy(confirmed_preserved_list))

        incremental_element.extend(confirmed_preserved_list)

        # snapshot before anything downstream can touch the tree
        result.incremental_elements = copy.deepcopy(incremental_element)
        if len(incremental_element) == 0:
            raise NoIncrementalElementFoundForAutoCompletion(element_id=skyvern_element.get_id(), text=text)

        html = incremental_scraped.build_html_tree(incremental_element)
        auto_completion_confirm_prompt = prompt_engine.load_prompt(
            "auto-completion-choose-option",
            context_reasoning=action.reasoning,
            filled_value=text,
            elements=html,
        )
        LOG.info(
            "Confirm if it's an auto completion dropdown",
            step_id=step.step_id,
            task_id=task.task_id,
        )
        json_response = await app.SECONDARY_LLM_API_HANDLER(prompt=auto_completion_confirm_prompt, step=step)
        element_id = json_response.get("id", "")
        # The LLM may answer with null or a non-numeric string; treat that as
        # "not relevant" instead of failing on the comparison below.
        try:
            relevance_float = float(json_response.get("relevance_float", 0))
        except (TypeError, ValueError):
            relevance_float = 0.0
        if not element_id:
            reasoning = json_response.get("reasoning")
            raise NoSuitableAutoCompleteOption(reasoning=reasoning, target_value=text)

        if relevance_float < relevance_threshold:
            LOG.info(
                f"The closest option doesn't meet the condition(relevance_float>={relevance_threshold})",
                element_id=element_id,
                relevance_float=relevance_float,
            )
            reasoning = json_response.get("reasoning")
            raise NoAutoCompleteOptionMeetCondition(
                reasoning=reasoning,
                required_relevance=relevance_threshold,
                target_value=text,
                closest_relevance=relevance_float,
            )

        LOG.info(
            "Find a suitable option to choose",
            element_id=element_id,
            step_id=step.step_id,
            task_id=task.task_id,
        )

        locator = current_frame.locator(f'[{SKYVERN_ID_ATTR}="{element_id}"]')
        if await locator.count() == 0:
            raise MissingElement(element_id=element_id)

        await locator.click(timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS)
        # an option was clicked, so the typed text must stay in the input
        clear_input = False
        return result
    except Exception as e:
        LOG.info(
            "Failed to choose the auto completion dropdown",
            exc_info=True,
            input_value=text,
            task_id=task.task_id,
            step_id=step.step_id,
        )
        result.action_result = ActionFailure(exception=e)
        return result
    finally:
        # Nested so that a failure to stop the listener cannot skip clearing the
        # input, which would leave stale text for the next attempt.
        try:
            await incremental_scraped.stop_listen_dom_increment()
        finally:
            if clear_input:
                await skyvern_element.input_clear()
|
||||
|
||||
|
||||
async def input_or_auto_complete_input(
    action: actions.InputTextAction,
    page: Page,
    dom: DomUtil,
    text: str,
    skyvern_element: SkyvernElement,
    step: Step,
    task: Task,
) -> ActionResult:
    """Fill an auto-completion input, retrying with LLM-suggested values.

    Each round:
      1. type the current value and try to pick a matching dropdown option;
      2. ask the secondary LLM for potential values based on the current value;
      3. try each potential value in turn;
      4. if nothing matched and another round is left, ask the LLM to tweak the
         current value using what was tried and what popped up, then start over.

    :param action: the input action; ``action.reasoning`` seeds the LLM context.
    :param text: the value originally requested for the input.
    :return: ``ActionSuccess`` as soon as an option is chosen; ``ActionFailure``
        with ``ErrEmptyTweakValue`` when the LLM returns no tweaked value, or
        with ``FailToFindAutocompleteOption`` when all rounds are exhausted.
    """
    LOG.info(
        "Trigger auto completion",
        task_id=task.task_id,
        step_id=step.step_id,
        element_id=skyvern_element.get_id(),
    )

    # FIXME: try the whole loop for twice now, to prevent too many LLM calls
    MAX_AUTO_COMPLETE_ATTEMP = 2
    context_reasoning = action.reasoning
    current_value = text
    result = AutoCompletionResult()

    for current_attemp in range(1, MAX_AUTO_COMPLETE_ATTEMP + 1):
        # both lists are per round; they feed the tweak prompt at the end of it
        whole_new_elements: list[dict] = []
        tried_values: list[str] = []

        LOG.info(
            "Try the potential value for auto completion",
            step_id=step.step_id,
            task_id=task.task_id,
            input_value=current_value,
        )
        result = await choose_auto_completion_dropdown(
            action=action,
            page=page,
            dom=dom,
            text=current_value,
            preserved_elements=result.incremental_elements,
            skyvern_element=skyvern_element,
            step=step,
            task=task,
        )
        if isinstance(result.action_result, ActionSuccess):
            return ActionSuccess()

        tried_values.append(current_value)
        whole_new_elements.extend(result.incremental_elements)

        prompt = prompt_engine.load_prompt(
            "auto-completion-potential-answers",
            context_reasoning=context_reasoning,
            current_value=current_value,
        )

        LOG.info(
            "Ask LLM to give 10 potential values based on the current value",
            current_value=current_value,
            step_id=step.step_id,
            task_id=task.task_id,
        )
        json_response = await app.SECONDARY_LLM_API_HANDLER(prompt=prompt, step=step)
        # the LLM may return null here; treat it as "no suggestions"
        values: list[dict] = json_response.get("potential_values") or []

        for each_value in values:
            # tolerate malformed entries (e.g. bare strings) instead of crashing
            value: str = each_value.get("value", "") if isinstance(each_value, dict) else ""
            if not value:
                LOG.info(
                    "Empty potential value, skip this attempt",
                    step_id=step.step_id,
                    task_id=task.task_id,
                    value=each_value,
                )
                continue
            if value in tried_values:
                # every attempt costs a 5 second wait plus an LLM call, so do not
                # retype a value that already failed in this round
                LOG.info(
                    "Potential value already tried, skip this attempt",
                    step_id=step.step_id,
                    task_id=task.task_id,
                    value=value,
                )
                continue
            LOG.info(
                "Try the potential value for auto completion",
                step_id=step.step_id,
                task_id=task.task_id,
                input_value=value,
            )
            result = await choose_auto_completion_dropdown(
                action=action,
                page=page,
                dom=dom,
                text=value,
                preserved_elements=result.incremental_elements,
                skyvern_element=skyvern_element,
                step=step,
                task=task,
            )
            if isinstance(result.action_result, ActionSuccess):
                return ActionSuccess()

            tried_values.append(value)
            whole_new_elements.extend(result.incremental_elements)

        if current_attemp < MAX_AUTO_COMPLETE_ATTEMP:
            LOG.info(
                "Ask LLM to tweak the current value based on tried input values",
                step_id=step.step_id,
                task_id=task.task_id,
                current_value=current_value,
                current_attemp=current_attemp,
            )
            prompt = prompt_engine.load_prompt(
                "auto-completion-tweak-value",
                context_reasoning=context_reasoning,
                current_value=current_value,
                tried_values=json.dumps(tried_values),
                popped_up_elements="".join([json_to_html(element) for element in whole_new_elements]),
            )
            json_response = await app.SECONDARY_LLM_API_HANDLER(prompt=prompt, step=step)
            context_reasoning = json_response.get("reasoning")
            new_current_value = json_response.get("tweaked_value", "")
            if not new_current_value:
                return ActionFailure(ErrEmptyTweakValue(reasoning=context_reasoning, current_value=current_value))
            LOG.info(
                "Ask LLM tweaked the current value with a new value",
                step_id=step.step_id,
                task_id=task.task_id,
                reasoning=context_reasoning,
                current_value=current_value,
                new_value=new_current_value,
            )
            current_value = new_current_value

    # every round ran without a successful selection
    return ActionFailure(FailToFindAutocompleteOption(current_value=text))
|
||||
|
||||
|
||||
async def select_from_dropdown(
|
||||
action: SelectOptionAction,
|
||||
page: Page,
|
||||
|
||||
@@ -877,109 +877,105 @@ function uniqueId() {
|
||||
return result;
|
||||
}
|
||||
|
||||
async function buildTreeFromBody(frame = "main.frame", open_select = false) {
|
||||
// Build the serializable description of a single DOM element.
//   frame        - frame identifier stored on the result as `frame`
//   element      - the DOM element to describe; it is tagged with a `unique_id`
//                  attribute if it does not carry one yet
//   interactable - stored on the result as `interactable`
// Returns a plain object with id, tag name, attributes, text, rect and flags.
function buildElementObject(frame, element, interactable) {
  const elementId = element.getAttribute("unique_id") ?? uniqueId();
  const tagName = element.tagName.toLowerCase();
  element.setAttribute("unique_id", elementId);

  // These attributes are normalised to booleans: the literal string "false"
  // (any casing) becomes false, anything else (including "") becomes true.
  const booleanAttrNames = new Set([
    "required",
    "aria-required",
    "checked",
    "aria-checked",
    "selected",
    "aria-selected",
    "readonly",
    "aria-readonly",
  ]);

  const attrs = {};
  for (const { name, value } of element.attributes) {
    attrs[name] = booleanAttrNames.has(name)
      ? !(value && value.toLowerCase() === "false")
      : value;
  }

  // styling may mark the field as required even without the attribute
  const requiredByStyle = checkRequiredFromStyle(element);
  if (requiredByStyle && !attrs["required"] && !attrs["aria-required"]) {
    attrs["required"] = true;
  }

  if (tagName === "input" || tagName === "textarea") {
    // for radio buttons the reported value is the checked state as a string
    attrs["value"] =
      element.type === "radio" ? String(element.checked) : element.value;
  }

  const elementObj = {
    id: elementId,
    frame: frame,
    interactable: interactable,
    tagName: tagName,
    attributes: attrs,
    text: getElementContent(element),
    children: [],
    rect: DomUtils.getVisibleClientRect(element, true),
    // don't trim any attr of this element if keepAllAttr=True
    keepAllAttr: tagName === "svg" || element.closest("svg") !== null,
    isSelectable:
      tagName === "select" ||
      isSelect2Dropdown(element) ||
      isSelect2MultiChoice(element),
    isScrollable: isScrollable(element),
  };

  const rootNode = element.getRootNode();
  if (rootNode instanceof ShadowRoot) {
    const hostElement = rootNode.host;
    let hostId = hostElement.getAttribute("unique_id");
    // tag the shadow host with a unique_id if it doesn't have one yet
    if (!hostId) {
      hostId = uniqueId();
      hostElement.setAttribute("unique_id", hostId);
    }
    elementObj.shadowHost = hostId;
  }

  // only <select> elements carry options and a selected value
  if (tagName === "select") {
    const [options, selected] = getSelectOptions(element);
    if (options) {
      elementObj.options = options;
    }
    if (selected) {
      elementObj.attributes["selected"] = selected;
    }
  }

  return elementObj;
}
|
||||
|
||||
// Build the element tree for the whole document, starting at document.body.
// `frame` and `open_select` are forwarded to buildElementTree unchanged.
// NOTE(review): the buildElementTree header visible in this change declares
// only (starter, frame), so `open_select` looks ignored there -- confirm.
function buildTreeFromBody(frame = "main.frame", open_select = false) {
  return buildElementTree(document.body, frame, open_select);
}
|
||||
|
||||
async function buildElementTree(
|
||||
starter = document.body,
|
||||
frame = "main.frame",
|
||||
open_select = false,
|
||||
) {
|
||||
function buildElementTree(starter = document.body, frame = "main.frame") {
|
||||
var elements = [];
|
||||
var resultArray = [];
|
||||
|
||||
async function buildElementObject(element, interactable) {
|
||||
var element_id = element.getAttribute("unique_id") ?? uniqueId();
|
||||
var elementTagNameLower = element.tagName.toLowerCase();
|
||||
element.setAttribute("unique_id", element_id);
|
||||
|
||||
const attrs = {};
|
||||
for (const attr of element.attributes) {
|
||||
var attrValue = attr.value;
|
||||
if (
|
||||
attr.name === "required" ||
|
||||
attr.name === "aria-required" ||
|
||||
attr.name === "checked" ||
|
||||
attr.name === "aria-checked" ||
|
||||
attr.name === "selected" ||
|
||||
attr.name === "aria-selected" ||
|
||||
attr.name === "readonly" ||
|
||||
attr.name === "aria-readonly"
|
||||
) {
|
||||
if (attrValue && attrValue.toLowerCase() === "false") {
|
||||
attrValue = false;
|
||||
} else {
|
||||
attrValue = true;
|
||||
}
|
||||
}
|
||||
attrs[attr.name] = attrValue;
|
||||
}
|
||||
|
||||
if (
|
||||
checkRequiredFromStyle(element) &&
|
||||
!attrs["required"] &&
|
||||
!attrs["aria-required"]
|
||||
) {
|
||||
attrs["required"] = true;
|
||||
}
|
||||
|
||||
if (elementTagNameLower === "input" || elementTagNameLower === "textarea") {
|
||||
if (element.type === "radio") {
|
||||
attrs["value"] = "" + element.checked + "";
|
||||
} else {
|
||||
attrs["value"] = element.value;
|
||||
}
|
||||
}
|
||||
|
||||
let elementObj = {
|
||||
id: element_id,
|
||||
frame: frame,
|
||||
interactable: interactable,
|
||||
tagName: elementTagNameLower,
|
||||
attributes: attrs,
|
||||
text: getElementContent(element),
|
||||
children: [],
|
||||
rect: DomUtils.getVisibleClientRect(element, true),
|
||||
// don't trim any attr of this element if keepAllAttr=True
|
||||
keepAllAttr:
|
||||
elementTagNameLower === "svg" || element.closest("svg") !== null,
|
||||
isSelectable:
|
||||
elementTagNameLower === "select" ||
|
||||
isSelect2Dropdown(element) ||
|
||||
isSelect2MultiChoice(element),
|
||||
isScrollable: isScrollable(element),
|
||||
};
|
||||
|
||||
let isInShadowRoot = element.getRootNode() instanceof ShadowRoot;
|
||||
if (isInShadowRoot) {
|
||||
let shadowHostEle = element.getRootNode().host;
|
||||
let shadowHostId = shadowHostEle.getAttribute("unique_id");
|
||||
// assign shadowHostId to the shadowHost element if it doesn't have unique_id
|
||||
if (!shadowHostId) {
|
||||
shadowHostId = uniqueId();
|
||||
shadowHostEle.setAttribute("unique_id", shadowHostId);
|
||||
}
|
||||
elementObj.shadowHost = shadowHostId;
|
||||
}
|
||||
|
||||
// get options for select element or for listbox element
|
||||
let selectOptions = null;
|
||||
let selectedValue = "";
|
||||
if (elementTagNameLower === "select") {
|
||||
[selectOptions, selectedValue] = getSelectOptions(element);
|
||||
}
|
||||
|
||||
if (selectOptions) {
|
||||
elementObj.options = selectOptions;
|
||||
}
|
||||
if (selectedValue) {
|
||||
elementObj.attributes["selected"] = selectedValue;
|
||||
}
|
||||
|
||||
return elementObj;
|
||||
}
|
||||
|
||||
function getChildElements(element) {
|
||||
if (element.childElementCount !== 0) {
|
||||
return Array.from(element.children);
|
||||
@@ -987,7 +983,7 @@ async function buildElementTree(
|
||||
return [];
|
||||
}
|
||||
}
|
||||
async function processElement(element, parentId) {
|
||||
function processElement(element, parentId) {
|
||||
if (element === null) {
|
||||
console.log("get a null element");
|
||||
return;
|
||||
@@ -1008,7 +1004,7 @@ async function buildElementTree(
|
||||
|
||||
// Check if the element is interactable
|
||||
if (isInteractable(element)) {
|
||||
var elementObj = await buildElementObject(element, true);
|
||||
var elementObj = buildElementObject(frame, element, true);
|
||||
elements.push(elementObj);
|
||||
// If the element is interactable but has no interactable parent,
|
||||
// then it starts a new tree, so add it to the result array
|
||||
@@ -1029,24 +1025,24 @@ async function buildElementTree(
|
||||
const children = getChildElements(element);
|
||||
for (let i = 0; i < children.length; i++) {
|
||||
const childElement = children[i];
|
||||
await processElement(childElement, elementObj.id);
|
||||
processElement(childElement, elementObj.id);
|
||||
}
|
||||
return elementObj;
|
||||
} else if (element.tagName.toLowerCase() === "iframe") {
|
||||
let iframeElementObject = await buildElementObject(element, false);
|
||||
let iframeElementObject = buildElementObject(frame, element, false);
|
||||
|
||||
elements.push(iframeElementObject);
|
||||
resultArray.push(iframeElementObject);
|
||||
} else if (element.shadowRoot) {
|
||||
// shadow host element
|
||||
let shadowHostElement = await buildElementObject(element, false);
|
||||
let shadowHostElement = buildElementObject(frame, element, false);
|
||||
elements.push(shadowHostElement);
|
||||
resultArray.push(shadowHostElement);
|
||||
|
||||
const children = getChildElements(element.shadowRoot);
|
||||
for (let i = 0; i < children.length; i++) {
|
||||
const childElement = children[i];
|
||||
await processElement(childElement, shadowHostElement.id);
|
||||
processElement(childElement, shadowHostElement.id);
|
||||
}
|
||||
} else {
|
||||
// For a non-interactable element, if it has direct text, we also tagged
|
||||
@@ -1063,14 +1059,14 @@ async function buildElementTree(
|
||||
let isParentSVG = element.closest("svg");
|
||||
if (element.tagName.toLowerCase() === "svg") {
|
||||
// if element is <svg> we save all attributes and its children
|
||||
elementObj = await buildElementObject(element, false);
|
||||
elementObj = buildElementObject(frame, element, false);
|
||||
} else if (isParentSVG && isParentSVG.getAttribute("unique_id")) {
|
||||
// if the element is a child of an <svg> that has a unique_id
|
||||
elementObj = await buildElementObject(element, false);
|
||||
elementObj = buildElementObject(frame, element, false);
|
||||
} else if (isTableRelatedElement(element)) {
|
||||
// build all table related elements into skyvern element
|
||||
// we need these elements to preserve the DOM structure
|
||||
elementObj = await buildElementObject(element, false);
|
||||
elementObj = buildElementObject(frame, element, false);
|
||||
} else {
|
||||
// character length limit for non-interactable elements should be 5000
|
||||
// we don't use element context in HTML format,
|
||||
@@ -1083,7 +1079,7 @@ async function buildElementTree(
|
||||
}
|
||||
}
|
||||
if (textContent && textContent.length <= 5000) {
|
||||
elementObj = await buildElementObject(element, false);
|
||||
elementObj = buildElementObject(frame, element, false);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1104,7 +1100,7 @@ async function buildElementTree(
|
||||
const children = getChildElements(element);
|
||||
for (let i = 0; i < children.length; i++) {
|
||||
const childElement = children[i];
|
||||
await processElement(childElement, parentId);
|
||||
processElement(childElement, parentId);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1313,7 +1309,7 @@ async function buildElementTree(
|
||||
};
|
||||
|
||||
// setup before parsing the dom
|
||||
await processElement(starter, null);
|
||||
processElement(starter, null);
|
||||
|
||||
for (var element of elements) {
|
||||
if (
|
||||
@@ -1545,17 +1541,17 @@ function removeBoundingBoxes() {
|
||||
}
|
||||
}
|
||||
|
||||
async function scrollToTop(draw_boxes) {
|
||||
function scrollToTop(draw_boxes) {
|
||||
removeBoundingBoxes();
|
||||
window.scroll({ left: 0, top: 0, behavior: "instant" });
|
||||
if (draw_boxes) {
|
||||
var elementsAndResultArray = await buildTreeFromBody();
|
||||
var elementsAndResultArray = buildTreeFromBody();
|
||||
drawBoundingBoxes(elementsAndResultArray[0]);
|
||||
}
|
||||
return window.scrollY;
|
||||
}
|
||||
|
||||
async function scrollToNextPage(draw_boxes) {
|
||||
function scrollToNextPage(draw_boxes) {
|
||||
// remove bounding boxes, scroll to next page with 200px overlap, then draw bounding boxes again
|
||||
// return true if there is a next page, false otherwise
|
||||
removeBoundingBoxes();
|
||||
@@ -1565,7 +1561,7 @@ async function scrollToNextPage(draw_boxes) {
|
||||
behavior: "instant",
|
||||
});
|
||||
if (draw_boxes) {
|
||||
var elementsAndResultArray = await buildTreeFromBody();
|
||||
var elementsAndResultArray = buildTreeFromBody();
|
||||
drawBoundingBoxes(elementsAndResultArray[0]);
|
||||
}
|
||||
return window.scrollY;
|
||||
@@ -1688,7 +1684,7 @@ function stopGlobalIncrementalObserver() {
|
||||
window.globalOneTimeIncrementElements = [];
|
||||
}
|
||||
|
||||
async function getIncrementElements(frame) {
|
||||
function getIncrementElements(frame) {
|
||||
const domDepthMap = new Map();
|
||||
|
||||
for (const element of window.globalOneTimeIncrementElements) {
|
||||
@@ -1700,7 +1696,7 @@ async function getIncrementElements(frame) {
|
||||
}
|
||||
|
||||
for (const child of element.newNodes) {
|
||||
const [_, newNodeTree] = await buildElementTree(child, frame, false);
|
||||
const [_, newNodeTree] = buildElementTree(child, frame, false);
|
||||
if (newNodeTree.length > 0) {
|
||||
newNodesTreeList.push(...newNodeTree);
|
||||
}
|
||||
|
||||
@@ -337,7 +337,7 @@ async def get_interactable_element_tree_in_frame(
|
||||
|
||||
unique_id = await frame_element.get_attribute("unique_id")
|
||||
|
||||
frame_js_script = f"async () => await buildTreeFromBody('{unique_id}', true)"
|
||||
frame_js_script = f"() => buildTreeFromBody('{unique_id}', true)"
|
||||
|
||||
await frame.evaluate(JS_FUNCTION_DEFS)
|
||||
frame_elements, frame_element_tree = await frame.evaluate(frame_js_script)
|
||||
@@ -373,7 +373,7 @@ async def get_interactable_element_tree(
|
||||
:return: Tuple containing the element tree and a map of element IDs to elements.
|
||||
"""
|
||||
await page.evaluate(JS_FUNCTION_DEFS)
|
||||
main_frame_js_script = "async () => await buildTreeFromBody('main.frame', true)"
|
||||
main_frame_js_script = "() => buildTreeFromBody('main.frame', true)"
|
||||
elements, element_tree = await page.evaluate(main_frame_js_script)
|
||||
|
||||
if len(page.main_frame.child_frames) > 0:
|
||||
@@ -415,7 +415,7 @@ class IncrementalScrapePage:
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
js_script = f"async () => await getIncrementElements('{frame_id}')"
|
||||
js_script = f"() => getIncrementElements('{frame_id}')"
|
||||
incremental_elements, incremental_tree = await frame.evaluate(js_script)
|
||||
# we listen for incremental elements separately per frame, so all elements will be in the same SkyvernFrame
|
||||
self.id_to_css_dict, self.id_to_element_dict, _ = build_element_dict(incremental_elements)
|
||||
@@ -473,7 +473,8 @@ def trim_element_tree(elements: list[dict]) -> list[dict]:
|
||||
else:
|
||||
del queue_ele["attributes"]
|
||||
# remove the tag, don't need it in the HTML tree
|
||||
del queue_ele["keepAllAttr"]
|
||||
if "keepAllAttr" in queue_ele:
|
||||
del queue_ele["keepAllAttr"]
|
||||
|
||||
if "children" in queue_ele:
|
||||
queue.extend(queue_ele["children"])
|
||||
|
||||
@@ -159,6 +159,22 @@ class SkyvernElement:
|
||||
haspopup = await self.get_attr("aria-haspopup")
|
||||
return tag_name == InteractiveElement.INPUT and role == "combobox" and haspopup == "listbox"
|
||||
|
||||
async def is_auto_completion_input(self) -> bool:
    """Tell whether this element should be filled through the auto-completion flow.

    Only ``input`` tags qualify. Such an element matches when it carries both
    ``aria-haspopup`` and ``aria-autocomplete`` with non-empty values, or when
    its ``id`` attribute is exactly ``location-input``.
    """
    if self.get_tag_name() != InteractiveElement.INPUT:
        return False

    popup_attr = await self.get_attr("aria-haspopup")
    autocomplete_attr = await self.get_attr("aria-autocomplete")
    if popup_attr and autocomplete_attr:
        return True

    # special-cased element id, checked only when the ARIA hints are missing
    dom_id = await self.get_attr("id")
    return dom_id == "location-input"
|
||||
|
||||
async def is_checkbox(self) -> bool:
|
||||
tag_name = self.get_tag_name()
|
||||
if tag_name != "input":
|
||||
@@ -181,6 +197,9 @@ class SkyvernElement:
|
||||
async def is_selectable(self) -> bool:
|
||||
return self.get_selectable() or self.get_tag_name() in SELECTABLE_ELEMENT
|
||||
|
||||
def get_element_dict(self) -> dict:
    """Return the element dict stored on this instance (the same object, not a copy)."""
    return self.__static_element
|
||||
|
||||
def get_scrollable(self) -> bool:
|
||||
return self.__static_element.get("isScrollable", False)
|
||||
|
||||
@@ -193,6 +212,9 @@ class SkyvernElement:
|
||||
def get_id(self) -> str:
|
||||
return self.__static_element.get("id", "")
|
||||
|
||||
def get_frame_id(self) -> str:
    """Return the ``frame`` entry of the stored element dict, or ``""`` when absent."""
    return self.__static_element.get("frame", "")
|
||||
|
||||
def get_attributes(self) -> typing.Dict:
|
||||
return self.__static_element.get("attributes", {})
|
||||
|
||||
@@ -314,10 +336,15 @@ class SkyvernElement:
|
||||
if length > TEXT_PRESS_MAX_LENGTH:
|
||||
# if the text is longer than TEXT_PRESS_MAX_LENGTH characters, we will locator.fill in initial texts until the last TEXT_PRESS_MAX_LENGTH characters
|
||||
# and then type the last TEXT_PRESS_MAX_LENGTH characters with locator.press_sequentially
|
||||
await self.get_locator().fill(text[: length - TEXT_PRESS_MAX_LENGTH])
|
||||
await self.input_fill(text[: length - TEXT_PRESS_MAX_LENGTH])
|
||||
text = text[length - TEXT_PRESS_MAX_LENGTH :]
|
||||
|
||||
await self.get_locator().press_sequentially(text, delay=TEXT_INPUT_DELAY, timeout=default_timeout)
|
||||
await self.press_fill(text, timeout=default_timeout)
|
||||
|
||||
async def press_fill(
    self, text: str, timeout: float = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS
) -> None:
    """Type ``text`` into the element key by key via ``press_sequentially``.

    Uses ``TEXT_INPUT_DELAY`` as the per-key delay. The default ``timeout`` is
    read from the settings once, when the method is defined.
    """
    await self.get_locator().press_sequentially(text, delay=TEXT_INPUT_DELAY, timeout=timeout)
|
||||
|
||||
async def input_fill(
|
||||
self, text: str, timeout: float = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS
|
||||
@@ -377,6 +404,12 @@ class DomUtil:
|
||||
self.scraped_page = scraped_page
|
||||
self.page = page
|
||||
|
||||
def check_id_in_dom(self, element_id: str) -> bool:
    """Return True when the scraped page maps ``element_id`` to a non-empty CSS selector."""
    return bool(self.scraped_page.id_to_css_dict.get(element_id))
|
||||
|
||||
async def get_skyvern_element_by_id(self, element_id: str) -> SkyvernElement:
|
||||
element = self.scraped_page.id_to_element_dict.get(element_id)
|
||||
if not element:
|
||||
|
||||
@@ -168,6 +168,10 @@ class SkyvernFrame:
|
||||
js_script = "async (element) => await getListboxOptions(element)"
|
||||
return await self.frame.evaluate(js_script, element)
|
||||
|
||||
async def parse_element_from_html(self, frame: str, element: ElementHandle, interactable: bool) -> Dict:
    """Run the in-page ``buildElementObject`` helper on ``element`` and return its result.

    :param frame: frame identifier passed through to ``buildElementObject``.
    :param element: handle of the DOM element to describe.
    :param interactable: flag passed through to ``buildElementObject``.
    """
    # the three values travel as one list argument and are destructured in JS
    # NOTE(review): presumably requires the JS helper definitions to be loaded
    # in this frame already -- confirm against the caller.
    js_script = "([frame, element, interactable]) => buildElementObject(frame, element, interactable)"
    return await self.frame.evaluate(js_script, [frame, element, interactable])
|
||||
|
||||
async def scroll_to_top(self, draw_boxes: bool) -> float:
|
||||
"""
|
||||
Scroll to the top of the page and take a screenshot.
|
||||
@@ -175,7 +179,7 @@ class SkyvernFrame:
|
||||
:param page: Page instance to take the screenshot from.
|
||||
:return: Screenshot of the page.
|
||||
"""
|
||||
js_script = f"async () => await scrollToTop({str(draw_boxes).lower()})"
|
||||
js_script = f"() => scrollToTop({str(draw_boxes).lower()})"
|
||||
scroll_y_px = await self.frame.evaluate(js_script)
|
||||
return scroll_y_px
|
||||
|
||||
@@ -186,7 +190,7 @@ class SkyvernFrame:
|
||||
:param page: Page instance to take the screenshot from.
|
||||
:return: Screenshot of the page.
|
||||
"""
|
||||
js_script = f"async () => await scrollToNextPage({str(draw_boxes).lower()})"
|
||||
js_script = f"() => scrollToNextPage({str(draw_boxes).lower()})"
|
||||
scroll_y_px = await self.frame.evaluate(js_script)
|
||||
return scroll_y_px
|
||||
|
||||
|
||||
Reference in New Issue
Block a user