support value select for custom selection (#756)

This commit is contained in:
LawyZheng
2024-08-30 01:24:38 +08:00
committed by GitHub
parent 2e11f24f5c
commit 9e0201627b
7 changed files with 331 additions and 73 deletions

View File

@@ -1,7 +1,7 @@
import asyncio
import copy
import hashlib
from typing import Awaitable, Callable, Dict, List
from typing import Dict, List
import structlog
from playwright.async_api import Page
@@ -14,9 +14,7 @@ from skyvern.forge.prompts import prompt_engine
from skyvern.forge.sdk.models import Organization, Step, StepStatus
from skyvern.forge.sdk.schemas.tasks import Task, TaskStatus
from skyvern.webeye.browser_factory import BrowserState
from skyvern.webeye.scraper.scraper import ELEMENT_NODE_ATTRIBUTES, json_to_html
CleanupElementTreeFunc = Callable[[str, list[dict]], Awaitable[list[dict]]]
from skyvern.webeye.scraper.scraper import ELEMENT_NODE_ATTRIBUTES, CleanupElementTreeFunc, json_to_html
LOG = structlog.get_logger()

View File

@@ -13,6 +13,7 @@ Reply in JSON format with the following keys:
"reasoning": str, // The reasoning behind the action. Be specific, referencing target value and element ids in your reasoning. Mention why you chose the element id. Keep the reasoning short and to the point.
"confidence_float": float, // The confidence of the action. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence
"id": str, // The id of the element to take action on. The id has to be one from the elements list
"value": str, // The value to select.
}
Context:

View File

@@ -67,6 +67,7 @@ from skyvern.webeye.actions.actions import (
from skyvern.webeye.actions.responses import ActionFailure, ActionResult, ActionSuccess
from skyvern.webeye.browser_factory import BrowserState, get_download_dir
from skyvern.webeye.scraper.scraper import (
CleanupElementTreeFunc,
ElementTreeFormat,
IncrementalScrapePage,
ScrapedPage,
@@ -80,6 +81,28 @@ LOG = structlog.get_logger()
COMMON_INPUT_TAGS = {"input", "textarea", "select"}
def remove_exist_elements(dom: DomUtil, element_tree: list[dict]) -> list[dict]:
    """
    Prune the nodes whose id is reported by ``dom.check_id_in_dom`` from an element tree.

    Children are processed before their parent. A node whose id is found through ``dom``
    is dropped and its already-pruned children take its place in the result. Any other
    node is kept, and its "children" entry is overwritten in place with the pruned list.
    """
    kept_nodes: list[dict] = []
    for node in element_tree:
        descendants = node.get("children", [])
        if len(descendants) > 0:
            descendants = remove_exist_elements(dom=dom, element_tree=descendants)

        if not dom.check_id_in_dom(node.get("id", "")):
            # the node dict itself is updated, it is not copied
            node["children"] = descendants
            kept_nodes.append(node)
            continue

        # the node is dropped; its surviving children move up one level
        kept_nodes.extend(descendants)
    return kept_nodes
def clean_and_remove_element_tree_factory(task: Task, step: Step, dom: DomUtil) -> CleanupElementTreeFunc:
    """
    Build an element-tree cleanup coroutine bound to ``task``, ``step`` and ``dom``.

    The returned coroutine first runs the cleanup function produced by
    ``app.AGENT_FUNCTION.cleanup_element_tree_factory`` and then passes its result through
    ``remove_exist_elements`` together with ``dom``.
    """

    async def _cleanup_then_prune(url: str, element_tree: list[dict]) -> list[dict]:
        # the agent cleanup function is created on every call, not once per factory
        agent_cleanup = app.AGENT_FUNCTION.cleanup_element_tree_factory(task=task, step=step)
        cleaned_tree = await agent_cleanup(url, element_tree)
        return remove_exist_elements(dom=dom, element_tree=cleaned_tree)

    return _cleanup_then_prune
class AutoCompletionResult(BaseModel):
auto_completion_attempt: bool = False
incremental_elements: list[dict] = []
@@ -345,7 +368,7 @@ async def handle_input_text_action(
await asyncio.sleep(5)
incremental_element = await incremental_scraped.get_incremental_element_tree(
app.AGENT_FUNCTION.cleanup_element_tree_factory(task=task, step=step)
clean_and_remove_element_tree_factory(task=task, step=step, dom=dom),
)
if len(incremental_element) == 0:
LOG.info(
@@ -357,7 +380,8 @@ async def handle_input_text_action(
)
else:
try:
result = await select_from_dropdown(
# TODO: we don't select by value for the auto completion detect case
result, _ = await select_from_dropdown(
action=select_action,
page=page,
dom=dom,
@@ -613,6 +637,8 @@ async def handle_select_option_action(
skyvern_frame = await SkyvernFrame.create_instance(skyvern_element.get_frame())
incremental_scraped = IncrementalScrapePage(skyvern_frame=skyvern_frame)
is_open = False
suggested_value: str | None = None
results: list[ActionResult] = []
try:
await incremental_scraped.start_listen_dom_increment()
@@ -635,12 +661,12 @@ async def handle_select_option_action(
is_open = True
incremental_element = await incremental_scraped.get_incremental_element_tree(
app.AGENT_FUNCTION.cleanup_element_tree_factory(step=step, task=task)
clean_and_remove_element_tree_factory(task=task, step=step, dom=dom),
)
if len(incremental_element) == 0:
raise NoIncrementalElementFoundForCustomSelection(element_id=action.element_id)
result = await select_from_dropdown(
result, suggested_value = await select_from_dropdown(
action=action,
page=page,
dom=dom,
@@ -654,16 +680,73 @@ async def handle_select_option_action(
)
# force_select won't return None result
assert result is not None
return [result]
results.append(result)
if isinstance(result, ActionSuccess) or suggested_value is None:
return results
except Exception as e:
if is_open:
LOG.exception("Custom select error")
results.append(ActionFailure(exception=e))
return results
finally:
if is_open and len(results) > 0 and not isinstance(results[-1], ActionSuccess):
await skyvern_element.scroll_into_view()
await skyvern_element.coordinate_click(page=page)
await skyvern_element.get_locator().press("Escape", timeout=timeout)
LOG.exception("Custom select error")
return [ActionFailure(exception=e)]
await skyvern_element.press_key("Escape")
await skyvern_element.blur()
await incremental_scraped.stop_listen_dom_increment()
LOG.info(
"Try to select by value in custom select",
element_id=skyvern_element.get_id(),
value=suggested_value,
task_id=task.task_id,
step_id=step.step_id,
)
try:
await incremental_scraped.start_listen_dom_increment()
timeout = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS
await skyvern_element.focus()
try:
await skyvern_element.get_locator().click(timeout=timeout)
except Exception:
LOG.info(
"fail to open dropdown by clicking, try to press arrow down to open",
element_id=skyvern_element.get_id(),
task_id=task.task_id,
step_id=step.step_id,
)
await skyvern_element.focus()
await skyvern_element.press_key("ArrowDown")
await asyncio.sleep(5)
is_open = True
result = await select_from_dropdown_by_value(
value=suggested_value,
page=page,
dom=dom,
skyvern_frame=skyvern_frame,
incremental_scraped=incremental_scraped,
llm_handler=app.SECONDARY_LLM_API_HANDLER,
task=task,
step=step,
)
results.append(result)
return results
except Exception as e:
LOG.exception("Custom select by value error")
results.append(ActionFailure(exception=e))
return results
finally:
if is_open and len(results) > 0 and not isinstance(results[-1], ActionSuccess):
await skyvern_element.scroll_into_view()
await skyvern_element.coordinate_click(page=page)
await skyvern_element.press_key("Escape")
await skyvern_element.blur()
await incremental_scraped.stop_listen_dom_increment()
@@ -891,20 +974,6 @@ async def chain_click(
return [ActionFailure(WrongElementToUploadFile(action.element_id))]
def remove_exist_elements(dom: DomUtil, element_tree: list[dict]) -> list[dict]:
    """
    Prune the nodes whose id is reported by ``dom.check_id_in_dom`` from an element tree.

    Children are processed before their parent. A node whose id is found through ``dom``
    is dropped and its already-pruned children take its place in the result. Any other
    node is kept, and its "children" entry is overwritten in place with the pruned list.
    """
    kept_nodes: list[dict] = []
    for node in element_tree:
        descendants = node.get("children", [])
        if len(descendants) > 0:
            descendants = remove_exist_elements(dom=dom, element_tree=descendants)

        if not dom.check_id_in_dom(node.get("id", "")):
            # the node dict itself is updated, it is not copied
            node["children"] = descendants
            kept_nodes.append(node)
            continue

        # the node is dropped; its surviving children move up one level
        kept_nodes.extend(descendants)
    return kept_nodes
async def choose_auto_completion_dropdown(
action: actions.InputTextAction,
page: Page,
@@ -930,9 +999,8 @@ async def choose_auto_completion_dropdown(
# wait for new elements to load
await asyncio.sleep(5)
incremental_element = await incremental_scraped.get_incremental_element_tree(
app.AGENT_FUNCTION.cleanup_element_tree_factory(task=task, step=step)
clean_and_remove_element_tree_factory(task=task, step=step, dom=dom),
)
incremental_element = remove_exist_elements(dom=dom, element_tree=incremental_element)
# check if elements in preserve list are still on the page
confirmed_preserved_list: list[dict] = []
@@ -1178,14 +1246,14 @@ async def select_from_dropdown(
step: Step,
task: Task,
force_select: bool = False,
) -> ActionResult | None:
) -> tuple[ActionResult | None, str | None]:
"""
force_select is used to choose an element to click even if there's no dropdown menu.
A None result is returned only when force_select is false and no dropdown menu popped up.
"""
timeout = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS
dropdown_menu_element = await locate_dropdown_meanu(
dropdown_menu_element = await locate_dropdown_menu(
incremental_scraped=incremental_scraped,
element_trees=element_trees,
llm_handler=llm_handler,
@@ -1194,7 +1262,7 @@ async def select_from_dropdown(
)
if not force_select and dropdown_menu_element is None:
return None
return None, None
if dropdown_menu_element and await skyvern_frame.get_element_scrollable(
await dropdown_menu_element.get_element_handler()
@@ -1209,9 +1277,8 @@ async def select_from_dropdown(
)
trimmed_element_tree = await incremental_scraped.get_incremental_element_tree(
app.AGENT_FUNCTION.cleanup_element_tree_factory(step=step, task=task)
clean_and_remove_element_tree_factory(task=task, step=step, dom=dom),
)
trimmed_element_tree = remove_exist_elements(dom=dom, element_tree=trimmed_element_tree)
html = incremental_scraped.build_html_tree(element_tree=trimmed_element_tree)
@@ -1238,22 +1305,114 @@ async def select_from_dropdown(
task_id=task.task_id,
)
value: str | None = json_response.get("value", None)
element_id: str | None = json_response.get("id", None)
if not element_id:
raise NoElementMatchedForTargetOption(target=target_value, reason=json_response.get("reasoning"))
selected_element = await SkyvernElement.create_from_incremental(incremental_scraped, element_id)
await selected_element.scroll_into_view()
await selected_element.get_locator().click(timeout=timeout)
return ActionSuccess()
try:
selected_element = await SkyvernElement.create_from_incremental(incremental_scraped, element_id)
await selected_element.scroll_into_view()
await selected_element.get_locator().click(timeout=timeout)
return ActionSuccess(), None
except MissingElement:
if not value:
raise
# sometimes we have multiple elements pointed to the same value,
# but only one option is clickable on the page
LOG.debug(
"Searching option with the same value in incremetal elements",
value=value,
elements=incremental_scraped.element_tree,
)
locator = await incremental_scraped.select_one_element_by_value(value=value)
if not locator:
return ActionFailure(exception=MissingElement()), value
try:
LOG.info(
"Find an alternative option with the same value. Try to select the option.",
value=value,
)
await locator.click(timeout=timeout)
return ActionSuccess(), value
except Exception as e:
return ActionFailure(exception=e), value
async def locate_dropdown_meanu(
async def select_from_dropdown_by_value(
    value: str,
    page: Page,
    skyvern_frame: SkyvernFrame,
    dom: DomUtil,
    incremental_scraped: IncrementalScrapePage,
    llm_handler: LLMAPIHandler,
    task: Task,
    step: Step,
) -> ActionResult:
    """
    Click the option matching ``value`` among the incrementally scraped elements.

    The elements scraped so far are searched first. If nothing matches, the opened
    dropdown menu is located and, when it is scrollable, scrolled page by page while the
    search is repeated after each scroll.

    Returns ActionSuccess once a matching option has been clicked.
    Raises NoElementMatchedForTargetOption when no scrollable dropdown menu is available
    to search further, or when scrolling finishes without a match.
    """
    click_timeout = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS

    # refresh the incremental tree before looking for the value
    initial_trees = await incremental_scraped.get_incremental_element_tree(
        clean_and_remove_element_tree_factory(task=task, step=step, dom=dom),
    )

    direct_match = await incremental_scraped.select_one_element_by_value(value=value)
    if direct_match is not None:
        await direct_match.click(timeout=click_timeout)
        return ActionSuccess()

    menu_element = await locate_dropdown_menu(
        incremental_scraped=incremental_scraped,
        element_trees=initial_trees,
        llm_handler=llm_handler,
        step=step,
        task=task,
    )

    # scrollability is only queried when a dropdown menu was actually located
    can_scroll = False
    if menu_element:
        can_scroll = await skyvern_frame.get_element_scrollable(await menu_element.get_element_handler())
    if not can_scroll:
        raise NoElementMatchedForTargetOption(target=value, reason="No value matched and element is not scrollable")

    clicked_while_scrolling: bool = False

    async def _keep_scrolling(incre_scraped: IncrementalScrapePage) -> bool:
        # returns True to keep scrolling, False once the option has been clicked
        nonlocal clicked_while_scrolling

        await incre_scraped.get_incremental_element_tree(
            clean_and_remove_element_tree_factory(task=task, step=step, dom=dom),
        )

        option_locator = await incre_scraped.select_one_element_by_value(value=value)
        if option_locator is None:
            return True

        await option_locator.click(timeout=click_timeout)
        clicked_while_scrolling = True
        return False

    await scroll_down_to_load_all_options(
        dropdown_menu_element=menu_element,
        page=page,
        skyvern_frame=skyvern_frame,
        incremental_scraped=incremental_scraped,
        step=step,
        task=task,
        page_by_page=True,
        is_continue=_keep_scrolling,
    )

    if not clicked_while_scrolling:
        raise NoElementMatchedForTargetOption(target=value, reason="No value matched after scrolling")

    return ActionSuccess()
async def locate_dropdown_menu(
incremental_scraped: IncrementalScrapePage,
element_trees: list[dict],
llm_handler: LLMAPIHandler,
step: Step | None = None,
task: Task | None = None,
step: Step,
task: Task,
) -> SkyvernElement | None:
for idx, element_dict in enumerate(element_trees):
# FIXME: confirm max to 10 nodes for now, preventing sending too many requests to LLM
@@ -1262,27 +1421,45 @@ async def locate_dropdown_meanu(
element_id = element_dict.get("id")
if not element_id:
LOG.info(
LOG.debug(
"Skip the non-interactable element for the dropdown menu confirm",
step_id=step.step_id if step else "none",
task_id=task.task_id if task else "none",
step_id=step.step_id,
task_id=task.task_id,
element=element_dict,
)
continue
head_element = await SkyvernElement.create_from_incremental(incremental_scraped, element_id)
try:
head_element = await SkyvernElement.create_from_incremental(incremental_scraped, element_id)
except Exception:
LOG.debug(
"Failed to get head element in the incremental page",
element_id=element_id,
step_id=step.step_id,
task_id=task.task_id,
exc_info=True,
)
continue
screenshot = await head_element.get_locator().screenshot(
timeout=SettingsManager.get_settings().BROWSER_SCREENSHOT_TIMEOUT_MS
)
dropdown_confirm_prompt = prompt_engine.load_prompt("opened-dropdown-confirm")
LOG.info(
LOG.debug(
"Confirm if it's an opened dropdown menu",
step_id=step.step_id if step else "none",
task_id=task.task_id if task else "none",
step_id=step.step_id,
task_id=task.task_id,
element=element_dict,
)
json_response = await llm_handler(prompt=dropdown_confirm_prompt, screenshots=[screenshot], step=step)
is_opened_dropdown_menu = json_response.get("is_opened_dropdown_menu")
if is_opened_dropdown_menu:
LOG.info(
"Opened dropdown menu found",
step_id=step.step_id,
task_id=task.task_id,
element_id=element_id,
)
return await SkyvernElement.create_from_incremental(incre_page=incremental_scraped, element_id=element_id)
return None
@@ -1294,6 +1471,8 @@ async def scroll_down_to_load_all_options(
incremental_scraped: IncrementalScrapePage,
step: Step | None = None,
task: Task | None = None,
page_by_page: bool = False,
is_continue: Callable[[IncrementalScrapePage], Awaitable[bool]] | None = None,
) -> None:
LOG.info(
"Scroll down the dropdown menu to load all options",
@@ -1325,8 +1504,7 @@ async def scroll_down_to_load_all_options(
await page.mouse.wheel(0, scroll_interval)
scroll_pace += scroll_interval
else:
await skyvern_frame.scroll_to_element_bottom(dropdown_menu_element_handle)
# wait for the options to be fully loaded
await skyvern_frame.scroll_to_element_bottom(dropdown_menu_element_handle, page_by_page)
await asyncio.sleep(2)
# scroll a little back and scroll down to trigger the loading
@@ -1342,6 +1520,10 @@ async def scroll_down_to_load_all_options(
step_id=step.step_id if step else "none",
task_id=task.task_id if task else "none",
)
if is_continue is not None and not await is_continue(incremental_scraped):
return
if previous_num == current_num:
break
previous_num = current_num

View File

@@ -496,6 +496,17 @@ const isComboboxDropdown = (element) => {
return role && haspopup && controls && readonly;
};
// True for a <button type="button"> whose aria-haspopup attribute is "listbox".
// Attribute values are compared case-insensitively; a missing attribute counts as "".
const isDropdownButton = (element) => {
  const loweredAttr = (name) => (element.getAttribute(name) || "").toLowerCase();

  // read everything up front, then compare
  const tag = element.tagName.toLowerCase();
  const buttonType = loweredAttr("type");
  const popupKind = loweredAttr("aria-haspopup");

  if (tag !== "button" || buttonType !== "button") {
    return false;
  }
  return popupKind === "listbox";
};
const isSelect2Dropdown = (element) => {
const tagName = element.tagName.toLowerCase();
const className = element.className.toString();
@@ -934,6 +945,7 @@ function buildElementObject(frame, element, interactable) {
elementTagNameLower === "svg" || element.closest("svg") !== null,
isSelectable:
elementTagNameLower === "select" ||
isDropdownButton(element) ||
isSelect2Dropdown(element) ||
isSelect2MultiChoice(element),
};
@@ -1566,9 +1578,12 @@ function scrollToNextPage(draw_boxes) {
return window.scrollY;
}
function scrollToElementBottom(element) {
function scrollToElementBottom(element, page_by_page = false) {
const top = page_by_page
? element.clientHeight + element.scrollTop
: element.scrollHeight;
element.scroll({
top: element.scrollHeight,
top: top,
left: 0,
behavior: "smooth",
});

View File

@@ -16,6 +16,7 @@ from skyvern.webeye.browser_factory import BrowserState
from skyvern.webeye.utils.page import SkyvernFrame
LOG = structlog.get_logger()
CleanupElementTreeFunc = Callable[[str, list[dict]], Awaitable[list[dict]]]
RESERVED_ATTRIBUTES = {
"accept", # for input file
@@ -171,7 +172,7 @@ class ScrapedPage(BaseModel):
async def scrape_website(
browser_state: BrowserState,
url: str,
cleanup_element_tree: Callable[[str, list[dict]], Awaitable[list[dict]]],
cleanup_element_tree: CleanupElementTreeFunc,
num_retry: int = 0,
scrape_exclude: Callable[[Page, Frame], Awaitable[bool]] | None = None,
) -> ScrapedPage:
@@ -251,7 +252,7 @@ async def get_frame_text(iframe: Frame) -> str:
async def scrape_web_unsafe(
browser_state: BrowserState,
url: str,
cleanup_element_tree: Callable[[str, list[dict]], Awaitable[list[dict]]],
cleanup_element_tree: CleanupElementTreeFunc,
scrape_exclude: Callable[[Page, Frame], Awaitable[bool]] | None = None,
) -> ScrapedPage:
"""
@@ -398,7 +399,7 @@ class IncrementalScrapePage:
async def get_incremental_element_tree(
self,
cleanup_element_tree: Callable[[str, list[dict]], Awaitable[list[dict]]],
cleanup_element_tree: CleanupElementTreeFunc,
) -> list[dict]:
frame = self.skyvern_frame.get_frame()
@@ -429,23 +430,54 @@ class IncrementalScrapePage:
js_script = "() => window.globalOneTimeIncrementElements.length"
return await self.skyvern_frame.get_frame().evaluate(js_script)
async def __validate_element_by_value(self, value: str, element: dict) -> tuple[Locator | None, bool]:
    """
    Search ``element`` and its descendants for a node whose text equals ``value``.

    Returns a (locator, is_matched) pair:
    - (locator, True): the value matched and ``locator`` points to an element to interact with;
    - (None, True): the value matched, but no interactable element could be resolved for it;
    - (None, False): nothing in this subtree matched the value.
    """
    is_interactable = element.get("interactable", False)
    node_id = element.get("id", "")

    # only nodes carrying an id can be addressed on the page
    own_locator: Locator | None = (
        self.skyvern_frame.get_frame().locator(f'[{SKYVERN_ID_ATTR}="{node_id}"]') if node_id else None
    )

    # depth-first: the children are checked before the node itself.
    # The first matching child decides the outcome: its own locator when it has one,
    # otherwise this node when it is interactable and still present in the frame.
    for child in element.get("children", []):
        found_locator, child_matched = await self.__validate_element_by_value(value, child)
        if not child_matched:
            continue
        if found_locator:
            return found_locator, True
        if is_interactable and own_locator and await own_locator.count() > 0:
            return own_locator, True
        return None, True

    # no child matched, so the node itself is compared against the value
    if not own_locator or element.get("text", "") != value:
        return None, False

    if await own_locator.count() == 0:
        return None, False

    return (own_locator if is_interactable else None), True
async def select_one_element_by_value(self, value: str) -> Locator | None:
for element in self.elements:
element_id = element.get("id", "")
if not element_id:
continue
if not element.get("interactable", False):
continue
text = element.get("text", "")
if text != value:
continue
locator = self.skyvern_frame.get_frame().locator(f'[{SKYVERN_ID_ATTR}="{element_id}"]')
if await locator.count() > 0:
for element in self.element_tree:
locator, _ = await self.__validate_element_by_value(value=value, element=element)
if locator:
return locator
return None
def build_html_tree(self, element_tree: list[dict] | None = None) -> str:

View File

@@ -320,6 +320,36 @@ class SkyvernElement:
index += 1
return None
async def find_interactable_anchor_child(
    self, dom: DomUtil, element_type: InteractiveElement
) -> SkyvernElement | None:
    """
    Breadth-first search, starting at this element, for an element of ``element_type``.

    Each visited element is returned if it is interactable and its tag name equals
    ``element_type``. Otherwise the element returned by ``find_label_for`` is returned
    when its tag name equals ``element_type``. A failure during that label lookup is
    logged and the search continues with the children listed in the static element.

    Returns None when the whole subtree has been visited without a match.
    """
    pending = [self]
    cursor = 0
    while cursor < len(pending):
        candidate = pending[cursor]
        cursor += 1

        if candidate.is_interactable() and candidate.get_tag_name() == element_type:
            return candidate

        try:
            labelled = await candidate.find_label_for(dom=dom)
            if labelled is not None and labelled.get_tag_name() == element_type:
                return labelled
        except Exception:
            LOG.error(
                "Failed to find element by label-for",
                element=candidate.__static_element,
                exc_info=True,
            )

        # enqueue the children so they are visited after the current level
        for child in candidate.__static_element.get("children", []):
            pending.append(await dom.get_skyvern_element_by_id(child.get("id", "")))

    return None
async def get_attr(
self,
attr_name: str,

View File

@@ -145,9 +145,9 @@ class SkyvernFrame:
async with asyncio.timeout(timeout):
return await self.frame.content()
async def scroll_to_element_bottom(self, element: ElementHandle) -> None:
js_script = "(element) => scrollToElementBottom(element)"
return await self.frame.evaluate(js_script, element)
async def scroll_to_element_bottom(self, element: ElementHandle, page_by_page: bool = False) -> None:
js_script = "([element, page_by_page]) => scrollToElementBottom(element, page_by_page)"
return await self.frame.evaluate(js_script, [element, page_by_page])
async def scroll_to_element_top(self, element: ElementHandle) -> None:
js_script = "(element) => scrollToElementTop(element)"