fix selection issues (#1515)

This commit is contained in:
LawyZheng
2025-01-08 14:27:50 +08:00
committed by GitHub
parent ff8405d1d5
commit c6140fa405
8 changed files with 183 additions and 13 deletions

View File

@@ -16,6 +16,7 @@ SAVE_DOWNLOADED_FILES_TIMEOUT = 180
GET_DOWNLOADED_FILES_TIMEOUT = 30
NAVIGATION_MAX_RETRY_TIME = 5
AUTO_COMPLETION_POTENTIAL_VALUES_COUNT = 5
# largest per-axis gap allowed between an anchor element and a candidate dropdown menu;
# passed as both max_x_distance and max_y_distance when locating the dropdown menu
# NOTE(review): presumably pixels, since it is compared against bounding-box distances -- confirm
DROPDOWN_MENU_MAX_DISTANCE = 100
# reserved fields for navigation payload
SPECIAL_FIELD_VERIFICATION_CODE = "verification_code"

View File

@@ -267,6 +267,16 @@ async def _convert_css_shape_to_string(
try:
LOG.debug("call LLM to convert css shape to string shape", element_id=element_id)
if not await locater.is_visible(timeout=settings.BROWSER_ACTION_TIMEOUT_MS):
LOG.info(
"element is not visible on the page, going to abort conversion",
task_id=task_id,
step_id=step_id,
element_id=element_id,
key=shape_key,
)
return None
screenshot = await locater.screenshot(timeout=settings.BROWSER_SCREENSHOT_TIMEOUT_MS)
prompt = prompt_engine.load_prompt("css-shape-convert")

View File

@@ -2,7 +2,7 @@ You are performing a {{ "multi-level selection" if select_history else "selectio
You can identify the matching element based on the following guidelines:
1. Select the most suitable element based on the user goal, user details, and the context.
2. If no option is a perfect match, and there is a fallback option such as "Others" or "None of the above" in the DOM elements, you can consider it a match.
2. If none of the options perfectly match, and there is no search box for input, but there is a fallback option such as "Others" or "None of the above" in the DOM elements, you can consider it a match.
3. If a field is required, do not leave it blank.
4. If a field is required, do not select a placeholder value, such as "Please select", "-", or "Select...".
5. Exclude loading indicators like "loading more results" as valid options.{% if select_history %}

View File

@@ -2,7 +2,7 @@ There is a screenshot from a part of a web HTML page. Help me confirm if it is a
An open dropdown menu can be defined as:
- At least one option is visible in the screenshot.
- A calendar view or date picker could be considered as an open dropdown menu.
- A calendar view could be considered as an open dropdown menu. But DO NOT consider a calendar icon as the dropdown menu.
- Do not consider it an open dropdown menu if the only visible option displays a message like "No results" or "No match".
- Do not consider it an open dropdown menu if the only visible element displays a placeholder like "Please select", "-", or "Select...".

View File

@@ -17,6 +17,7 @@ from skyvern.config import settings
from skyvern.constants import (
AUTO_COMPLETION_POTENTIAL_VALUES_COUNT,
BROWSER_DOWNLOAD_TIMEOUT,
DROPDOWN_MENU_MAX_DISTANCE,
REPO_ROOT_DIR,
SKYVERN_ID_ATTR,
)
@@ -579,6 +580,16 @@ async def handle_input_text_action(
# press arrowdown to watch if there's any options popping up
await incremental_scraped.start_listen_dom_increment()
try:
await skyvern_element.input_clear()
except Exception:
LOG.info(
"Failed to clear up the input, but continue to input",
task_id=task.task_id,
step_id=step.step_id,
element_id=skyvern_element.get_id(),
)
try:
await skyvern_element.press_key("ArrowDown")
except TimeoutError:
@@ -613,6 +624,7 @@ async def handle_input_text_action(
action=select_action,
page=page,
dom=dom,
skyvern_element=skyvern_element,
skyvern_frame=skyvern_frame,
incremental_scraped=incremental_scraped,
step=step,
@@ -641,14 +653,29 @@ async def handle_input_text_action(
)
except Exception:
await skyvern_element.scroll_into_view()
LOG.warning(
"Failed to do custom selection transformed from input action, continue to input text",
exc_info=True,
task_id=task.task_id,
step_id=step.step_id,
)
await skyvern_element.scroll_into_view()
finally:
blocking_element, exist = await skyvern_element.find_blocking_element(
dom=dom, incremental_page=incremental_scraped
)
if blocking_element and exist:
LOG.info(
"Find a blocking element to the current element, going to blur the blocking element first",
task_id=task.task_id,
step_id=step.step_id,
blocking_element=blocking_element.get_locator(),
)
if await blocking_element.get_locator().count():
await blocking_element.press_key("Escape")
if await blocking_element.get_locator().count():
await blocking_element.blur()
await skyvern_element.press_key("Escape")
await skyvern_element.blur()
await incremental_scraped.stop_listen_dom_increment()
@@ -672,6 +699,25 @@ async def handle_input_text_action(
await skyvern_element.press_fill(text=text)
return [ActionSuccess()]
# wait 2s for blocking element to show up
await asyncio.sleep(2)
try:
blocking_element, exist = await skyvern_element.find_blocking_element(
dom=dom, incremental_page=incremental_scraped
)
if blocking_element and exist:
LOG.warning(
"Find a blocking element to the current element, going to input on the blocking element",
)
skyvern_element = blocking_element
except Exception:
LOG.info(
"Failed to find the blocking element, continue with the orignal element",
exc_info=True,
task_id=task.task_id,
step_id=step.step_id,
)
try:
# TODO: not sure if this case will trigger auto-completion
if tag_name not in COMMON_INPUT_TAGS:
@@ -1032,6 +1078,7 @@ async def handle_select_option_action(
action=action,
page=page,
dom=dom,
skyvern_element=skyvern_element,
skyvern_frame=skyvern_frame,
incremental_scraped=incremental_scraped,
step=step,
@@ -1087,6 +1134,7 @@ async def handle_select_option_action(
value=suggested_value,
page=page,
dom=dom,
skyvern_element=skyvern_element,
skyvern_frame=skyvern_frame,
incremental_scraped=incremental_scraped,
task=task,
@@ -1765,6 +1813,7 @@ async def sequentially_select_from_dropdown(
action: SelectOptionAction,
page: Page,
dom: DomUtil,
skyvern_element: SkyvernElement,
skyvern_frame: SkyvernFrame,
incremental_scraped: IncrementalScrapePage,
step: Step,
@@ -1812,6 +1861,7 @@ async def sequentially_select_from_dropdown(
single_select_result = await select_from_dropdown(
context=input_or_select_context,
page=page,
skyvern_element=skyvern_element,
skyvern_frame=skyvern_frame,
incremental_scraped=incremental_scraped,
check_exist_funcs=check_exist_funcs,
@@ -1887,6 +1937,7 @@ def build_sequential_select_history(history_list: list[CustomSingleSelectResult]
async def select_from_dropdown(
context: InputOrSelectContext,
page: Page,
skyvern_element: SkyvernElement,
skyvern_frame: SkyvernFrame,
incremental_scraped: IncrementalScrapePage,
check_exist_funcs: list[CheckExistIDFunc],
@@ -1911,6 +1962,7 @@ async def select_from_dropdown(
if dropdown_menu_element is None:
dropdown_menu_element = await locate_dropdown_menu(
current_anchor_element=skyvern_element,
incremental_scraped=incremental_scraped,
step=step,
task=task,
@@ -2059,6 +2111,7 @@ async def select_from_dropdown(
async def select_from_dropdown_by_value(
value: str,
page: Page,
skyvern_element: SkyvernElement,
skyvern_frame: SkyvernFrame,
dom: DomUtil,
incremental_scraped: IncrementalScrapePage,
@@ -2078,6 +2131,7 @@ async def select_from_dropdown_by_value(
if dropdown_menu_element is None:
dropdown_menu_element = await locate_dropdown_menu(
current_anchor_element=skyvern_element,
incremental_scraped=incremental_scraped,
step=step,
task=task,
@@ -2131,6 +2185,7 @@ async def select_from_dropdown_by_value(
async def locate_dropdown_menu(
current_anchor_element: SkyvernElement,
incremental_scraped: IncrementalScrapePage,
step: Step,
task: Task,
@@ -2164,6 +2219,30 @@ async def locate_dropdown_menu(
)
continue
try:
if not await head_element.is_next_to_element(
target_locator=current_anchor_element.get_locator(),
max_x_distance=DROPDOWN_MENU_MAX_DISTANCE,
max_y_distance=DROPDOWN_MENU_MAX_DISTANCE,
):
LOG.debug(
"Skip the element since it's too far away from the anchor element",
step_id=step.step_id,
task_id=task.task_id,
element_id=element_id,
)
continue
except Exception:
LOG.info(
"Failed to calculate the distance between the elements",
element_id=element_id,
step_id=step.step_id,
task_id=task.task_id,
exc_info=True,
)
continue
if not await skyvern_frame.get_element_visible(await head_element.get_element_handler()):
LOG.debug(
"Skip the element since it's invisible",

View File

@@ -770,6 +770,21 @@ const isComboboxDropdown = (element) => {
return role && haspopup && controls && readonly;
};
// Detects a <div> that acts as an ARIA combobox trigger for a popup list.
// The element qualifies only when all of the following hold:
//   - its tag is <div>
//   - role="combobox" (case-insensitive)
//   - it carries an aria-controls attribute (any value)
//   - aria-haspopup is present, non-empty and not "false"
// Always returns a strict boolean so callers that OR several detectors
// together (e.g. isSelectable) never end up with an attribute string.
const isDivComboboxDropdown = (element) => {
  if (element.tagName.toLowerCase() !== "div") {
    return false;
  }

  const role = (element.getAttribute("role") || "").toLowerCase();
  if (role !== "combobox" || !element.hasAttribute("aria-controls")) {
    return false;
  }

  // aria-haspopup="false" explicitly declares that there is NO popup, so the
  // literal string must not be mistaken for a truthy "has popup" marker.
  const haspopup = (element.getAttribute("aria-haspopup") || "").toLowerCase();
  return haspopup !== "" && haspopup !== "false";
};
const isDropdownButton = (element) => {
const tagName = element.tagName.toLowerCase();
const type = element.getAttribute("type")
@@ -1182,6 +1197,7 @@ function buildElementObject(frame, element, interactable, purgeable = false) {
elementTagNameLower === "svg" || element.closest("svg") !== null,
isSelectable:
elementTagNameLower === "select" ||
isDivComboboxDropdown(element) ||
isDropdownButton(element) ||
isAngularDropdown(element) ||
isSelect2Dropdown(element) ||
@@ -2083,9 +2099,12 @@ if (window.globalObserverForDOMIncrement === undefined) {
}
if (mutation.attributeName === "class") {
const node = mutation.target;
if (node.nodeType === Node.TEXT_NODE) continue;
if (node.tagName.toLowerCase() === "body") continue;
if (!mutation.oldValue) continue;
if (
!mutation.oldValue ||
!isClassNameIncludesHidden(mutation.oldValue)
!isClassNameIncludesHidden(mutation.oldValue) &&
!node.hasAttribute("data-menu-uid") // google framework use this to trace dropdown menu
)
continue;
const newStyle = getElementComputedStyle(node);

View File

@@ -541,6 +541,12 @@ class IncrementalScrapePage:
self.element_tree_trimmed: list[dict] = list()
self.skyvern_frame = skyvern_frame
def check_id_in_page(self, element_id: str) -> bool:
    """Report whether ``element_id`` has a usable CSS selector recorded for this page.

    Returns ``True`` only when ``self.id_to_css_dict`` maps the id to a
    non-empty selector; an unknown id and an empty selector both give ``False``.
    """
    return bool(self.id_to_css_dict.get(element_id, ""))
async def get_incremental_element_tree(
self,
cleanup_element_tree: CleanupElementTreeFunc,

View File

@@ -318,12 +318,21 @@ class SkyvernElement:
assert handler is not None
return handler
async def find_blocking_element(
    self, dom: DomUtil, incremental_page: IncrementalScrapePage | None = None
) -> tuple[SkyvernElement | None, bool]:
    """Look up the element, if any, that the frame reports as blocking this one.

    The blocking element id is resolved first against ``dom`` and then, when
    supplied, against ``incremental_page``.

    Args:
        dom: DOM helper used to check for, and build, the blocking element.
        incremental_page: optional incremental scrape consulted when the id
            is not known to ``dom``.

    Returns:
        ``(element, blocked)``. ``element`` is ``None`` when no blocking id was
        reported or when the id is found in neither source. ``blocked`` is the
        flag returned by ``get_blocking_element_id``, passed through unchanged.
    """
    skyvern_frame = await SkyvernFrame.create_instance(self.get_frame())
    blocking_element_id, blocked = await skyvern_frame.get_blocking_element_id(await self.get_element_handler())
    if not blocking_element_id:
        return None, blocked

    # check membership first so an id unknown to the DOM falls through to the
    # incremental page instead of being looked up unconditionally
    if dom.check_id_in_dom(blocking_element_id):
        return await dom.get_skyvern_element_by_id(blocking_element_id), blocked

    if incremental_page and incremental_page.check_id_in_page(blocking_element_id):
        return await SkyvernElement.create_from_incremental(incremental_page, blocking_element_id), blocked

    return None, blocked
async def find_element_in_label_children(
self, dom: DomUtil, element_type: InteractiveElement
@@ -590,10 +599,9 @@ class SkyvernElement:
await self.focus(timeout=timeout)
await asyncio.sleep(2) # wait for scrolling into the target
async def calculate_vertical_distance_to(
async def calculate_min_y_distance_to(
self,
target_locator: Locator,
mode: typing.Literal["inner", "outer"],
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
) -> float:
self_rect = await self.get_locator().bounding_box(timeout=timeout)
@@ -604,10 +612,57 @@ class SkyvernElement:
if self_rect is None or target_rect is None:
raise Exception("Can't get the target element rect")
if mode == "inner":
return abs(self_rect["y"] + self_rect["height"] - target_rect["y"])
else:
return abs(self_rect["y"] - (target_rect["y"] + target_rect["height"]))
y_1 = self_rect["y"] + self_rect["height"] - target_rect["y"]
y_2 = self_rect["y"] - (target_rect["y"] + target_rect["height"])
# if y1 * y2 <= 0, it means the two elements are overlapping
if y_1 * y_2 <= 0:
return 0
return min(
abs(y_1),
abs(y_2),
)
async def calculate_min_x_distance_to(
    self,
    target_locator: Locator,
    timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
) -> float:
    """Return the smallest horizontal gap between this element and the target.

    Both bounding boxes are fetched with ``timeout``. When the two horizontal
    ranges overlap or touch, the distance is ``0``; otherwise it is the gap
    between the nearest pair of left/right edges.

    Args:
        target_locator: locator of the element to measure against.
        timeout: timeout forwarded to each ``bounding_box`` call.

    Raises:
        Exception: if either element's bounding box is unavailable.
    """
    self_rect = await self.get_locator().bounding_box(timeout=timeout)
    if self_rect is None:
        raise Exception("Can't get Skyvern element rect")

    target_rect = await target_locator.bounding_box(timeout=timeout)
    if target_rect is None:
        raise Exception("Can't get the target element rect")

    # signed offsets: own right edge vs. target left edge, and own left edge
    # vs. target right edge
    x_1 = self_rect["x"] + self_rect["width"] - target_rect["x"]
    x_2 = self_rect["x"] - (target_rect["x"] + target_rect["width"])

    # if x1 * x2 <= 0, it means the two elements are overlapping
    if x_1 * x_2 <= 0:
        return 0

    return min(abs(x_1), abs(x_2))
async def is_next_to_element(
    self,
    target_locator: Locator,
    max_x_distance: float = 0,
    max_y_distance: float = 0,
    timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
) -> bool:
    """Tell whether this element lies within the given distances of the target.

    The horizontal limit is checked first, then the vertical one. A limit
    that is not greater than 0 disables the check on that axis, so with both
    defaults the result is always ``True``.

    Args:
        target_locator: locator of the element to compare against.
        max_x_distance: largest accepted horizontal distance.
        max_y_distance: largest accepted vertical distance.
        timeout: timeout forwarded to the distance calculations.
    """
    axis_checks = (
        (max_x_distance, self.calculate_min_x_distance_to),
        (max_y_distance, self.calculate_min_y_distance_to),
    )
    for limit, measure in axis_checks:
        if not limit > 0:
            # this axis is unconstrained; do not measure it at all
            continue
        if await measure(target_locator, timeout) > limit:
            return False
    return True
class DomUtil: