fix selection issues (#1515)

This commit is contained in:
LawyZheng
2025-01-08 14:27:50 +08:00
committed by GitHub
parent ff8405d1d5
commit c6140fa405
8 changed files with 183 additions and 13 deletions

View File

@@ -16,6 +16,7 @@ SAVE_DOWNLOADED_FILES_TIMEOUT = 180
GET_DOWNLOADED_FILES_TIMEOUT = 30 GET_DOWNLOADED_FILES_TIMEOUT = 30
NAVIGATION_MAX_RETRY_TIME = 5 NAVIGATION_MAX_RETRY_TIME = 5
AUTO_COMPLETION_POTENTIAL_VALUES_COUNT = 5 AUTO_COMPLETION_POTENTIAL_VALUES_COUNT = 5
DROPDOWN_MENU_MAX_DISTANCE = 100
# reserved fields for navigation payload # reserved fields for navigation payload
SPECIAL_FIELD_VERIFICATION_CODE = "verification_code" SPECIAL_FIELD_VERIFICATION_CODE = "verification_code"

View File

@@ -267,6 +267,16 @@ async def _convert_css_shape_to_string(
try: try:
LOG.debug("call LLM to convert css shape to string shape", element_id=element_id) LOG.debug("call LLM to convert css shape to string shape", element_id=element_id)
if not await locater.is_visible(timeout=settings.BROWSER_ACTION_TIMEOUT_MS):
LOG.info(
"element is not visible on the page, going to abort conversion",
task_id=task_id,
step_id=step_id,
element_id=element_id,
key=shape_key,
)
return None
screenshot = await locater.screenshot(timeout=settings.BROWSER_SCREENSHOT_TIMEOUT_MS) screenshot = await locater.screenshot(timeout=settings.BROWSER_SCREENSHOT_TIMEOUT_MS)
prompt = prompt_engine.load_prompt("css-shape-convert") prompt = prompt_engine.load_prompt("css-shape-convert")

View File

@@ -2,7 +2,7 @@ You are performing a {{ "multi-level selection" if select_history else "selectio
You can identify the matching element based on the following guidelines: You can identify the matching element based on the following guidelines:
1. Select the most suitable element based on the user goal, user details, and the context. 1. Select the most suitable element based on the user goal, user details, and the context.
2. If no option is a perfect match, and there is a fallback option such as "Others" or "None of the above" in the DOM elements, you can consider it a match. 2. If none of the options perfectly match, and there is no search box for input, but there is a fallback option such as "Others" or "None of the above" in the DOM elements, you can consider it a match.
3. If a field is required, do not leave it blank. 3. If a field is required, do not leave it blank.
4. If a field is required, do not select a placeholder value, such as "Please select", "-", or "Select...". 4. If a field is required, do not select a placeholder value, such as "Please select", "-", or "Select...".
5. Exclude loading indicators like "loading more results" as valid options.{% if select_history %} 5. Exclude loading indicators like "loading more results" as valid options.{% if select_history %}

View File

@@ -2,7 +2,7 @@ There is a screenshot from a part of a web HTML page. Help me confirm if it is a
An open dropdown menu can be defined as: An open dropdown menu can be defined as:
- At least one option is visible in the screenshot. - At least one option is visible in the screenshot.
- A calendar view or date picker could be considered as an open dropdown menu. - A calendar view could be considered as an open dropdown menu. But DO NOT consider a calendar icon as the dropdown menu.
- Do not consider it an open dropdown menu if the only visible option displays a message like "No results" or "No match". - Do not consider it an open dropdown menu if the only visible option displays a message like "No results" or "No match".
- Do not consider it an open dropdown menu if the only visible element displays a placeholder like "Please select", "-", or "Select...". - Do not consider it an open dropdown menu if the only visible element displays a placeholder like "Please select", "-", or "Select...".

View File

@@ -17,6 +17,7 @@ from skyvern.config import settings
from skyvern.constants import ( from skyvern.constants import (
AUTO_COMPLETION_POTENTIAL_VALUES_COUNT, AUTO_COMPLETION_POTENTIAL_VALUES_COUNT,
BROWSER_DOWNLOAD_TIMEOUT, BROWSER_DOWNLOAD_TIMEOUT,
DROPDOWN_MENU_MAX_DISTANCE,
REPO_ROOT_DIR, REPO_ROOT_DIR,
SKYVERN_ID_ATTR, SKYVERN_ID_ATTR,
) )
@@ -579,6 +580,16 @@ async def handle_input_text_action(
# press arrowdown to watch if there's any options popping up # press arrowdown to watch if there's any options popping up
await incremental_scraped.start_listen_dom_increment() await incremental_scraped.start_listen_dom_increment()
try:
await skyvern_element.input_clear()
except Exception:
LOG.info(
"Failed to clear up the input, but continue to input",
task_id=task.task_id,
step_id=step.step_id,
element_id=skyvern_element.get_id(),
)
try: try:
await skyvern_element.press_key("ArrowDown") await skyvern_element.press_key("ArrowDown")
except TimeoutError: except TimeoutError:
@@ -613,6 +624,7 @@ async def handle_input_text_action(
action=select_action, action=select_action,
page=page, page=page,
dom=dom, dom=dom,
skyvern_element=skyvern_element,
skyvern_frame=skyvern_frame, skyvern_frame=skyvern_frame,
incremental_scraped=incremental_scraped, incremental_scraped=incremental_scraped,
step=step, step=step,
@@ -641,14 +653,29 @@ async def handle_input_text_action(
) )
except Exception: except Exception:
await skyvern_element.scroll_into_view()
LOG.warning( LOG.warning(
"Failed to do custom selection transformed from input action, continue to input text", "Failed to do custom selection transformed from input action, continue to input text",
exc_info=True, exc_info=True,
task_id=task.task_id, task_id=task.task_id,
step_id=step.step_id, step_id=step.step_id,
) )
await skyvern_element.scroll_into_view()
finally: finally:
blocking_element, exist = await skyvern_element.find_blocking_element(
dom=dom, incremental_page=incremental_scraped
)
if blocking_element and exist:
LOG.info(
"Find a blocking element to the current element, going to blur the blocking element first",
task_id=task.task_id,
step_id=step.step_id,
blocking_element=blocking_element.get_locator(),
)
if await blocking_element.get_locator().count():
await blocking_element.press_key("Escape")
if await blocking_element.get_locator().count():
await blocking_element.blur()
await skyvern_element.press_key("Escape") await skyvern_element.press_key("Escape")
await skyvern_element.blur() await skyvern_element.blur()
await incremental_scraped.stop_listen_dom_increment() await incremental_scraped.stop_listen_dom_increment()
@@ -672,6 +699,25 @@ async def handle_input_text_action(
await skyvern_element.press_fill(text=text) await skyvern_element.press_fill(text=text)
return [ActionSuccess()] return [ActionSuccess()]
# wait 2s for blocking element to show up
await asyncio.sleep(2)
try:
blocking_element, exist = await skyvern_element.find_blocking_element(
dom=dom, incremental_page=incremental_scraped
)
if blocking_element and exist:
LOG.warning(
"Find a blocking element to the current element, going to input on the blocking element",
)
skyvern_element = blocking_element
except Exception:
LOG.info(
"Failed to find the blocking element, continue with the orignal element",
exc_info=True,
task_id=task.task_id,
step_id=step.step_id,
)
try: try:
# TODO: not sure if this case will trigger auto-completion # TODO: not sure if this case will trigger auto-completion
if tag_name not in COMMON_INPUT_TAGS: if tag_name not in COMMON_INPUT_TAGS:
@@ -1032,6 +1078,7 @@ async def handle_select_option_action(
action=action, action=action,
page=page, page=page,
dom=dom, dom=dom,
skyvern_element=skyvern_element,
skyvern_frame=skyvern_frame, skyvern_frame=skyvern_frame,
incremental_scraped=incremental_scraped, incremental_scraped=incremental_scraped,
step=step, step=step,
@@ -1087,6 +1134,7 @@ async def handle_select_option_action(
value=suggested_value, value=suggested_value,
page=page, page=page,
dom=dom, dom=dom,
skyvern_element=skyvern_element,
skyvern_frame=skyvern_frame, skyvern_frame=skyvern_frame,
incremental_scraped=incremental_scraped, incremental_scraped=incremental_scraped,
task=task, task=task,
@@ -1765,6 +1813,7 @@ async def sequentially_select_from_dropdown(
action: SelectOptionAction, action: SelectOptionAction,
page: Page, page: Page,
dom: DomUtil, dom: DomUtil,
skyvern_element: SkyvernElement,
skyvern_frame: SkyvernFrame, skyvern_frame: SkyvernFrame,
incremental_scraped: IncrementalScrapePage, incremental_scraped: IncrementalScrapePage,
step: Step, step: Step,
@@ -1812,6 +1861,7 @@ async def sequentially_select_from_dropdown(
single_select_result = await select_from_dropdown( single_select_result = await select_from_dropdown(
context=input_or_select_context, context=input_or_select_context,
page=page, page=page,
skyvern_element=skyvern_element,
skyvern_frame=skyvern_frame, skyvern_frame=skyvern_frame,
incremental_scraped=incremental_scraped, incremental_scraped=incremental_scraped,
check_exist_funcs=check_exist_funcs, check_exist_funcs=check_exist_funcs,
@@ -1887,6 +1937,7 @@ def build_sequential_select_history(history_list: list[CustomSingleSelectResult]
async def select_from_dropdown( async def select_from_dropdown(
context: InputOrSelectContext, context: InputOrSelectContext,
page: Page, page: Page,
skyvern_element: SkyvernElement,
skyvern_frame: SkyvernFrame, skyvern_frame: SkyvernFrame,
incremental_scraped: IncrementalScrapePage, incremental_scraped: IncrementalScrapePage,
check_exist_funcs: list[CheckExistIDFunc], check_exist_funcs: list[CheckExistIDFunc],
@@ -1911,6 +1962,7 @@ async def select_from_dropdown(
if dropdown_menu_element is None: if dropdown_menu_element is None:
dropdown_menu_element = await locate_dropdown_menu( dropdown_menu_element = await locate_dropdown_menu(
current_anchor_element=skyvern_element,
incremental_scraped=incremental_scraped, incremental_scraped=incremental_scraped,
step=step, step=step,
task=task, task=task,
@@ -2059,6 +2111,7 @@ async def select_from_dropdown(
async def select_from_dropdown_by_value( async def select_from_dropdown_by_value(
value: str, value: str,
page: Page, page: Page,
skyvern_element: SkyvernElement,
skyvern_frame: SkyvernFrame, skyvern_frame: SkyvernFrame,
dom: DomUtil, dom: DomUtil,
incremental_scraped: IncrementalScrapePage, incremental_scraped: IncrementalScrapePage,
@@ -2078,6 +2131,7 @@ async def select_from_dropdown_by_value(
if dropdown_menu_element is None: if dropdown_menu_element is None:
dropdown_menu_element = await locate_dropdown_menu( dropdown_menu_element = await locate_dropdown_menu(
current_anchor_element=skyvern_element,
incremental_scraped=incremental_scraped, incremental_scraped=incremental_scraped,
step=step, step=step,
task=task, task=task,
@@ -2131,6 +2185,7 @@ async def select_from_dropdown_by_value(
async def locate_dropdown_menu( async def locate_dropdown_menu(
current_anchor_element: SkyvernElement,
incremental_scraped: IncrementalScrapePage, incremental_scraped: IncrementalScrapePage,
step: Step, step: Step,
task: Task, task: Task,
@@ -2164,6 +2219,30 @@ async def locate_dropdown_menu(
) )
continue continue
try:
if not await head_element.is_next_to_element(
target_locator=current_anchor_element.get_locator(),
max_x_distance=DROPDOWN_MENU_MAX_DISTANCE,
max_y_distance=DROPDOWN_MENU_MAX_DISTANCE,
):
LOG.debug(
"Skip the element since it's too far away from the anchor element",
step_id=step.step_id,
task_id=task.task_id,
element_id=element_id,
)
continue
except Exception:
LOG.info(
"Failed to calculate the distance between the elements",
element_id=element_id,
step_id=step.step_id,
task_id=task.task_id,
exc_info=True,
)
continue
if not await skyvern_frame.get_element_visible(await head_element.get_element_handler()): if not await skyvern_frame.get_element_visible(await head_element.get_element_handler()):
LOG.debug( LOG.debug(
"Skip the element since it's invisible", "Skip the element since it's invisible",

View File

@@ -770,6 +770,21 @@ const isComboboxDropdown = (element) => {
return role && haspopup && controls && readonly; return role && haspopup && controls && readonly;
}; };
// Detects a <div> acting as a combobox trigger: role="combobox", an
// aria-controls attribute, and an aria-haspopup value that declares a popup.
// Always returns a strict boolean so that callers combining it with `||`
// never end up with the raw attribute string as their result.
const isDivComboboxDropdown = (element) => {
  if (element.tagName.toLowerCase() !== "div") {
    return false;
  }

  const role = (element.getAttribute("role") || "").toLowerCase();
  const haspopup = (element.getAttribute("aria-haspopup") || "").toLowerCase();
  const controls = element.hasAttribute("aria-controls");

  // aria-haspopup="false" explicitly states that the element has no popup,
  // so it must not be treated the same as "true" / "listbox" / "menu" etc.
  const declaresPopup = haspopup !== "" && haspopup !== "false";

  return role === "combobox" && controls && declaresPopup;
};
const isDropdownButton = (element) => { const isDropdownButton = (element) => {
const tagName = element.tagName.toLowerCase(); const tagName = element.tagName.toLowerCase();
const type = element.getAttribute("type") const type = element.getAttribute("type")
@@ -1182,6 +1197,7 @@ function buildElementObject(frame, element, interactable, purgeable = false) {
elementTagNameLower === "svg" || element.closest("svg") !== null, elementTagNameLower === "svg" || element.closest("svg") !== null,
isSelectable: isSelectable:
elementTagNameLower === "select" || elementTagNameLower === "select" ||
isDivComboboxDropdown(element) ||
isDropdownButton(element) || isDropdownButton(element) ||
isAngularDropdown(element) || isAngularDropdown(element) ||
isSelect2Dropdown(element) || isSelect2Dropdown(element) ||
@@ -2083,9 +2099,12 @@ if (window.globalObserverForDOMIncrement === undefined) {
} }
if (mutation.attributeName === "class") { if (mutation.attributeName === "class") {
const node = mutation.target; const node = mutation.target;
if (node.nodeType === Node.TEXT_NODE) continue;
if (node.tagName.toLowerCase() === "body") continue;
if (!mutation.oldValue) continue;
if ( if (
!mutation.oldValue || !isClassNameIncludesHidden(mutation.oldValue) &&
!isClassNameIncludesHidden(mutation.oldValue) !node.hasAttribute("data-menu-uid") // google framework use this to trace dropdown menu
) )
continue; continue;
const newStyle = getElementComputedStyle(node); const newStyle = getElementComputedStyle(node);

View File

@@ -541,6 +541,12 @@ class IncrementalScrapePage:
self.element_tree_trimmed: list[dict] = list() self.element_tree_trimmed: list[dict] = list()
self.skyvern_frame = skyvern_frame self.skyvern_frame = skyvern_frame
def check_id_in_page(self, element_id: str) -> bool:
    """Report whether `element_id` has a non-empty CSS selector in `id_to_css_dict`.

    An id that is missing from the mapping, or mapped to an empty selector,
    is reported as not present.
    """
    return bool(self.id_to_css_dict.get(element_id, ""))
async def get_incremental_element_tree( async def get_incremental_element_tree(
self, self,
cleanup_element_tree: CleanupElementTreeFunc, cleanup_element_tree: CleanupElementTreeFunc,

View File

@@ -318,12 +318,21 @@ class SkyvernElement:
assert handler is not None assert handler is not None
return handler return handler
async def find_blocking_element(self, dom: DomUtil) -> tuple[SkyvernElement | None, bool]: async def find_blocking_element(
self, dom: DomUtil, incremental_page: IncrementalScrapePage | None = None
) -> tuple[SkyvernElement | None, bool]:
skyvern_frame = await SkyvernFrame.create_instance(self.get_frame()) skyvern_frame = await SkyvernFrame.create_instance(self.get_frame())
blocking_element_id, blocked = await skyvern_frame.get_blocking_element_id(await self.get_element_handler()) blocking_element_id, blocked = await skyvern_frame.get_blocking_element_id(await self.get_element_handler())
if not blocking_element_id: if not blocking_element_id:
return None, blocked return None, blocked
return await dom.get_skyvern_element_by_id(blocking_element_id), blocked
if dom.check_id_in_dom(blocking_element_id):
return await dom.get_skyvern_element_by_id(blocking_element_id), blocked
if incremental_page and incremental_page.check_id_in_page(blocking_element_id):
return await SkyvernElement.create_from_incremental(incremental_page, blocking_element_id), blocked
return None, blocked
async def find_element_in_label_children( async def find_element_in_label_children(
self, dom: DomUtil, element_type: InteractiveElement self, dom: DomUtil, element_type: InteractiveElement
@@ -590,10 +599,9 @@ class SkyvernElement:
await self.focus(timeout=timeout) await self.focus(timeout=timeout)
await asyncio.sleep(2) # wait for scrolling into the target await asyncio.sleep(2) # wait for scrolling into the target
async def calculate_vertical_distance_to( async def calculate_min_y_distance_to(
self, self,
target_locator: Locator, target_locator: Locator,
mode: typing.Literal["inner", "outer"],
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS, timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
) -> float: ) -> float:
self_rect = await self.get_locator().bounding_box(timeout=timeout) self_rect = await self.get_locator().bounding_box(timeout=timeout)
@@ -604,10 +612,57 @@ class SkyvernElement:
if self_rect is None or target_rect is None: if self_rect is None or target_rect is None:
raise Exception("Can't get the target element rect") raise Exception("Can't get the target element rect")
if mode == "inner": y_1 = self_rect["y"] + self_rect["height"] - target_rect["y"]
return abs(self_rect["y"] + self_rect["height"] - target_rect["y"]) y_2 = self_rect["y"] - (target_rect["y"] + target_rect["height"])
else:
return abs(self_rect["y"] - (target_rect["y"] + target_rect["height"])) # if y1 * y2 <= 0, it means the two elements are overlapping
if y_1 * y_2 <= 0:
return 0
return min(
abs(y_1),
abs(y_2),
)
async def calculate_min_x_distance_to(
    self,
    target_locator: Locator,
    timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
) -> float:
    """Return the smallest horizontal gap between this element and the target.

    Args:
        target_locator: locator of the element to measure against.
        timeout: timeout passed to each `bounding_box` lookup.

    Returns:
        0 when the two boxes overlap or touch on the x axis, otherwise the
        distance between their nearest vertical edges.

    Raises:
        Exception: if a bounding box cannot be obtained for either element.
    """
    self_rect = await self.get_locator().bounding_box(timeout=timeout)
    if self_rect is None:
        raise Exception("Can't get Skyvern element rect")

    target_rect = await target_locator.bounding_box(timeout=timeout)
    if target_rect is None:
        raise Exception("Can't get the target element rect")

    # Signed offsets: own right edge vs. target left edge, and own left edge
    # vs. target right edge.
    x_1 = self_rect["x"] + self_rect["width"] - target_rect["x"]
    x_2 = self_rect["x"] - (target_rect["x"] + target_rect["width"])

    # Opposite signs (or a zero) mean the horizontal ranges intersect.
    if x_1 * x_2 <= 0:
        return 0

    return min(abs(x_1), abs(x_2))
async def is_next_to_element(
    self,
    target_locator: Locator,
    max_x_distance: float = 0,
    max_y_distance: float = 0,
    timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
) -> bool:
    """Check whether this element lies within the given distances of the target.

    An axis is only checked when its limit is greater than 0; with both
    limits left at 0 the method returns True without measuring anything.

    Args:
        target_locator: locator of the element to compare against.
        max_x_distance: largest allowed horizontal gap; values <= 0 skip the x check.
        max_y_distance: largest allowed vertical gap; values <= 0 skip the y check.
        timeout: timeout forwarded to the distance calculations.

    Returns:
        False as soon as one checked axis exceeds its limit, True otherwise.
    """
    if max_x_distance > 0:
        horizontal_gap = await self.calculate_min_x_distance_to(target_locator, timeout)
        if horizontal_gap > max_x_distance:
            return False

    if max_y_distance > 0:
        vertical_gap = await self.calculate_min_y_distance_to(target_locator, timeout)
        if vertical_gap > max_y_distance:
            return False

    return True
class DomUtil: class DomUtil: