extend auto completion coverage (#1165)
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
You're doing an auto completion input action on an HTML page. The current filled value doesn't match any option.
|
||||
Based on the context and current value, give ten most potential values with the same meaning as the current value.
|
||||
Based on the context, current value, user goal and user details, give ten most potential values with the same meaning as the current value.
|
||||
You can provide values like:
|
||||
- Subset or superset meaning from the current value
|
||||
- Summarized from the current value
|
||||
@@ -26,4 +26,14 @@ Choose an auto-completion suggestion for "{{ field_information }}"
|
||||
Current Value:
|
||||
```
|
||||
{{ current_value }}
|
||||
```
|
||||
|
||||
User goal:
|
||||
```
|
||||
{{ navigation_goal }}
|
||||
```
|
||||
|
||||
User details:
|
||||
```
|
||||
{{ navigation_payload_str }}
|
||||
```
|
||||
@@ -1,5 +1,5 @@
|
||||
You're doing an auto completion input action on an HTML page. User has tried several values, but none of them could find a match.
|
||||
Based on the context, current value, tried values, option elements popped up while typing, tweak the value into a reasonable one based on the information.
|
||||
Based on the context, current value, tried values, user goal, user details and option elements popped up while typing, tweak the value into a reasonable one based on the information.
|
||||
You can try to change the value under the following rules:
|
||||
1. the value must be reasonably changed from the current value, like superset, subset of the current value
|
||||
2. If there're popped up elements, find the common concept among all elements, and then tweak the current value into a reasonable value based on the same concept.
|
||||
@@ -32,6 +32,16 @@ Tried Values:
|
||||
{{ tried_values }}
|
||||
```
|
||||
|
||||
User goal:
|
||||
```
|
||||
{{ navigation_goal }}
|
||||
```
|
||||
|
||||
User details:
|
||||
```
|
||||
{{ navigation_payload_str }}
|
||||
```
|
||||
|
||||
Popped up elements:
|
||||
```
|
||||
{{ popped_up_elements }}
|
||||
|
||||
@@ -8,6 +8,7 @@ Reply in the following JSON format:
|
||||
"field": str, // Which field is this action intended to fill out?
|
||||
"is_required": bool, // True if this is a required field, otherwise false.
|
||||
"is_search_bar": bool, // True if the element to take the action is a search bar, otherwise false.
|
||||
"is_location_input": bool, // True if the element is asking user to input where he lives, otherwise false. For example, it is asking for location, or address, or other similar information.
|
||||
}
|
||||
|
||||
Existing reasoning context:
|
||||
|
||||
@@ -69,9 +69,10 @@ class InputOrSelectContext(BaseModel):
|
||||
field: str | None = None
|
||||
is_required: bool | None = None
|
||||
is_search_bar: bool | None = None # don't trigger custom-selection logic when it's a search bar
|
||||
is_location_input: bool | None = None # address input usually requires auto completion
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"InputOrSelectContext(field={self.field}, is_required={self.is_required}, is_search_bar={self.is_search_bar})"
|
||||
return f"InputOrSelectContext(field={self.field}, is_required={self.is_required}, is_search_bar={self.is_search_bar}, is_location_input={self.is_location_input})"
|
||||
|
||||
|
||||
class Action(BaseModel):
|
||||
|
||||
@@ -18,7 +18,6 @@ from skyvern.exceptions import (
|
||||
ErrFoundSelectableElement,
|
||||
FailedToFetchSecret,
|
||||
FailToClick,
|
||||
FailToFindAutocompleteOption,
|
||||
FailToSelectByIndex,
|
||||
FailToSelectByLabel,
|
||||
FailToSelectByValue,
|
||||
@@ -27,6 +26,8 @@ from skyvern.exceptions import (
|
||||
InteractWithDisabledElement,
|
||||
InvalidElementForTextInput,
|
||||
MissingElement,
|
||||
MissingElementDict,
|
||||
MissingElementInCSSMap,
|
||||
MissingFileUrl,
|
||||
MultipleElementsFound,
|
||||
NoAutoCompleteOptionMeetCondition,
|
||||
@@ -72,6 +73,7 @@ from skyvern.webeye.scraper.scraper import (
|
||||
ElementTreeFormat,
|
||||
IncrementalScrapePage,
|
||||
ScrapedPage,
|
||||
hash_element,
|
||||
json_to_html,
|
||||
trim_element_tree,
|
||||
)
|
||||
@@ -169,6 +171,7 @@ def clean_and_remove_element_tree_factory(
|
||||
)
|
||||
for check_exist in check_exist_funcs:
|
||||
element_tree = remove_exist_elements(element_tree=element_tree, check_exist=check_exist)
|
||||
|
||||
return element_tree
|
||||
|
||||
return helper_func
|
||||
@@ -441,6 +444,7 @@ async def handle_input_text_action(
|
||||
return [ActionFailure(InteractWithDisabledElement(skyvern_element.get_id()))]
|
||||
|
||||
incremental_element: list[dict] = []
|
||||
auto_complete_hacky_flag: bool = False
|
||||
# check if it's selectable
|
||||
if skyvern_element.get_tag_name() == InteractiveElement.INPUT and not await skyvern_element.is_raw_input():
|
||||
select_action = SelectOptionAction(
|
||||
@@ -489,6 +493,7 @@ async def handle_input_text_action(
|
||||
)
|
||||
await incremental_scraped.stop_listen_dom_increment()
|
||||
else:
|
||||
auto_complete_hacky_flag = True
|
||||
try:
|
||||
# TODO: we don't select by value for the auto completion detect case
|
||||
result, _ = await sequentially_select_from_dropdown(
|
||||
@@ -545,9 +550,26 @@ async def handle_input_text_action(
|
||||
if len(text) == 0:
|
||||
return [ActionSuccess()]
|
||||
|
||||
if await skyvern_element.is_auto_completion_input():
|
||||
# parse the input context to help executing input action
|
||||
prompt = prompt_engine.load_prompt(
|
||||
"parse-input-or-select-context",
|
||||
element_id=action.element_id,
|
||||
action_reasoning=action.reasoning,
|
||||
elements=dom.scraped_page.build_element_tree(ElementTreeFormat.HTML),
|
||||
)
|
||||
|
||||
json_response = await app.SECONDARY_LLM_API_HANDLER(prompt=prompt, step=step)
|
||||
input_or_select_context = InputOrSelectContext.model_validate(json_response)
|
||||
LOG.info(
|
||||
"Parsed input/select context",
|
||||
context=input_or_select_context,
|
||||
task_id=task.task_id,
|
||||
step_id=step.step_id,
|
||||
)
|
||||
|
||||
if await skyvern_element.is_auto_completion_input() or input_or_select_context.is_location_input:
|
||||
if result := await input_or_auto_complete_input(
|
||||
action=action,
|
||||
input_or_select_context=input_or_select_context,
|
||||
page=page,
|
||||
dom=dom,
|
||||
text=text,
|
||||
@@ -557,11 +579,22 @@ async def handle_input_text_action(
|
||||
):
|
||||
return [result]
|
||||
|
||||
await skyvern_element.input_sequentially(text=text)
|
||||
await incremental_scraped.start_listen_dom_increment()
|
||||
|
||||
try:
|
||||
await skyvern_element.input_sequentially(text=text)
|
||||
finally:
|
||||
incremental_element = await incremental_scraped.get_incremental_element_tree(
|
||||
clean_and_remove_element_tree_factory(task=task, step=step, check_exist_funcs=[dom.check_id_in_dom]),
|
||||
)
|
||||
if len(incremental_element) > 0:
|
||||
auto_complete_hacky_flag = True
|
||||
await incremental_scraped.stop_listen_dom_increment()
|
||||
|
||||
return [ActionSuccess()]
|
||||
finally:
|
||||
# HACK: force to finish missing auto completion input
|
||||
if len(incremental_element) > 0:
|
||||
if auto_complete_hacky_flag:
|
||||
LOG.debug(
|
||||
"Trigger input-selection hack, pressing Tab to choose one",
|
||||
action=action,
|
||||
@@ -1240,7 +1273,8 @@ async def choose_auto_completion_dropdown(
|
||||
if len(incremental_element) == 0:
|
||||
raise NoIncrementalElementFoundForAutoCompletion(element_id=skyvern_element.get_id(), text=text)
|
||||
|
||||
html = incremental_scraped.build_html_tree(incremental_element)
|
||||
cleaned_incremental_element = remove_duplicated_HTML_element(incremental_element)
|
||||
html = incremental_scraped.build_html_tree(cleaned_incremental_element)
|
||||
auto_completion_confirm_prompt = prompt_engine.load_prompt(
|
||||
"auto-completion-choose-option",
|
||||
field_information=context.field,
|
||||
@@ -1305,8 +1339,20 @@ async def choose_auto_completion_dropdown(
|
||||
await skyvern_element.input_clear()
|
||||
|
||||
|
||||
def remove_duplicated_HTML_element(elements: list[dict]) -> list[dict]:
    """Drop repeated elements, keeping the first occurrence of each.

    Two elements count as duplicates when `hash_element` returns the same
    key for both. The relative order of the surviving elements is unchanged,
    and the input list itself is not modified.
    """
    seen_keys = set()
    unique_elements: list[dict] = []
    for candidate in elements:
        digest = hash_element(element=candidate)
        if digest not in seen_keys:
            # first time this hash shows up: remember it and keep the element
            seen_keys.add(digest)
            unique_elements.append(candidate)
    return unique_elements
|
||||
|
||||
|
||||
async def input_or_auto_complete_input(
|
||||
action: actions.InputTextAction,
|
||||
input_or_select_context: InputOrSelectContext,
|
||||
page: Page,
|
||||
dom: DomUtil,
|
||||
text: str,
|
||||
@@ -1321,22 +1367,6 @@ async def input_or_auto_complete_input(
|
||||
element_id=skyvern_element.get_id(),
|
||||
)
|
||||
|
||||
prompt = prompt_engine.load_prompt(
|
||||
"parse-input-or-select-context",
|
||||
element_id=action.element_id,
|
||||
action_reasoning=action.reasoning,
|
||||
elements=dom.scraped_page.build_element_tree(ElementTreeFormat.HTML),
|
||||
)
|
||||
|
||||
json_response = await app.SECONDARY_LLM_API_HANDLER(prompt=prompt, step=step)
|
||||
input_or_select_context = InputOrSelectContext.model_validate(json_response)
|
||||
LOG.info(
|
||||
"Parsed input/select context",
|
||||
context=input_or_select_context,
|
||||
task_id=task.task_id,
|
||||
step_id=step.step_id,
|
||||
)
|
||||
|
||||
# 1. press the original text to see if there's a match
|
||||
# 2. call LLM to find 5 potential values based on the original text
|
||||
# 3. try each potential values from #2
|
||||
@@ -1388,6 +1418,8 @@ async def input_or_auto_complete_input(
|
||||
"auto-completion-potential-answers",
|
||||
field_information=input_or_select_context.field,
|
||||
current_value=current_value,
|
||||
navigation_goal=task.navigation_goal,
|
||||
navigation_payload_str=json.dumps(task.navigation_payload),
|
||||
)
|
||||
|
||||
LOG.info(
|
||||
@@ -1439,12 +1471,15 @@ async def input_or_auto_complete_input(
|
||||
current_value=current_value,
|
||||
current_attemp=current_attemp,
|
||||
)
|
||||
cleaned_new_elements = remove_duplicated_HTML_element(whole_new_elements)
|
||||
prompt = prompt_engine.load_prompt(
|
||||
"auto-completion-tweak-value",
|
||||
field_information=input_or_select_context.field,
|
||||
current_value=current_value,
|
||||
navigation_goal=task.navigation_goal,
|
||||
navigation_payload_str=json.dumps(task.navigation_payload),
|
||||
tried_values=json.dumps(tried_values),
|
||||
popped_up_elements="".join([json_to_html(element) for element in whole_new_elements]),
|
||||
popped_up_elements="".join([json_to_html(element) for element in cleaned_new_elements]),
|
||||
)
|
||||
json_respone = await app.SECONDARY_LLM_API_HANDLER(prompt=prompt, step=step)
|
||||
context_reasoning = json_respone.get("reasoning")
|
||||
@@ -1462,7 +1497,13 @@ async def input_or_auto_complete_input(
|
||||
current_value = new_current_value
|
||||
|
||||
else:
|
||||
return ActionFailure(FailToFindAutocompleteOption(current_value=text))
|
||||
LOG.warning(
|
||||
"Auto completion didn't finish, this might leave the input value to be empty.",
|
||||
context=input_or_select_context,
|
||||
step_id=step.step_id,
|
||||
task_id=task.task_id,
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
async def sequentially_select_from_dropdown(
|
||||
@@ -1723,7 +1764,7 @@ async def select_from_dropdown(
|
||||
await selected_element.get_locator().click(timeout=timeout)
|
||||
single_select_result.action_result = ActionSuccess()
|
||||
return single_select_result
|
||||
except MissingElement:
|
||||
except (MissingElement, MissingElementDict, MissingElementInCSSMap, MultipleElementsFound):
|
||||
if not value:
|
||||
raise
|
||||
|
||||
|
||||
@@ -216,6 +216,10 @@ function isElementStyleVisibilityVisible(element, style) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns true when an identifier named `ASPxClientControl` is defined in the
// page's scope, false otherwise.
function hasASPClientControl() {
  // `typeof` on an undeclared identifier yields "undefined" instead of throwing
  const controlKind = typeof ASPxClientControl;
  return controlKind !== "undefined";
}
|
||||
|
||||
// from playwright
|
||||
function isElementVisible(element) {
|
||||
// TODO: This is a hack to not check visibility for option elements
|
||||
@@ -496,8 +500,16 @@ function isInteractable(element) {
|
||||
if (element.className.toString().includes("hover:cursor-pointer")) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// auto for <a> is equal to pointer for <a>
|
||||
if (tagName == "a" && computedStyle.cursor === "auto") {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (hasASPClientControl() && tagName === "tr") {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@@ -600,6 +600,7 @@ class IncrementalScrapePage:
|
||||
return None, False
|
||||
|
||||
if not interactable:
|
||||
LOG.debug("Find the target element by text, but the element is not interactable", text=text)
|
||||
return None, True
|
||||
|
||||
return parent_locator, True
|
||||
|
||||
@@ -143,10 +143,6 @@ class SkyvernElement:
|
||||
if autocomplete and autocomplete == "list":
|
||||
return True
|
||||
|
||||
element_id = await self.get_attr("id")
|
||||
if element_id == "location-input":
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
async def is_custom_option(self) -> bool:
|
||||
@@ -527,6 +523,25 @@ class SkyvernElement:
|
||||
await self.focus(timeout=timeout)
|
||||
await asyncio.sleep(2) # wait for scrolling into the target
|
||||
|
||||
async def calculate_vertical_distance_to(
    self,
    target_locator: Locator,
    mode: typing.Literal["inner", "outer"],
    timeout: float = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
) -> float:
    """Return the absolute vertical gap between this element and a target.

    Args:
        target_locator: locator of the element to measure against.
        mode: "inner" measures from this element's bottom edge
            (``y + height``) to the target's top edge (``y``); any other
            value measures from this element's top edge to the target's
            bottom edge.
        timeout: timeout passed to both ``bounding_box`` calls.

    Returns:
        The non-negative distance, in the units reported by ``bounding_box``.

    Raises:
        Exception: if either bounding box comes back as ``None``.
    """
    self_rect = await self.get_locator().bounding_box(timeout=timeout)
    if self_rect is None:
        raise Exception("Can't get Skyvern element rect")

    target_rect = await target_locator.bounding_box(timeout=timeout)
    # self_rect was already validated above, so only the target can be missing here
    if target_rect is None:
        raise Exception("Can't get the target element rect")

    if mode == "inner":
        return abs(self_rect["y"] + self_rect["height"] - target_rect["y"])
    return abs(self_rect["y"] - (target_rect["y"] + target_rect["height"]))
|
||||
|
||||
|
||||
class DomUtil:
|
||||
"""
|
||||
|
||||
@@ -223,3 +223,7 @@ class SkyvernFrame:
|
||||
async def is_window_scrollable(self) -> bool:
    """Evaluate the page-side `isWindowScrollable()` helper in this frame and return its result."""
    return await self.evaluate(frame=self.frame, expression="() => isWindowScrollable()")
|
||||
|
||||
async def has_ASP_client_control(self) -> bool:
    """Evaluate the page-side `hasASPClientControl()` helper in this frame and return its result."""
    return await self.evaluate(frame=self.frame, expression="() => hasASPClientControl()")
|
||||
|
||||
Reference in New Issue
Block a user