fix search on auto completion (#1544)

This commit is contained in:
LawyZheng
2025-01-14 13:08:35 +08:00
committed by GitHub
parent a798757541
commit d63061f13b
5 changed files with 51 additions and 11 deletions

View File

@@ -1,6 +1,7 @@
There is an input element on an HTML page. Based on the context and information provided, you have two goals:
There is an input element on an HTML page. Based on the context and information provided, you have {{ "three" if is_search else "two" }} goals:
- Confirm if an auto-completion attempt appears after the user inputs the current value.
- If auto-completion suggestions appear, assist the user in selecting the most appropriate element based on the user's goal, details, and the context.
- If auto-completion suggestions appear, assist the user in selecting the most appropriate element based on the user's goal, details, and the context.{% if is_search %}
- Confirm if direct searching is a better way compared to all suggestions based on the user's goal.{% endif %}
You can confirm an auto-completion attempt based on the following rules:
- Several auto-completion suggestions appear for the input value.
@@ -15,6 +16,8 @@ Each interactable element is tagged with an ID.
Reply in JSON format with the following keys:
{
"thought": str, // Think step by step. Describe your thought about how you achieve the {{ "three" if is_search else "two" }} goals with convincing evidence.{% if is_search %}
"direct_searching": bool, // True if direct searching is a better way compared to all suggestions, otherwise False.{% endif %}
"auto_completion_attempt": bool, // True if there's any auto completion attempt based on the rules. Otherwise, it should be False.
"reasoning": str, // The reasoning behind the decision. Be specific, referencing the value and the element id in your reasoning. Mention why you chose the element id. Keep the reasoning short and to the point.
"confidence_float": float, // The confidence of the action. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence.
@@ -25,7 +28,7 @@ Reply in JSON format with the following keys:
Context:
```
Choose an auto-completion suggestion for "{{ field_information }}"
Choose an auto-completion suggestion for "{{ field_information }}"{% if is_search %} or directly search with the input value{% endif %}
```
Input value:

View File

@@ -788,12 +788,21 @@ async def handle_input_text_action(
await incremental_scraped.stop_listen_dom_increment()
return [ActionSuccess()]
except Exception as e:
LOG.exception(
"Failed to input the value or finish the auto completion",
task_id=task.task_id,
step_id=step.step_id,
)
raise e
finally:
# HACK: force to finish missing auto completion input
if auto_complete_hacky_flag and not await skyvern_element.is_raw_input():
if auto_complete_hacky_flag and await skyvern_element.is_visible() and not await skyvern_element.is_raw_input():
LOG.debug(
"Trigger input-selection hack, pressing Tab to choose one",
action=action,
task_id=task.task_id,
step_id=step.step_id,
)
await skyvern_element.press_key("Tab")
@@ -1624,6 +1633,7 @@ async def choose_auto_completion_dropdown(
html = incremental_scraped.build_html_tree(cleaned_incremental_element)
auto_completion_confirm_prompt = prompt_engine.load_prompt(
"auto-completion-choose-option",
is_search=context.is_search_bar,
field_information=context.field,
filled_value=text,
navigation_goal=task.navigation_goal,
@@ -1638,6 +1648,16 @@ async def choose_auto_completion_dropdown(
json_response = await app.SECONDARY_LLM_API_HANDLER(prompt=auto_completion_confirm_prompt, step=step)
element_id = json_response.get("id", "")
relevance_float = json_response.get("relevance_float", 0)
if json_response.get("direct_searching", False):
LOG.info(
"Decided to directly search with the current value",
value=text,
step_id=step.step_id,
task_id=task.task_id,
)
await skyvern_element.press_key("Enter")
return result
if not element_id:
reasoning = json_response.get("reasoning")
raise NoSuitableAutoCompleteOption(reasoning=reasoning, target_value=text)
@@ -1682,7 +1702,7 @@ async def choose_auto_completion_dropdown(
return result
finally:
await incremental_scraped.stop_listen_dom_increment()
if clear_input:
if clear_input and await skyvern_element.is_visible():
await skyvern_element.input_clear()

View File

@@ -2132,15 +2132,26 @@ if (window.globalObserverForDOMIncrement === undefined) {
}
if (mutation.type === "childList") {
if (mutation.target.nodeType === Node.TEXT_NODE) continue;
const node = mutation.target;
let changedNode = {
targetNode: mutation.target, // TODO: for future usage, when we want to parse new elements into a tree
targetNode: node, // TODO: for future usage, when we want to parse new elements into a tree
};
let newNodes = [];
if (mutation.addedNodes && mutation.addedNodes.length > 0) {
for (const node of mutation.addedNodes) {
// skip the text nodes, they won't be interactable
if (node.nodeType === Node.TEXT_NODE) continue;
newNodes.push(node);
if (
node.tagName.toLowerCase() === "ul" ||
(node.tagName.toLowerCase() === "div" &&
node.hasAttribute("role") &&
node.getAttribute("role").toLowerCase() === "listbox")
) {
newNodes.push(node);
} else {
if (mutation.addedNodes && mutation.addedNodes.length > 0) {
for (const node of mutation.addedNodes) {
// skip the text nodes, they won't be interactable
if (node.nodeType === Node.TEXT_NODE) continue;
newNodes.push(node);
}
}
}
if (newNodes.length > 0) {

View File

@@ -575,6 +575,10 @@ class IncrementalScrapePage:
await SkyvernFrame.evaluate(frame=self.skyvern_frame.get_frame(), expression=js_script)
async def stop_listen_dom_increment(self) -> None:
    """Stop the in-page incremental DOM observer when it is still defined.

    First evaluates a probe in the frame to see whether
    ``window.globalObserverForDOMIncrement`` is ``undefined``. Only when the
    probe reports that the observer global exists is
    ``stopGlobalIncrementalObserver()`` evaluated in the frame.
    """
    # An undefined observer global is treated as "the DOM has navigated away
    # or refreshed": in that case the stop call is skipped entirely.
    observer_gone_probe = "() => window.globalObserverForDOMIncrement === undefined"
    observer_gone = await SkyvernFrame.evaluate(
        frame=self.skyvern_frame.get_frame(),
        expression=observer_gone_probe,
    )
    if not observer_gone:
        stop_observer_call = "() => stopGlobalIncrementalObserver()"
        await SkyvernFrame.evaluate(
            frame=self.skyvern_frame.get_frame(),
            expression=stop_observer_call,
        )

View File

@@ -267,6 +267,8 @@ class SkyvernElement:
return self.get_selectable() or self.get_tag_name() in SELECTABLE_ELEMENT
async def is_visible(self) -> bool:
    """Report whether this element is currently visible.

    Returns ``False`` straight away when the element's locator matches
    nothing. Otherwise the visibility check is delegated to a
    ``SkyvernFrame`` created for this element's frame.
    """
    matched = await self.get_locator().count()
    if matched:
        frame_wrapper = await SkyvernFrame.create_instance(self.get_frame())
        element_handle = await self.get_element_handler()
        return await frame_wrapper.get_element_visible(element_handle)
    # No match for the locator: report not visible without resolving a handle.
    return False