Use CSS selectors instead of XPath (#551)

This commit is contained in:
LawyZheng
2024-07-04 10:45:47 +08:00
committed by GitHub
parent 80c7c43bd6
commit cd0d563070
7 changed files with 92 additions and 122 deletions

View File

@@ -47,17 +47,17 @@ class ScriptNotFound(SkyvernException):
class MissingElement(SkyvernException): class MissingElement(SkyvernException):
def __init__(self, xpath: str | None = None, element_id: str | None = None): def __init__(self, selector: str | None = None, element_id: str | None = None):
super().__init__( super().__init__(
f"Found no elements. Might be due to previous actions which removed this element." f"Found no elements. Might be due to previous actions which removed this element."
f" xpath={xpath} element_id={element_id}", f" selector={selector} element_id={element_id}",
) )
class MultipleElementsFound(SkyvernException): class MultipleElementsFound(SkyvernException):
def __init__(self, num: int, xpath: str | None = None, element_id: str | None = None): def __init__(self, num: int, selector: str | None = None, element_id: str | None = None):
super().__init__( super().__init__(
f"Found {num} elements. Expected 1. num_elements={num} xpath={xpath} element_id={element_id}", f"Found {num} elements. Expected 1. num_elements={num} selector={selector} element_id={element_id}",
) )
@@ -318,6 +318,11 @@ class MissingElementInIframe(SkyvernException):
super().__init__(f"Found no iframe includes the element. element_id={element_id}") super().__init__(f"Found no iframe includes the element. element_id={element_id}")
class MissingElementInCSSMap(SkyvernException):
def __init__(self, element_id: str) -> None:
super().__init__(f"Found no css selector in the CSS map for the element. element_id={element_id}")
class InputActionOnSelect2Dropdown(SkyvernException): class InputActionOnSelect2Dropdown(SkyvernException):
def __init__(self, element_id: str): def __init__(self, element_id: str):
super().__init__( super().__init__(

View File

@@ -966,8 +966,8 @@ class ForgeAgent:
await app.ARTIFACT_MANAGER.create_artifact( await app.ARTIFACT_MANAGER.create_artifact(
step=step, step=step,
artifact_type=ArtifactType.VISIBLE_ELEMENTS_ID_XPATH_MAP, artifact_type=ArtifactType.VISIBLE_ELEMENTS_ID_CSS_MAP,
data=json.dumps(scraped_page.id_to_xpath_dict, indent=2).encode(), data=json.dumps(scraped_page.id_to_css_dict, indent=2).encode(),
) )
await app.ARTIFACT_MANAGER.create_artifact( await app.ARTIFACT_MANAGER.create_artifact(
step=step, step=step,

View File

@@ -21,12 +21,15 @@ class ArtifactType(StrEnum):
LLM_REQUEST = "llm_request" LLM_REQUEST = "llm_request"
LLM_RESPONSE = "llm_response" LLM_RESPONSE = "llm_response"
LLM_RESPONSE_PARSED = "llm_response_parsed" LLM_RESPONSE_PARSED = "llm_response_parsed"
VISIBLE_ELEMENTS_ID_XPATH_MAP = "visible_elements_id_xpath_map" VISIBLE_ELEMENTS_ID_CSS_MAP = "visible_elements_id_css_map"
VISIBLE_ELEMENTS_ID_FRAME_MAP = "visible_elements_id_frame_map" VISIBLE_ELEMENTS_ID_FRAME_MAP = "visible_elements_id_frame_map"
VISIBLE_ELEMENTS_TREE = "visible_elements_tree" VISIBLE_ELEMENTS_TREE = "visible_elements_tree"
VISIBLE_ELEMENTS_TREE_TRIMMED = "visible_elements_tree_trimmed" VISIBLE_ELEMENTS_TREE_TRIMMED = "visible_elements_tree_trimmed"
VISIBLE_ELEMENTS_TREE_IN_PROMPT = "visible_elements_tree_in_prompt" VISIBLE_ELEMENTS_TREE_IN_PROMPT = "visible_elements_tree_in_prompt"
# DEPRECATED. Please use VISIBLE_ELEMENTS_ID_CSS_MAP
VISIBLE_ELEMENTS_ID_XPATH_MAP = "visible_elements_id_xpath_map"
# DEPRECATED. Please use HTML_SCRAPE or HTML_ACTION # DEPRECATED. Please use HTML_SCRAPE or HTML_ACTION
HTML = "html" HTML = "html"

View File

@@ -13,7 +13,7 @@ FILE_EXTENTSION_MAP: dict[ArtifactType, str] = {
ArtifactType.LLM_REQUEST: "json", ArtifactType.LLM_REQUEST: "json",
ArtifactType.LLM_RESPONSE: "json", ArtifactType.LLM_RESPONSE: "json",
ArtifactType.LLM_RESPONSE_PARSED: "json", ArtifactType.LLM_RESPONSE_PARSED: "json",
ArtifactType.VISIBLE_ELEMENTS_ID_XPATH_MAP: "json", ArtifactType.VISIBLE_ELEMENTS_ID_CSS_MAP: "json",
ArtifactType.VISIBLE_ELEMENTS_ID_FRAME_MAP: "json", ArtifactType.VISIBLE_ELEMENTS_ID_FRAME_MAP: "json",
ArtifactType.VISIBLE_ELEMENTS_TREE: "json", ArtifactType.VISIBLE_ELEMENTS_TREE: "json",
ArtifactType.VISIBLE_ELEMENTS_TREE_TRIMMED: "json", ArtifactType.VISIBLE_ELEMENTS_TREE_TRIMMED: "json",
@@ -22,6 +22,8 @@ FILE_EXTENTSION_MAP: dict[ArtifactType, str] = {
ArtifactType.HTML_ACTION: "html", ArtifactType.HTML_ACTION: "html",
ArtifactType.TRACE: "zip", ArtifactType.TRACE: "zip",
ArtifactType.HAR: "har", ArtifactType.HAR: "har",
# DEPRECATED: we use the CSS selector map now
ArtifactType.VISIBLE_ELEMENTS_ID_XPATH_MAP: "json",
} }

View File

@@ -48,7 +48,7 @@ from skyvern.webeye.actions.actions import (
from skyvern.webeye.actions.responses import ActionFailure, ActionResult, ActionSuccess from skyvern.webeye.actions.responses import ActionFailure, ActionResult, ActionSuccess
from skyvern.webeye.browser_factory import BrowserState from skyvern.webeye.browser_factory import BrowserState
from skyvern.webeye.scraper.scraper import ScrapedPage from skyvern.webeye.scraper.scraper import ScrapedPage
from skyvern.webeye.utils.dom import DomUtil, InteractiveElement, Select2Dropdown, SkyvernElement, resolve_locator from skyvern.webeye.utils.dom import DomUtil, InteractiveElement, Select2Dropdown, SkyvernElement
LOG = structlog.get_logger() LOG = structlog.get_logger()
TEXT_INPUT_DELAY = 10 # 10ms between each character input TEXT_INPUT_DELAY = 10 # 10ms between each character input
@@ -152,7 +152,7 @@ class ActionHandler:
return [ActionFailure(e)] return [ActionFailure(e)]
except MultipleElementsFound as e: except MultipleElementsFound as e:
LOG.exception( LOG.exception(
"Cannot handle multiple elements with the same xpath in one action.", "Cannot handle multiple elements with the same selector in one action.",
action=action, action=action,
) )
return [ActionFailure(e)] return [ActionFailure(e)]
@@ -206,7 +206,8 @@ async def handle_click_action(
num_downloaded_files_before=num_downloaded_files_before, num_downloaded_files_before=num_downloaded_files_before,
download_dir=download_dir, download_dir=download_dir,
) )
xpath, frame = await validate_actions_in_dom(action, page, scraped_page) dom = DomUtil(scraped_page=scraped_page, page=page)
skyvern_element = await dom.get_skyvern_element_by_id(action.element_id)
await asyncio.sleep(0.3) await asyncio.sleep(0.3)
if action.download: if action.download:
results = await handle_click_to_download_file_action(action, page, scraped_page) results = await handle_click_to_download_file_action(action, page, scraped_page)
@@ -216,8 +217,7 @@ async def handle_click_action(
scraped_page, scraped_page,
page, page,
action, action,
xpath, skyvern_element,
frame,
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS, timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
) )
@@ -241,9 +241,9 @@ async def handle_click_to_download_file_action(
page: Page, page: Page,
scraped_page: ScrapedPage, scraped_page: ScrapedPage,
) -> list[ActionResult]: ) -> list[ActionResult]:
xpath, frame = await validate_actions_in_dom(action, page, scraped_page) dom = DomUtil(scraped_page=scraped_page, page=page)
skyvern_element = await dom.get_skyvern_element_by_id(action.element_id)
locator = resolve_locator(scraped_page, page, frame, xpath) locator = skyvern_element.locator
try: try:
await locator.click( await locator.click(
@@ -269,10 +269,7 @@ async def handle_input_text_action(
if await skyvern_element.is_select2_dropdown(): if await skyvern_element.is_select2_dropdown():
return [ActionFailure(InputActionOnSelect2Dropdown(element_id=action.element_id))] return [ActionFailure(InputActionOnSelect2Dropdown(element_id=action.element_id))]
xpath, frame = await validate_actions_in_dom(action, page, scraped_page) locator = skyvern_element.locator
locator = resolve_locator(scraped_page, page, frame, xpath)
current_text = await get_input_value(locator) current_text = await get_input_value(locator)
if current_text == action.text: if current_text == action.text:
return [ActionSuccess()] return [ActionSuccess()]
@@ -323,19 +320,16 @@ async def handle_upload_file_action(
) )
return [ActionFailure(ImaginaryFileUrl(action.file_url))] return [ActionFailure(ImaginaryFileUrl(action.file_url))]
xpath, frame = await validate_actions_in_dom(action, page, scraped_page) dom = DomUtil(scraped_page=scraped_page, page=page)
skyvern_element = await dom.get_skyvern_element_by_id(action.element_id)
locator = skyvern_element.locator
file_path = await download_file(file_url) file_path = await download_file(file_url)
locator = resolve_locator(scraped_page, page, frame, xpath)
is_file_input = await is_file_input_element(locator) is_file_input = await is_file_input_element(locator)
if is_file_input: if is_file_input:
LOG.info("Taking UploadFileAction. Found file input tag", action=action) LOG.info("Taking UploadFileAction. Found file input tag", action=action)
if file_path: if file_path:
locator = resolve_locator(scraped_page, page, frame, xpath)
await locator.set_input_files( await locator.set_input_files(
file_path, file_path,
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS, timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
@@ -356,8 +350,7 @@ async def handle_upload_file_action(
scraped_page, scraped_page,
page, page,
action, action,
xpath, skyvern_element,
frame,
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS, timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
) )
@@ -370,7 +363,9 @@ async def handle_download_file_action(
task: Task, task: Task,
step: Step, step: Step,
) -> list[ActionResult]: ) -> list[ActionResult]:
xpath, frame = await validate_actions_in_dom(action, page, scraped_page) dom = DomUtil(scraped_page=scraped_page, page=page)
skyvern_element = await dom.get_skyvern_element_by_id(action.element_id)
file_name = f"{action.file_name or uuid.uuid4()}" file_name = f"{action.file_name or uuid.uuid4()}"
full_file_path = f"{REPO_ROOT_DIR}/downloads/{task.workflow_run_id or task.task_id}/{file_name}" full_file_path = f"{REPO_ROOT_DIR}/downloads/{task.workflow_run_id or task.task_id}/{file_name}"
try: try:
@@ -378,8 +373,7 @@ async def handle_download_file_action(
async with page.expect_download() as download_info: async with page.expect_download() as download_info:
await asyncio.sleep(0.3) await asyncio.sleep(0.3)
locator = resolve_locator(scraped_page, page, frame, xpath) locator = skyvern_element.locator
await locator.click( await locator.click(
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS, timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
modifiers=["Alt"], modifiers=["Alt"],
@@ -422,12 +416,9 @@ async def handle_select_option_action(
) -> list[ActionResult]: ) -> list[ActionResult]:
dom = DomUtil(scraped_page, page) dom = DomUtil(scraped_page, page)
skyvern_element = await dom.get_skyvern_element_by_id(action.element_id) skyvern_element = await dom.get_skyvern_element_by_id(action.element_id)
locator = skyvern_element.locator
xpath, frame = await validate_actions_in_dom(action, page, scraped_page) tag_name = skyvern_element.get_tag_name()
locator = resolve_locator(scraped_page, page, frame, xpath)
tag_name = await get_tag_name_lowercase(locator)
element_dict = scraped_page.id_to_element_dict[action.element_id] element_dict = scraped_page.id_to_element_dict[action.element_id]
LOG.info( LOG.info(
"SelectOptionAction", "SelectOptionAction",
@@ -574,6 +565,7 @@ async def handle_select_option_action(
return result return result
elif tag_name == "ul" or tag_name == "div" or tag_name == "li": elif tag_name == "ul" or tag_name == "div" or tag_name == "li":
# DEPRECATED: This was used to handle select2 dropdowns and isn't used anymore.
# if the role is listbox, find the option with the "label" or "value" and click that option element # if the role is listbox, find the option with the "label" or "value" and click that option element
# references: # references:
# https://developer.mozilla.org/en-US/docs/Web/Accessibility/ARIA/Roles/listbox_role # https://developer.mozilla.org/en-US/docs/Web/Accessibility/ARIA/Roles/listbox_role
@@ -603,7 +595,7 @@ async def handle_select_option_action(
) )
# click the option element # click the option element
click_action = ClickAction(element_id=action.element_id) click_action = ClickAction(element_id=action.element_id)
return await chain_click(task, scraped_page, page, click_action, xpath, frame) return await chain_click(task, scraped_page, page, click_action, skyvern_element)
else: else:
LOG.error( LOG.error(
"SelectOptionAction on a non-listbox element. Cannot handle this action", "SelectOptionAction on a non-listbox element. Cannot handle this action",
@@ -622,7 +614,7 @@ async def handle_select_option_action(
action=action, action=action,
) )
click_action = ClickAction(element_id=action.element_id) click_action = ClickAction(element_id=action.element_id)
return await chain_click(task, scraped_page, page, click_action, xpath, frame) return await chain_click(task, scraped_page, page, click_action, skyvern_element)
try: try:
current_text = await locator.input_value() current_text = await locator.input_value()
@@ -631,7 +623,7 @@ async def handle_select_option_action(
except Exception: except Exception:
LOG.info("failed to confirm if the select option has been done, force to take the action again.") LOG.info("failed to confirm if the select option has been done, force to take the action again.")
return await normal_select(action=action, skyvern_element=skyvern_element, xpath=xpath, frame=frame) return await normal_select(action=action, skyvern_element=skyvern_element)
async def handle_checkbox_action( async def handle_checkbox_action(
@@ -648,9 +640,10 @@ async def handle_checkbox_action(
Treating checkbox actions as click actions seem to perform way more reliably Treating checkbox actions as click actions seem to perform way more reliably
Developers who tried this and failed: 2 (Suchintan and Shu 😂) Developers who tried this and failed: 2 (Suchintan and Shu 😂)
""" """
xpath, frame = await validate_actions_in_dom(action, page, scraped_page)
locator = resolve_locator(scraped_page, page, frame, xpath) dom = DomUtil(scraped_page=scraped_page, page=page)
skyvern_element = await dom.get_skyvern_element_by_id(action.element_id)
locator = skyvern_element.locator
if action.is_checked: if action.is_checked:
await locator.check(timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS) await locator.check(timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS)
@@ -732,48 +725,21 @@ def get_actual_value_of_parameter_if_secret(task: Task, parameter: str) -> Any:
return secret_value if secret_value is not None else parameter return secret_value if secret_value is not None else parameter
async def validate_actions_in_dom(action: WebAction, page: Page, scraped_page: ScrapedPage) -> tuple[str, str]:
xpath = scraped_page.id_to_xpath_dict[action.element_id]
frame = scraped_page.id_to_frame_dict[action.element_id]
locator = resolve_locator(scraped_page, page, frame, xpath)
num_elements = await locator.count()
if num_elements < 1:
LOG.warning(
"No elements found with action xpath. Validation failed.",
action=action,
xpath=xpath,
)
raise MissingElement(xpath=xpath, element_id=action.element_id)
elif num_elements > 1:
LOG.warning(
"Multiple elements found with action xpath. Expected 1. Validation failed.",
action=action,
num_elements=num_elements,
)
raise MultipleElementsFound(num=num_elements, xpath=xpath, element_id=action.element_id)
else:
LOG.info("Validated action xpath in DOM", action=action)
return xpath, frame
async def chain_click( async def chain_click(
task: Task, task: Task,
scraped_page: ScrapedPage, scraped_page: ScrapedPage,
page: Page, page: Page,
action: ClickAction | UploadFileAction, action: ClickAction | UploadFileAction,
xpath: str, skyvern_element: SkyvernElement,
frame: str,
timeout: int = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS, timeout: int = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
) -> List[ActionResult]: ) -> List[ActionResult]:
# Add a defensive page handler here in case a click action opens a file chooser. # Add a defensive page handler here in case a click action opens a file chooser.
# This automatically dismisses the dialog # This automatically dismisses the dialog
# File choosers are impossible to close if you don't expect one. Instead of dealing with it, close it! # File choosers are impossible to close if you don't expect one. Instead of dealing with it, close it!
locator = skyvern_element.locator
# TODO (suchintan): This should likely result in an ActionFailure -- we can figure out how to do this later! # TODO (suchintan): This should likely result in an ActionFailure -- we can figure out how to do this later!
LOG.info("Chain click starts", action=action, xpath=xpath) LOG.info("Chain click starts", action=action, locator=locator)
file: list[str] | str = [] file: list[str] | str = []
if action.file_url: if action.file_url:
file_url = get_actual_value_of_parameter_if_secret(task, action.file_url) file_url = get_actual_value_of_parameter_if_secret(task, action.file_url)
@@ -792,15 +758,14 @@ async def chain_click(
LOG.info("Registered file chooser listener", action=action, path=file) LOG.info("Registered file chooser listener", action=action, path=file)
""" """
Clicks on an element identified by the xpath and its parent if failed. Clicks on an element identified by the css and its parent if failed.
:param xpath: xpath of the element to click :param css: css of the element to click
""" """
javascript_triggered = await is_javascript_triggered(scraped_page, page, frame, xpath) javascript_triggered = await is_javascript_triggered(scraped_page, page, locator)
locator = resolve_locator(scraped_page, page, frame, xpath)
try: try:
await locator.click(timeout=timeout) await locator.click(timeout=timeout)
LOG.info("Chain click: main element click succeeded", action=action, xpath=xpath) LOG.info("Chain click: main element click succeeded", action=action, locator=locator)
return [ return [
ActionSuccess( ActionSuccess(
javascript_triggered=javascript_triggered, javascript_triggered=javascript_triggered,
@@ -818,25 +783,25 @@ async def chain_click(
LOG.info( LOG.info(
"Chain click: it's an input element. going to try sibling click", "Chain click: it's an input element. going to try sibling click",
action=action, action=action,
xpath=xpath, locator=locator,
) )
sibling_action_result = await click_sibling_of_input(locator, timeout=timeout) sibling_action_result = await click_sibling_of_input(locator, timeout=timeout)
action_results.append(sibling_action_result) action_results.append(sibling_action_result)
if type(sibling_action_result) == ActionSuccess: if type(sibling_action_result) == ActionSuccess:
return action_results return action_results
parent_xpath = f"{xpath}/.."
try: try:
parent_javascript_triggered = await is_javascript_triggered(scraped_page, page, frame, parent_xpath) parent_locator = locator.locator("..")
parent_javascript_triggered = await is_javascript_triggered(scraped_page, page, parent_locator)
javascript_triggered = javascript_triggered or parent_javascript_triggered javascript_triggered = javascript_triggered or parent_javascript_triggered
parent_locator = resolve_locator(scraped_page, page, frame, xpath).locator("..")
await parent_locator.click(timeout=timeout) await parent_locator.click(timeout=timeout)
LOG.info( LOG.info(
"Chain click: successfully clicked parent element", "Chain click: successfully clicked parent element",
action=action, action=action,
parent_xpath=parent_xpath, parent_locator=parent_locator,
) )
action_results.append( action_results.append(
ActionSuccess( ActionSuccess(
@@ -848,7 +813,7 @@ async def chain_click(
LOG.warning( LOG.warning(
"Failed to click parent element", "Failed to click parent element",
action=action, action=action,
parent_xpath=parent_xpath, parent_locator=parent_locator,
exc_info=True, exc_info=True,
) )
action_results.append( action_results.append(
@@ -875,8 +840,6 @@ async def chain_click(
async def normal_select( async def normal_select(
action: actions.SelectOptionAction, action: actions.SelectOptionAction,
skyvern_element: SkyvernElement, skyvern_element: SkyvernElement,
xpath: str,
frame: str,
) -> List[ActionResult]: ) -> List[ActionResult]:
action_result: List[ActionResult] = [] action_result: List[ActionResult] = []
is_success = False is_success = False
@@ -891,8 +854,7 @@ async def normal_select(
"Failed to click before select action", "Failed to click before select action",
exc_info=True, exc_info=True,
action=action, action=action,
xpath=xpath, locator=locator,
frame=frame,
) )
action_result.append(ActionFailure(e)) action_result.append(ActionFailure(e))
return action_result return action_result
@@ -912,8 +874,7 @@ async def normal_select(
"Failed to take select action by label", "Failed to take select action by label",
exc_info=True, exc_info=True,
action=action, action=action,
xpath=xpath, locator=locator,
frame=frame,
) )
if not is_success and action.option.value is not None: if not is_success and action.option.value is not None:
@@ -931,8 +892,7 @@ async def normal_select(
"Failed to take select action by value", "Failed to take select action by value",
exc_info=True, exc_info=True,
action=action, action=action,
xpath=xpath, locator=locator,
frame=frame,
) )
if not is_success and action.option.index is not None: if not is_success and action.option.index is not None:
@@ -941,12 +901,11 @@ async def normal_select(
LOG.error( LOG.error(
"option index is out of bound", "option index is out of bound",
action=action, action=action,
xpath=xpath, locator=locator,
frame=frame,
) )
else: else:
try: try:
# This means the supplied index was for the select element, not a reference to the xpath dict # This means the supplied index was for the select element, not a reference to the css dict
await locator.select_option( await locator.select_option(
index=action.option.index, index=action.option.index,
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS, timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
@@ -959,8 +918,7 @@ async def normal_select(
"Failed to click on the option by index", "Failed to click on the option by index",
exc_info=True, exc_info=True,
action=action, action=action,
xpath=xpath, locator=locator,
frame=frame,
) )
try: try:
@@ -972,8 +930,7 @@ async def normal_select(
"Failed to click after select action", "Failed to click after select action",
exc_info=True, exc_info=True,
action=action, action=action,
xpath=xpath, locator=locator,
frame=frame,
) )
action_result.append(ActionFailure(e)) action_result.append(ActionFailure(e))
return action_result return action_result
@@ -993,7 +950,7 @@ def get_anchor_to_click(scraped_page: ScrapedPage, element_id: str) -> str | Non
if "id" in ele and ele["id"] == element_id: if "id" in ele and ele["id"] == element_id:
for child in ele["children"]: for child in ele["children"]:
if "tagName" in child and child["tagName"] == "a": if "tagName" in child and child["tagName"] == "a":
return scraped_page.id_to_xpath_dict[child["id"]] return scraped_page.id_to_css_dict[child["id"]]
return None return None
@@ -1029,8 +986,8 @@ def get_checkbox_id_in_label_children(scraped_page: ScrapedPage, element_id: str
return None return None
async def is_javascript_triggered(scraped_page: ScrapedPage, page: Page, frame: str, xpath: str) -> bool: @deprecated("This function is deprecated. It was used for select2 dropdown, but we don't use it anymore.")
locator = resolve_locator(scraped_page, page, frame, xpath) async def is_javascript_triggered(scraped_page: ScrapedPage, page: Page, locator: Locator) -> bool:
element = locator.first element = locator.first
tag_name = await element.evaluate("e => e.tagName") tag_name = await element.evaluate("e => e.tagName")
@@ -1077,12 +1034,12 @@ async def click_sibling_of_input(
parent_locator = locator.locator("..") parent_locator = locator.locator("..")
if input_element: if input_element:
input_id = await input_element.get_attribute("id") input_id = await input_element.get_attribute("id")
sibling_label_xpath = f'//label[@for="{input_id}"]' sibling_label_css = f'label[for="{input_id}"]'
label_locator = parent_locator.locator(sibling_label_xpath) label_locator = parent_locator.locator(sibling_label_css)
await label_locator.click(timeout=timeout) await label_locator.click(timeout=timeout)
LOG.info( LOG.info(
"Successfully clicked sibling label of input element", "Successfully clicked sibling label of input element",
sibling_label_xpath=sibling_label_xpath, sibling_label_css=sibling_label_css,
) )
return ActionSuccess(javascript_triggered=javascript_triggered, interacted_with_sibling=True) return ActionSuccess(javascript_triggered=javascript_triggered, interacted_with_sibling=True)
# Should never get here # Should never get here
@@ -1154,12 +1111,10 @@ async def click_listbox_option(
LOG.info("found option", element_id=child["id"]) LOG.info("found option", element_id=child["id"])
text = child["text"] if "text" in child else "" text = child["text"] if "text" in child else ""
if text and (text == action.option.label or text == action.option.value): if text and (text == action.option.label or text == action.option.value):
option_xpath = scraped_page.id_to_xpath_dict[child["id"]] dom = DomUtil(scraped_page=scraped_page, page=page)
option_frame = scraped_page.id_to_frame_dict[child["id"]]
try: try:
locator = resolve_locator(scraped_page, page, option_frame, option_xpath) skyvern_element = await dom.get_skyvern_element_by_id(child["id"])
locator = skyvern_element.locator
await locator.click(timeout=1000) await locator.click(timeout=1000)
return True return True
@@ -1167,7 +1122,7 @@ async def click_listbox_option(
LOG.error( LOG.error(
"Failed to click on the option", "Failed to click on the option",
action=action, action=action,
option_xpath=option_xpath, locator=locator,
exc_info=True, exc_info=True,
) )
if "children" in child: if "children" in child:

View File

@@ -111,7 +111,7 @@ class ScrapedPage(BaseModel):
""" """
Scraped response from a webpage, including: Scraped response from a webpage, including:
1. List of elements 1. List of elements
2. ID to xpath map 2. ID to css map
3. The element tree of the page (list of dicts). Each element has children and attributes. 3. The element tree of the page (list of dicts). Each element has children and attributes.
4. The screenshot (base64 encoded) 4. The screenshot (base64 encoded)
5. The URL of the page 5. The URL of the page
@@ -122,7 +122,7 @@ class ScrapedPage(BaseModel):
elements: list[dict] elements: list[dict]
id_to_element_dict: dict[str, dict] = {} id_to_element_dict: dict[str, dict] = {}
id_to_frame_dict: dict[str, str] = {} id_to_frame_dict: dict[str, str] = {}
id_to_xpath_dict: dict[str, str] id_to_css_dict: dict[str, str]
element_tree: list[dict] element_tree: list[dict]
element_tree_trimmed: list[dict] element_tree_trimmed: list[dict]
screenshots: list[bytes] screenshots: list[bytes]
@@ -276,14 +276,14 @@ async def scrape_web_unsafe(
_build_element_links(elements) _build_element_links(elements)
id_to_xpath_dict = {} id_to_css_dict = {}
id_to_element_dict = {} id_to_element_dict = {}
id_to_frame_dict = {} id_to_frame_dict = {}
for element in elements: for element in elements:
element_id = element["id"] element_id = element["id"]
# get_interactable_element_tree marks each interactable element with a unique_id attribute # get_interactable_element_tree marks each interactable element with a unique_id attribute
id_to_xpath_dict[element_id] = f"//*[@{SKYVERN_ID_ATTR}='{element_id}']" id_to_css_dict[element_id] = f"[{SKYVERN_ID_ATTR}='{element_id}']"
id_to_element_dict[element_id] = element id_to_element_dict[element_id] = element
id_to_frame_dict[element_id] = element["frame"] id_to_frame_dict[element_id] = element["frame"]
@@ -301,7 +301,7 @@ async def scrape_web_unsafe(
return ScrapedPage( return ScrapedPage(
elements=elements, elements=elements,
id_to_xpath_dict=id_to_xpath_dict, id_to_css_dict=id_to_css_dict,
id_to_element_dict=id_to_element_dict, id_to_element_dict=id_to_element_dict,
id_to_frame_dict=id_to_frame_dict, id_to_frame_dict=id_to_frame_dict,
element_tree=element_tree, element_tree=element_tree,

View File

@@ -10,6 +10,7 @@ from skyvern.exceptions import (
ElementIsNotLabel, ElementIsNotLabel,
MissingElement, MissingElement,
MissingElementDict, MissingElementDict,
MissingElementInCSSMap,
MissingElementInIframe, MissingElementInIframe,
MultipleElementsFound, MultipleElementsFound,
SkyvernException, SkyvernException,
@@ -20,7 +21,7 @@ from skyvern.webeye.scraper.scraper import ScrapedPage, get_select2_options
LOG = structlog.get_logger() LOG = structlog.get_logger()
def resolve_locator(scrape_page: ScrapedPage, page: Page, frame: str, xpath: str) -> Locator: def resolve_locator(scrape_page: ScrapedPage, page: Page, frame: str, css: str) -> Locator:
iframe_path: list[str] = [] iframe_path: list[str] = []
while frame != "main.frame": while frame != "main.frame":
@@ -42,7 +43,7 @@ def resolve_locator(scrape_page: ScrapedPage, page: Page, frame: str, xpath: str
child_frame = iframe_path.pop() child_frame = iframe_path.pop()
current_page = current_page.frame_locator(f"[{SKYVERN_ID_ATTR}='{child_frame}']") current_page = current_page.frame_locator(f"[{SKYVERN_ID_ATTR}='{child_frame}']")
return current_page.locator(f"xpath={xpath}") return current_page.locator(css)
class InteractiveElement(StrEnum): class InteractiveElement(StrEnum):
@@ -161,21 +162,25 @@ class DomUtil:
if not frame: if not frame:
raise MissingElementInIframe(element_id) raise MissingElementInIframe(element_id)
xpath = self.scraped_page.id_to_xpath_dict[element_id] css = self.scraped_page.id_to_css_dict.get(element_id)
if not css:
raise MissingElementInCSSMap(element_id)
locator = resolve_locator(self.scraped_page, self.page, frame, xpath) locator = resolve_locator(self.scraped_page, self.page, frame, css)
num_elements = await locator.count() num_elements = await locator.count()
if num_elements < 1: if num_elements < 1:
LOG.warning("No elements found with xpath. Validation failed.", xpath=xpath) LOG.warning("No elements found with css. Validation failed.", css=css, element_id=element_id)
raise MissingElement(xpath=xpath, element_id=element_id) raise MissingElement(selector=css, element_id=element_id)
elif num_elements > 1: elif num_elements > 1:
LOG.warning( LOG.warning(
"Multiple elements found with xpath. Expected 1. Validation failed.", "Multiple elements found with css. Expected 1. Validation failed.",
num_elements=num_elements, num_elements=num_elements,
selector=css,
element_id=element_id,
) )
raise MultipleElementsFound(num=num_elements, xpath=xpath, element_id=element_id) raise MultipleElementsFound(num=num_elements, selector=css, element_id=element_id)
return SkyvernElement(locator, element) return SkyvernElement(locator, element)