Add download flag to click action, update dom logic (#310)

This commit is contained in:
Kerem Yilmaz
2024-05-13 22:18:42 -07:00
committed by GitHub
parent a4ed6de34c
commit 9f1c679e09
4 changed files with 31 additions and 5 deletions

View File

@@ -16,6 +16,7 @@ Reply in JSON format with the following keys:
"id": int, // The id of the element to take action on. The id has to be one from the elements list
"text": str, // Text for INPUT_TEXT action only
"file_url": str, // The url of the file to upload if applicable. This field must be present for UPLOAD_FILE but can also be present for CLICK only if the click is to upload the file. It should be null otherwise.
"download": bool, // Can only be true for CLICK actions. If true, the browser will trigger a download by clicking the element. If false, the browser will click the element without triggering a download.
"option": { // The option to select for SELECT_OPTION action only. null if not SELECT_OPTION action
"label": str, // the label of the option if any. MAKE SURE YOU USE THIS LABEL TO SELECT THE OPTION. DO NOT PUT ANYTHING OTHER THAN A VALID OPTION LABEL HERE
"index": int, // the id corresponding to the optionIndex under the select element.

View File

@@ -52,9 +52,7 @@ class DecisiveAction(Action, abc.ABC):
class ClickAction(WebAction):
    """Click on a page element, optionally to upload or download a file."""

    action_type: ActionType = ActionType.CLICK
    # URL of the file to upload when the click is meant to upload a file; None otherwise.
    file_url: str | None = None
    # True when the click is expected to trigger a browser download.
    download: bool = False

    def __repr__(self) -> str:
        # Include `download` so logged actions show which click path was requested.
        return (
            f"ClickAction(element_id={self.element_id}, "
            f"file_url={self.file_url}, download={self.download})"
        )
class InputTextAction(WebAction):
@@ -162,7 +160,14 @@ def parse_actions(task: Task, json_response: List[Dict[str, Any]]) -> List[Actio
actions.append(TerminateAction(reasoning=reasoning, errors=action["errors"] if "errors" in action else []))
elif action_type == ActionType.CLICK:
file_url = action["file_url"] if "file_url" in action else None
actions.append(ClickAction(element_id=element_id, reasoning=reasoning, file_url=file_url))
actions.append(
ClickAction(
element_id=element_id,
reasoning=reasoning,
file_url=file_url,
download=action.get("download", False),
)
)
elif action_type == ActionType.INPUT_TEXT:
actions.append(InputTextAction(element_id=element_id, text=action["text"], reasoning=reasoning))
elif action_type == ActionType.UPLOAD_FILE:

View File

@@ -142,11 +142,30 @@ async def handle_click_action(
) -> list[ActionResult]:
xpath = await validate_actions_in_dom(action, page, scraped_page)
await asyncio.sleep(0.3)
if action.download:
return await handle_click_to_download_file_action(action, page, scraped_page)
return await chain_click(
task, page, action, xpath, timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS
)
async def handle_click_to_download_file_action(
    action: actions.ClickAction,
    page: Page,
    scraped_page: ScrapedPage,
) -> list[ActionResult]:
    """Click the action's target element with the Alt modifier held down.

    The element's xpath is obtained from ``validate_actions_in_dom`` and the
    click is issued through ``page.click`` using the configured
    ``BROWSER_ACTION_TIMEOUT_MS`` as the timeout.

    Returns:
        ``[ActionSuccess()]`` when the click completes, or a single
        ``ActionFailure`` (with ``download_triggered=False``) wrapping the
        exception raised while clicking.
    """
    xpath = await validate_actions_in_dom(action, page, scraped_page)

    try:
        selector = f"xpath={xpath}"
        click_timeout_ms = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS
        # NOTE(review): presumably Alt+click makes the browser download the
        # link target instead of navigating to it -- confirm for the browsers in use.
        await page.click(selector, timeout=click_timeout_ms, modifiers=["Alt"])
    except Exception as click_error:
        LOG.exception("ClickAction with download failed", action=action, exc_info=True)
        return [ActionFailure(click_error, download_triggered=False)]
    else:
        return [ActionSuccess()]
async def handle_input_text_action(
action: actions.InputTextAction, page: Page, scraped_page: ScrapedPage, task: Task, step: Step
) -> list[ActionResult]:

View File

@@ -26,6 +26,7 @@ RESERVED_ATTRIBUTES = {
"aria-role",
"aria-selected", # for option tag
"checked",
"data-original-title", # for bootstrap tooltip
"data-ui",
"for",
"href", # For a tags
@@ -373,7 +374,7 @@ def _trimmed_attributes(tag_name: str, attributes: dict) -> dict:
new_attributes[key] = attributes[key]
if key == "role" and attributes[key] in ["listbox", "option"]:
new_attributes[key] = attributes[key]
if key in RESERVED_ATTRIBUTES:
if key in RESERVED_ATTRIBUTES and attributes[key]:
new_attributes[key] = attributes[key]
return new_attributes