anthropic CUA - support double and triple clicks (#2264)
This commit is contained in:
@@ -189,6 +189,8 @@ class ClickAction(WebAction):
|
||||
x: int | None = None
|
||||
y: int | None = None
|
||||
button: str = "left"
|
||||
# number of consecutive clicks to perform: 1 = single click, 2 = double click, 3 = triple click
|
||||
repeat: int = 1
|
||||
|
||||
def __repr__(self) -> str:
    """Return a debug representation of this click action.

    Includes ``repeat`` so that single, double and triple clicks can be
    told apart in logs; the other fields are rendered exactly as before.
    """
    return (
        f"ClickAction(element_id={self.element_id}, file_url={self.file_url}, "
        f"download={self.download}, x={self.x}, y={self.y}, button={self.button}, "
        f"repeat={self.repeat}, tool_call_id={self.tool_call_id})"
    )
|
||||
|
||||
@@ -505,7 +505,15 @@ async def handle_click_action(
|
||||
)
|
||||
LOG.info("Clicked element at location", x=action.x, y=action.y, element_id=element_id, button=action.button)
|
||||
|
||||
await page.mouse.click(x=action.x, y=action.y, button=action.button)
|
||||
if action.repeat == 1:
|
||||
await page.mouse.click(x=action.x, y=action.y, button=action.button)
|
||||
elif action.repeat == 2:
|
||||
await page.mouse.dblclick(x=action.x, y=action.y, button=action.button)
|
||||
elif action.repeat == 3:
|
||||
await page.mouse.click(x=action.x, y=action.y, button=action.button, click_count=3)
|
||||
else:
|
||||
raise ValueError(f"Invalid repeat value: {action.repeat}")
|
||||
|
||||
return [ActionSuccess()]
|
||||
|
||||
dom = DomUtil(scraped_page=scraped_page, page=page)
|
||||
|
||||
@@ -420,7 +420,7 @@ async def parse_anthropic_actions(
|
||||
tool_call_id=tool_call_id,
|
||||
)
|
||||
)
|
||||
elif action == "left_click":
|
||||
elif action in ["left_click", "double_click", "triple_click"]:
|
||||
coordinate = tool_call_input.get("coordinate")
|
||||
if not coordinate and idx - 1 >= 0:
|
||||
prev_tool_call = tool_calls[idx - 1]
|
||||
@@ -438,6 +438,12 @@ async def parse_anthropic_actions(
|
||||
x, y = validate_and_get_coordinates(
|
||||
coordinate, screenshot_resize_target_dimension, browser_window_dimension
|
||||
)
|
||||
repeat = 1
|
||||
if action == "double_click":
|
||||
repeat = 2
|
||||
elif action == "triple_click":
|
||||
repeat = 3
|
||||
|
||||
response = f"Click at: ({x}, {y})"
|
||||
reasoning = reasoning or response
|
||||
actions.append(
|
||||
@@ -446,6 +452,7 @@ async def parse_anthropic_actions(
|
||||
x=x,
|
||||
y=y,
|
||||
button="left",
|
||||
repeat=repeat,
|
||||
reasoning=reasoning,
|
||||
intention=reasoning,
|
||||
response=response,
|
||||
|
||||
Reference in New Issue
Block a user