anthropic CUA - support double and triple clicks (#2264)
This commit is contained in:
@@ -189,6 +189,8 @@ class ClickAction(WebAction):
|
|||||||
x: int | None = None
|
x: int | None = None
|
||||||
y: int | None = None
|
y: int | None = None
|
||||||
button: str = "left"
|
button: str = "left"
|
||||||
|
# number of clicks to perform: 1 = single click, 2 = double click, 3 = triple click (other values are rejected)
|
||||||
|
repeat: int = 1
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
return f"ClickAction(element_id={self.element_id}, file_url={self.file_url}, download={self.download}, x={self.x}, y={self.y}, button={self.button}, tool_call_id={self.tool_call_id})"
|
return f"ClickAction(element_id={self.element_id}, file_url={self.file_url}, download={self.download}, x={self.x}, y={self.y}, button={self.button}, tool_call_id={self.tool_call_id})"
|
||||||
|
|||||||
@@ -505,7 +505,15 @@ async def handle_click_action(
|
|||||||
)
|
)
|
||||||
LOG.info("Clicked element at location", x=action.x, y=action.y, element_id=element_id, button=action.button)
|
LOG.info("Clicked element at location", x=action.x, y=action.y, element_id=element_id, button=action.button)
|
||||||
|
|
||||||
await page.mouse.click(x=action.x, y=action.y, button=action.button)
|
if action.repeat == 1:
|
||||||
|
await page.mouse.click(x=action.x, y=action.y, button=action.button)
|
||||||
|
elif action.repeat == 2:
|
||||||
|
await page.mouse.dblclick(x=action.x, y=action.y, button=action.button)
|
||||||
|
elif action.repeat == 3:
|
||||||
|
await page.mouse.click(x=action.x, y=action.y, button=action.button, click_count=3)
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Invalid repeat value: {action.repeat}")
|
||||||
|
|
||||||
return [ActionSuccess()]
|
return [ActionSuccess()]
|
||||||
|
|
||||||
dom = DomUtil(scraped_page=scraped_page, page=page)
|
dom = DomUtil(scraped_page=scraped_page, page=page)
|
||||||
|
|||||||
@@ -420,7 +420,7 @@ async def parse_anthropic_actions(
|
|||||||
tool_call_id=tool_call_id,
|
tool_call_id=tool_call_id,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
elif action == "left_click":
|
elif action in ["left_click", "double_click", "triple_click"]:
|
||||||
coordinate = tool_call_input.get("coordinate")
|
coordinate = tool_call_input.get("coordinate")
|
||||||
if not coordinate and idx - 1 >= 0:
|
if not coordinate and idx - 1 >= 0:
|
||||||
prev_tool_call = tool_calls[idx - 1]
|
prev_tool_call = tool_calls[idx - 1]
|
||||||
@@ -438,6 +438,12 @@ async def parse_anthropic_actions(
|
|||||||
x, y = validate_and_get_coordinates(
|
x, y = validate_and_get_coordinates(
|
||||||
coordinate, screenshot_resize_target_dimension, browser_window_dimension
|
coordinate, screenshot_resize_target_dimension, browser_window_dimension
|
||||||
)
|
)
|
||||||
|
repeat = 1
|
||||||
|
if action == "double_click":
|
||||||
|
repeat = 2
|
||||||
|
elif action == "triple_click":
|
||||||
|
repeat = 3
|
||||||
|
|
||||||
response = f"Click at: ({x}, {y})"
|
response = f"Click at: ({x}, {y})"
|
||||||
reasoning = reasoning or response
|
reasoning = reasoning or response
|
||||||
actions.append(
|
actions.append(
|
||||||
@@ -446,6 +452,7 @@ async def parse_anthropic_actions(
|
|||||||
x=x,
|
x=x,
|
||||||
y=y,
|
y=y,
|
||||||
button="left",
|
button="left",
|
||||||
|
repeat=repeat,
|
||||||
reasoning=reasoning,
|
reasoning=reasoning,
|
||||||
intention=reasoning,
|
intention=reasoning,
|
||||||
response=response,
|
response=response,
|
||||||
|
|||||||
Reference in New Issue
Block a user