CUA feature - support drag action (#2160)
This commit is contained in:
@@ -32,6 +32,7 @@ class ActionType(StrEnum):
|
||||
KEYPRESS = "keypress"
|
||||
TYPE = "type"
|
||||
MOVE = "move"
|
||||
DRAG = "drag"
|
||||
|
||||
def is_web_action(self) -> bool:
|
||||
return self in [
|
||||
@@ -285,6 +286,13 @@ class MoveAction(Action):
|
||||
y: int
|
||||
|
||||
|
||||
class DragAction(Action):
    # Press-drag-release gesture. As used by handle_drag_action, the mouse
    # button goes down at (start_x, start_y), the pointer visits each point
    # of `path` in order, and the button is released at the last point.
    action_type: ActionType = ActionType.DRAG

    # Point where the button is pressed.
    # NOTE(review): presumably viewport coordinates, since the values are
    # passed straight to page.mouse.move — confirm against the producer.
    start_x: int
    start_y: int

    # (x, y) points visited after the press; the parser passes everything
    # after the first point of the model-provided path here.
    path: list[tuple[int, int]] = []
|
||||
|
||||
|
||||
class ScrapeResult(BaseModel):
|
||||
"""
|
||||
Scraped response from a webpage, including:
|
||||
|
||||
@@ -1543,6 +1543,22 @@ async def handle_move_action(
|
||||
return [ActionSuccess()]
|
||||
|
||||
|
||||
async def handle_drag_action(
    action: actions.DragAction,
    page: Page,
    scraped_page: ScrapedPage,
    task: Task,
    step: Step,
) -> list[ActionResult]:
    """
    Perform a mouse drag on ``page``.

    The pointer is moved to ``(action.start_x, action.start_y)``, the mouse
    button is pressed, the pointer is moved through every point of
    ``action.path`` in order, and the button is released.

    ``scraped_page``, ``task`` and ``step`` are not used by this handler.
    NOTE(review): they are presumably kept so the signature matches the other
    handlers registered via ``ActionHandler.register_action_type`` — confirm.

    Returns a single-element list containing ``ActionSuccess()``. Any error
    raised by the mouse calls propagates to the caller.
    """
    await page.mouse.move(action.start_x, action.start_y)
    await page.mouse.down()
    try:
        for x, y in action.path:
            await page.mouse.move(x, y)
    finally:
        # Always release the button: if a move fails mid-drag, leaving it
        # pressed would turn every later pointer action on this page into
        # an unintended drag.
        await page.mouse.up()
    return [ActionSuccess()]
|
||||
|
||||
|
||||
ActionHandler.register_action_type(ActionType.SOLVE_CAPTCHA, handle_solve_captcha_action)
|
||||
ActionHandler.register_action_type(ActionType.CLICK, handle_click_action)
|
||||
ActionHandler.register_action_type(ActionType.INPUT_TEXT, handle_input_text_action)
|
||||
@@ -1557,6 +1573,7 @@ ActionHandler.register_action_type(ActionType.EXTRACT, handle_extract_action)
|
||||
ActionHandler.register_action_type(ActionType.SCROLL, handle_scroll_action)
|
||||
ActionHandler.register_action_type(ActionType.KEYPRESS, handle_keypress_action)
|
||||
ActionHandler.register_action_type(ActionType.MOVE, handle_move_action)
|
||||
ActionHandler.register_action_type(ActionType.DRAG, handle_drag_action)
|
||||
|
||||
|
||||
async def get_actual_value_of_parameter_if_secret(task: Task, parameter: str) -> Any:
|
||||
|
||||
@@ -16,6 +16,7 @@ from skyvern.webeye.actions.actions import (
|
||||
ClickAction,
|
||||
CompleteAction,
|
||||
DownloadFileAction,
|
||||
DragAction,
|
||||
InputTextAction,
|
||||
KeypressAction,
|
||||
MoveAction,
|
||||
@@ -290,6 +291,32 @@ async def parse_cua_actions(
|
||||
reasoning=reasoning,
|
||||
intention=reasoning,
|
||||
)
|
||||
case "drag":
|
||||
whole_path = cua_action.path
|
||||
if not whole_path or len(whole_path) < 2:
|
||||
LOG.warning(
|
||||
"Invalid drag action",
|
||||
task_id=task.task_id,
|
||||
step_id=step.step_id,
|
||||
step_order=step.order,
|
||||
action_order=idx,
|
||||
whole_path=whole_path,
|
||||
)
|
||||
action = WaitAction(
|
||||
seconds=5,
|
||||
reasoning=reasoning,
|
||||
intention=reasoning,
|
||||
)
|
||||
else:
|
||||
start_x, start_y = whole_path[0][0], whole_path[0][1]
|
||||
reasoning = reasoning or f"Drag action path: {whole_path}"
|
||||
action = DragAction(
|
||||
start_x=start_x,
|
||||
start_y=start_y,
|
||||
path=whole_path[1:],
|
||||
reasoning=reasoning,
|
||||
intention=reasoning,
|
||||
)
|
||||
case _:
|
||||
raise ValueError(f"Unsupported action type: {action_type}")
|
||||
action.organization_id = task.organization_id
|
||||
|
||||
Reference in New Issue
Block a user