Ykeremy/click instead of download (#275)

This commit is contained in:
Kerem Yilmaz
2024-05-07 23:54:07 -07:00
committed by GitHub
parent 2788b53a0c
commit cc91c1b2b6
5 changed files with 42 additions and 12 deletions

View File

@@ -3,6 +3,7 @@ from enum import StrEnum
from typing import Any, Dict, List
import structlog
from deprecation import deprecated
from pydantic import BaseModel, Field
from skyvern.forge.sdk.schemas.tasks import Task
@@ -14,7 +15,10 @@ class ActionType(StrEnum):
CLICK = "click"
INPUT_TEXT = "input_text"
UPLOAD_FILE = "upload_file"
# This action is not used in the current implementation. Click actions are used instead."
DOWNLOAD_FILE = "download_file"
SELECT_OPTION = "select_option"
CHECKBOX = "checkbox"
WAIT = "wait"
@@ -70,6 +74,7 @@ class UploadFileAction(WebAction):
return f"UploadFileAction(element_id={self.element_id}, file={self.file_url}, is_upload_file_tag={self.is_upload_file_tag})"
@deprecated("This action is not used in the current implementation. Click actions are used instead.")
class DownloadFileAction(WebAction):
action_type: ActionType = ActionType.DOWNLOAD_FILE
file_name: str
@@ -158,6 +163,7 @@ def parse_actions(task: Task, json_response: List[Dict[str, Any]]) -> List[Actio
# TODO: see if the element is a file input element. if it's not, convert this action into a click action
actions.append(UploadFileAction(element_id=element_id, file_url=action["file_url"], reasoning=reasoning))
# This action is not used in the current implementation. Click actions are used instead.
elif action_type == ActionType.DOWNLOAD_FILE:
actions.append(
DownloadFileAction(element_id=element_id, file_name=action["file_name"], reasoning=reasoning)
@@ -214,7 +220,8 @@ ActionTypeUnion = (
ClickAction
| InputTextAction
| UploadFileAction
| DownloadFileAction
# Deprecated
# | DownloadFileAction
| SelectOptionAction
| CheckboxAction
| WaitAction

View File

@@ -6,9 +6,10 @@ import uuid
from typing import Any, Awaitable, Callable, List
import structlog
from deprecation import deprecated
from playwright.async_api import Locator, Page
from skyvern.constants import SKYVERN_DIR
from skyvern.constants import REPO_ROOT_DIR
from skyvern.exceptions import ImaginaryFileUrl, MissingElement, MissingFileUrl, MultipleElementsFound
from skyvern.forge import app
from skyvern.forge.prompts import prompt_engine
@@ -205,12 +206,13 @@ async def handle_upload_file_action(
)
@deprecated("This function is deprecated. Downloads are handled by the click action handler now.")
async def handle_download_file_action(
action: actions.DownloadFileAction, page: Page, scraped_page: ScrapedPage, task: Task, step: Step
) -> list[ActionResult]:
xpath = await validate_actions_in_dom(action, page, scraped_page)
file_name = f"{action.file_name or uuid.uuid4()}"
full_file_path = f"{SKYVERN_DIR}/downloads/{task.workflow_run_id or task.task_id}/{file_name}"
full_file_path = f"{REPO_ROOT_DIR}/downloads/{task.workflow_run_id or task.task_id}/{file_name}"
try:
# Start waiting for the download
async with page.expect_download() as download_info:
@@ -222,7 +224,7 @@ async def handle_download_file_action(
download = await download_info.value
# Create download folders if they don't exist
download_folder = f"{SKYVERN_DIR}/downloads/{task.workflow_run_id or task.task_id}"
download_folder = f"{REPO_ROOT_DIR}/downloads/{task.workflow_run_id or task.task_id}"
os.makedirs(download_folder, exist_ok=True)
# Wait for the download process to complete and save the downloaded file
await download.save_as(full_file_path)
@@ -452,7 +454,7 @@ ActionHandler.register_action_type(ActionType.SOLVE_CAPTCHA, handle_solve_captch
ActionHandler.register_action_type(ActionType.CLICK, handle_click_action)
ActionHandler.register_action_type(ActionType.INPUT_TEXT, handle_input_text_action)
ActionHandler.register_action_type(ActionType.UPLOAD_FILE, handle_upload_file_action)
ActionHandler.register_action_type(ActionType.DOWNLOAD_FILE, handle_download_file_action)
# ActionHandler.register_action_type(ActionType.DOWNLOAD_FILE, handle_download_file_action)
ActionHandler.register_action_type(ActionType.NULL_ACTION, handle_null_action)
ActionHandler.register_action_type(ActionType.SELECT_OPTION, handle_select_option_action)
ActionHandler.register_action_type(ActionType.WAIT, handle_wait_action)
@@ -525,8 +527,18 @@ async def chain_click(
fc_func = lambda fc: fc.set_files(files=file)
page.on("filechooser", fc_func)
LOG.info("Registered file chooser listener", action=action, path=file)
# If a download is triggered due to the click, we need to let LLM know in action_results
download_triggered = False
def download_func(download: Any) -> None:
nonlocal download_triggered
download_triggered = True
page.on("download", download_func)
LOG.info("Registered download listener", action=action)
"""
Clicks on an element identified by the xpath and its parent if failed.
:param xpath: xpath of the element to click
@@ -535,12 +547,15 @@ async def chain_click(
try:
await page.click(f"xpath={xpath}", timeout=timeout)
LOG.info("Chain click: main element click succeeded", action=action, xpath=xpath)
return [ActionSuccess(javascript_triggered=javascript_triggered)]
return [ActionSuccess(javascript_triggered=javascript_triggered, download_triggered=download_triggered)]
except Exception as e:
action_results: list[ActionResult] = [ActionFailure(e, javascript_triggered=javascript_triggered)]
action_results: list[ActionResult] = [
ActionFailure(e, javascript_triggered=javascript_triggered, download_triggered=download_triggered)
]
if await is_input_element(page.locator(xpath)):
LOG.info("Chain click: it's an input element. going to try sibling click", action=action, xpath=xpath)
sibling_action_result = await click_sibling_of_input(page.locator(xpath), timeout=timeout)
sibling_action_result.download_triggered = download_triggered
action_results.append(sibling_action_result)
if type(sibling_action_result) == ActionSuccess:
return action_results
@@ -556,6 +571,7 @@ async def chain_click(
ActionSuccess(
javascript_triggered=javascript_triggered,
interacted_with_parent=True,
download_triggered=download_triggered,
)
)
except Exception as pe:
@@ -575,6 +591,7 @@ async def chain_click(
if file:
await asyncio.sleep(10)
page.remove_listener("filechooser", fc_func)
page.remove_listener("download", download_func)
def get_anchor_to_click(scraped_page: ScrapedPage, element_id: int) -> str | None:

View File

@@ -13,6 +13,7 @@ class ActionResult(BaseModel):
step_retry_number: int | None = None
step_order: int | None = None
javascript_triggered: bool = False
download_triggered: bool | None = None
# None is used for old data so that we can differentiate between old and new data which only has boolean
interacted_with_sibling: bool | None = None
interacted_with_parent: bool | None = None
@@ -32,6 +33,7 @@ class ActionSuccess(ActionResult):
self,
data: dict[str, Any] | list | str | None = None,
javascript_triggered: bool = False,
download_triggered: bool | None = None,
interacted_with_sibling: bool = False,
interacted_with_parent: bool = False,
):
@@ -39,6 +41,7 @@ class ActionSuccess(ActionResult):
success=True,
data=data,
javascript_triggered=javascript_triggered,
download_triggered=download_triggered,
interacted_with_sibling=interacted_with_sibling,
interacted_with_parent=interacted_with_parent,
)
@@ -49,6 +52,7 @@ class ActionFailure(ActionResult):
self,
exception: Exception,
javascript_triggered: bool = False,
download_triggered: bool | None = None,
interacted_with_sibling: bool = False,
interacted_with_parent: bool = False,
):
@@ -57,6 +61,7 @@ class ActionFailure(ActionResult):
exception_type=type(exception).__name__,
exception_message=remove_whitespace(str(exception)),
javascript_triggered=javascript_triggered,
download_triggered=download_triggered,
interacted_with_sibling=interacted_with_sibling,
interacted_with_parent=interacted_with_parent,
)
@@ -68,12 +73,14 @@ class ActionAbort(ActionResult):
def __init__(
self,
javascript_triggered: bool = False,
download_triggered: bool | None = None,
interacted_with_sibling: bool = False,
interacted_with_parent: bool = False,
):
super().__init__(
success=True,
javascript_triggered=javascript_triggered,
download_triggered=download_triggered,
interacted_with_sibling=interacted_with_sibling,
interacted_with_parent=interacted_with_parent,
)