Ykeremy/click instead of download (#275)
This commit is contained in:
@@ -12,11 +12,10 @@ Reply in JSON format with the following keys:
|
||||
[{
|
||||
"reasoning": str, // The reasoning behind the action. Be specific, referencing any user information and their fields and element ids in your reasoning. Mention why you chose the action type, and why you chose the element id. Keep the reasoning short and to the point.
|
||||
"confidence_float": float, // The confidence of the action. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence
|
||||
"action_type": str, // It's a string enum: "CLICK", "INPUT_TEXT", "UPLOAD_FILE", "DOWNLOAD_FILE", "SELECT_OPTION", "WAIT", "SOLVE_CAPTCHA", "COMPLETE", "TERMINATE". "CLICK" is an element you'd like to click. "INPUT_TEXT" is an element you'd like to input text into. "UPLOAD_FILE" is an element you'd like to upload a file into. "DOWNLOAD_FILE" is an element you'd like to download a file from, and the file will be saved with the name provided in the "file_name" field. You can download multiple files in one action by returning multiple "DOWNLOAD_FILE" actions in one step. The "file_name" field should be unique for each file download action. Do not download the same file multiple times, check action history to see if the file has already been downloaded. "SELECT_OPTION" is an element you'd like to select an option from. "WAIT" action should be used if there are no actions to take and there is some indication on screen that waiting could yield more actions. "WAIT" should not be used if there are actions to take. "SOLVE_CAPTCHA" should be used if there's a captcha to solve on the screen. "COMPLETE" is used when the user goal has been achieved AND if there's any data extraction goal, you should be able to get data from the page. Never return a COMPLETE action unless the user goal is achieved. "TERMINATE" is used to terminate the whole task with a failure when it doesn't seem like the user goal can be achieved. Do not use "TERMINATE" if waiting could lead the user towards the goal. Only return "TERMINATE" if you are on a page where the user goal cannot be achieved. All other actions are ignored when "TERMINATE" is returned.
|
||||
"action_type": str, // It's a string enum: "CLICK", "INPUT_TEXT", "UPLOAD_FILE", "SELECT_OPTION", "WAIT", "SOLVE_CAPTCHA", "COMPLETE", "TERMINATE". "CLICK" is an element you'd like to click. "INPUT_TEXT" is an element you'd like to input text into. "UPLOAD_FILE" is an element you'd like to upload a file into. "SELECT_OPTION" is an element you'd like to select an option from. "WAIT" action should be used if there are no actions to take and there is some indication on screen that waiting could yield more actions. "WAIT" should not be used if there are actions to take. "SOLVE_CAPTCHA" should be used if there's a captcha to solve on the screen. "COMPLETE" is used when the user goal has been achieved AND if there's any data extraction goal, you should be able to get data from the page. Never return a COMPLETE action unless the user goal is achieved. "TERMINATE" is used to terminate the whole task with a failure when it doesn't seem like the user goal can be achieved. Do not use "TERMINATE" if waiting could lead the user towards the goal. Only return "TERMINATE" if you are on a page where the user goal cannot be achieved. All other actions are ignored when "TERMINATE" is returned.
|
||||
"id": int, // The id of the element to take action on. The id has to be one from the elements list
|
||||
"text": str, // Text for INPUT_TEXT action only
|
||||
"file_url": str, // The url of the file to upload if applicable. This field must be present for UPLOAD_FILE but can also be present for CLICK only if the click is to upload the file. It should be null otherwise.
|
||||
"file_name": str, // The name of the file to save the downloaded file as. This field must be present for DOWNLOAD_FILE only. It should be null otherwise.
|
||||
"option": { // The option to select for SELECT_OPTION action only. null if not SELECT_OPTION action
|
||||
"label": str, // the label of the option if any. MAKE SURE YOU USE THIS LABEL TO SELECT THE OPTION. DO NOT PUT ANYTHING OTHER THAN A VALID OPTION LABEL HERE
|
||||
"index": int, // the id corresponding to the optionIndex under the select element.
|
||||
|
||||
@@ -7,7 +7,7 @@ from urllib.parse import urlparse
|
||||
import aiohttp
|
||||
import structlog
|
||||
|
||||
from skyvern.constants import SKYVERN_DIR
|
||||
from skyvern.constants import REPO_ROOT_DIR
|
||||
from skyvern.exceptions import DownloadFileMaxSizeExceeded
|
||||
|
||||
LOG = structlog.get_logger()
|
||||
@@ -67,4 +67,4 @@ def zip_files(files_path: str, zip_file_path: str) -> str:
|
||||
|
||||
|
||||
def get_path_for_workflow_download_directory(workflow_run_id: str) -> Path:
|
||||
return Path(f"{SKYVERN_DIR}/downloads/{workflow_run_id}/")
|
||||
return Path(f"{REPO_ROOT_DIR}/downloads/{workflow_run_id}/")
|
||||
|
||||
@@ -3,6 +3,7 @@ from enum import StrEnum
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import structlog
|
||||
from deprecation import deprecated
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from skyvern.forge.sdk.schemas.tasks import Task
|
||||
@@ -14,7 +15,10 @@ class ActionType(StrEnum):
|
||||
CLICK = "click"
|
||||
INPUT_TEXT = "input_text"
|
||||
UPLOAD_FILE = "upload_file"
|
||||
|
||||
# This action is not used in the current implementation. Click actions are used instead.
|
||||
DOWNLOAD_FILE = "download_file"
|
||||
|
||||
SELECT_OPTION = "select_option"
|
||||
CHECKBOX = "checkbox"
|
||||
WAIT = "wait"
|
||||
@@ -70,6 +74,7 @@ class UploadFileAction(WebAction):
|
||||
return f"UploadFileAction(element_id={self.element_id}, file={self.file_url}, is_upload_file_tag={self.is_upload_file_tag})"
|
||||
|
||||
|
||||
@deprecated("This action is not used in the current implementation. Click actions are used instead.")
|
||||
class DownloadFileAction(WebAction):
|
||||
action_type: ActionType = ActionType.DOWNLOAD_FILE
|
||||
file_name: str
|
||||
@@ -158,6 +163,7 @@ def parse_actions(task: Task, json_response: List[Dict[str, Any]]) -> List[Actio
|
||||
# TODO: see if the element is a file input element. if it's not, convert this action into a click action
|
||||
|
||||
actions.append(UploadFileAction(element_id=element_id, file_url=action["file_url"], reasoning=reasoning))
|
||||
# This action is not used in the current implementation. Click actions are used instead.
|
||||
elif action_type == ActionType.DOWNLOAD_FILE:
|
||||
actions.append(
|
||||
DownloadFileAction(element_id=element_id, file_name=action["file_name"], reasoning=reasoning)
|
||||
@@ -214,7 +220,8 @@ ActionTypeUnion = (
|
||||
ClickAction
|
||||
| InputTextAction
|
||||
| UploadFileAction
|
||||
| DownloadFileAction
|
||||
# Deprecated
|
||||
# | DownloadFileAction
|
||||
| SelectOptionAction
|
||||
| CheckboxAction
|
||||
| WaitAction
|
||||
|
||||
@@ -6,9 +6,10 @@ import uuid
|
||||
from typing import Any, Awaitable, Callable, List
|
||||
|
||||
import structlog
|
||||
from deprecation import deprecated
|
||||
from playwright.async_api import Locator, Page
|
||||
|
||||
from skyvern.constants import SKYVERN_DIR
|
||||
from skyvern.constants import REPO_ROOT_DIR
|
||||
from skyvern.exceptions import ImaginaryFileUrl, MissingElement, MissingFileUrl, MultipleElementsFound
|
||||
from skyvern.forge import app
|
||||
from skyvern.forge.prompts import prompt_engine
|
||||
@@ -205,12 +206,13 @@ async def handle_upload_file_action(
|
||||
)
|
||||
|
||||
|
||||
@deprecated("This function is deprecated. Downloads are handled by the click action handler now.")
|
||||
async def handle_download_file_action(
|
||||
action: actions.DownloadFileAction, page: Page, scraped_page: ScrapedPage, task: Task, step: Step
|
||||
) -> list[ActionResult]:
|
||||
xpath = await validate_actions_in_dom(action, page, scraped_page)
|
||||
file_name = f"{action.file_name or uuid.uuid4()}"
|
||||
full_file_path = f"{SKYVERN_DIR}/downloads/{task.workflow_run_id or task.task_id}/{file_name}"
|
||||
full_file_path = f"{REPO_ROOT_DIR}/downloads/{task.workflow_run_id or task.task_id}/{file_name}"
|
||||
try:
|
||||
# Start waiting for the download
|
||||
async with page.expect_download() as download_info:
|
||||
@@ -222,7 +224,7 @@ async def handle_download_file_action(
|
||||
download = await download_info.value
|
||||
|
||||
# Create download folders if they don't exist
|
||||
download_folder = f"{SKYVERN_DIR}/downloads/{task.workflow_run_id or task.task_id}"
|
||||
download_folder = f"{REPO_ROOT_DIR}/downloads/{task.workflow_run_id or task.task_id}"
|
||||
os.makedirs(download_folder, exist_ok=True)
|
||||
# Wait for the download process to complete and save the downloaded file
|
||||
await download.save_as(full_file_path)
|
||||
@@ -452,7 +454,7 @@ ActionHandler.register_action_type(ActionType.SOLVE_CAPTCHA, handle_solve_captch
|
||||
ActionHandler.register_action_type(ActionType.CLICK, handle_click_action)
|
||||
ActionHandler.register_action_type(ActionType.INPUT_TEXT, handle_input_text_action)
|
||||
ActionHandler.register_action_type(ActionType.UPLOAD_FILE, handle_upload_file_action)
|
||||
ActionHandler.register_action_type(ActionType.DOWNLOAD_FILE, handle_download_file_action)
|
||||
# ActionHandler.register_action_type(ActionType.DOWNLOAD_FILE, handle_download_file_action)
|
||||
ActionHandler.register_action_type(ActionType.NULL_ACTION, handle_null_action)
|
||||
ActionHandler.register_action_type(ActionType.SELECT_OPTION, handle_select_option_action)
|
||||
ActionHandler.register_action_type(ActionType.WAIT, handle_wait_action)
|
||||
@@ -525,8 +527,18 @@ async def chain_click(
|
||||
|
||||
fc_func = lambda fc: fc.set_files(files=file)
|
||||
page.on("filechooser", fc_func)
|
||||
|
||||
LOG.info("Registered file chooser listener", action=action, path=file)
|
||||
|
||||
# If a download is triggered by the click, we need to let the LLM know via action_results
|
||||
download_triggered = False
|
||||
|
||||
def download_func(download: Any) -> None:
|
||||
nonlocal download_triggered
|
||||
download_triggered = True
|
||||
|
||||
page.on("download", download_func)
|
||||
LOG.info("Registered download listener", action=action)
|
||||
|
||||
"""
|
||||
Clicks on an element identified by the xpath and its parent if failed.
|
||||
:param xpath: xpath of the element to click
|
||||
@@ -535,12 +547,15 @@ async def chain_click(
|
||||
try:
|
||||
await page.click(f"xpath={xpath}", timeout=timeout)
|
||||
LOG.info("Chain click: main element click succeeded", action=action, xpath=xpath)
|
||||
return [ActionSuccess(javascript_triggered=javascript_triggered)]
|
||||
return [ActionSuccess(javascript_triggered=javascript_triggered, download_triggered=download_triggered)]
|
||||
except Exception as e:
|
||||
action_results: list[ActionResult] = [ActionFailure(e, javascript_triggered=javascript_triggered)]
|
||||
action_results: list[ActionResult] = [
|
||||
ActionFailure(e, javascript_triggered=javascript_triggered, download_triggered=download_triggered)
|
||||
]
|
||||
if await is_input_element(page.locator(xpath)):
|
||||
LOG.info("Chain click: it's an input element. going to try sibling click", action=action, xpath=xpath)
|
||||
sibling_action_result = await click_sibling_of_input(page.locator(xpath), timeout=timeout)
|
||||
sibling_action_result.download_triggered = download_triggered
|
||||
action_results.append(sibling_action_result)
|
||||
if type(sibling_action_result) == ActionSuccess:
|
||||
return action_results
|
||||
@@ -556,6 +571,7 @@ async def chain_click(
|
||||
ActionSuccess(
|
||||
javascript_triggered=javascript_triggered,
|
||||
interacted_with_parent=True,
|
||||
download_triggered=download_triggered,
|
||||
)
|
||||
)
|
||||
except Exception as pe:
|
||||
@@ -575,6 +591,7 @@ async def chain_click(
|
||||
if file:
|
||||
await asyncio.sleep(10)
|
||||
page.remove_listener("filechooser", fc_func)
|
||||
page.remove_listener("download", download_func)
|
||||
|
||||
|
||||
def get_anchor_to_click(scraped_page: ScrapedPage, element_id: int) -> str | None:
|
||||
|
||||
@@ -13,6 +13,7 @@ class ActionResult(BaseModel):
|
||||
step_retry_number: int | None = None
|
||||
step_order: int | None = None
|
||||
javascript_triggered: bool = False
|
||||
download_triggered: bool | None = None
|
||||
# None is used for old data so that we can differentiate it from new data, which only has boolean values
|
||||
interacted_with_sibling: bool | None = None
|
||||
interacted_with_parent: bool | None = None
|
||||
@@ -32,6 +33,7 @@ class ActionSuccess(ActionResult):
|
||||
self,
|
||||
data: dict[str, Any] | list | str | None = None,
|
||||
javascript_triggered: bool = False,
|
||||
download_triggered: bool | None = None,
|
||||
interacted_with_sibling: bool = False,
|
||||
interacted_with_parent: bool = False,
|
||||
):
|
||||
@@ -39,6 +41,7 @@ class ActionSuccess(ActionResult):
|
||||
success=True,
|
||||
data=data,
|
||||
javascript_triggered=javascript_triggered,
|
||||
download_triggered=download_triggered,
|
||||
interacted_with_sibling=interacted_with_sibling,
|
||||
interacted_with_parent=interacted_with_parent,
|
||||
)
|
||||
@@ -49,6 +52,7 @@ class ActionFailure(ActionResult):
|
||||
self,
|
||||
exception: Exception,
|
||||
javascript_triggered: bool = False,
|
||||
download_triggered: bool | None = None,
|
||||
interacted_with_sibling: bool = False,
|
||||
interacted_with_parent: bool = False,
|
||||
):
|
||||
@@ -57,6 +61,7 @@ class ActionFailure(ActionResult):
|
||||
exception_type=type(exception).__name__,
|
||||
exception_message=remove_whitespace(str(exception)),
|
||||
javascript_triggered=javascript_triggered,
|
||||
download_triggered=download_triggered,
|
||||
interacted_with_sibling=interacted_with_sibling,
|
||||
interacted_with_parent=interacted_with_parent,
|
||||
)
|
||||
@@ -68,12 +73,14 @@ class ActionAbort(ActionResult):
|
||||
def __init__(
|
||||
self,
|
||||
javascript_triggered: bool = False,
|
||||
download_triggered: bool | None = None,
|
||||
interacted_with_sibling: bool = False,
|
||||
interacted_with_parent: bool = False,
|
||||
):
|
||||
super().__init__(
|
||||
success=True,
|
||||
javascript_triggered=javascript_triggered,
|
||||
download_triggered=download_triggered,
|
||||
interacted_with_sibling=interacted_with_sibling,
|
||||
interacted_with_parent=interacted_with_parent,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user