Ykeremy/click instead of download (#275)
This commit is contained in:
@@ -12,11 +12,10 @@ Reply in JSON format with the following keys:
|
|||||||
[{
|
[{
|
||||||
"reasoning": str, // The reasoning behind the action. Be specific, referencing any user information and their fields and element ids in your reasoning. Mention why you chose the action type, and why you chose the element id. Keep the reasoning short and to the point.
|
"reasoning": str, // The reasoning behind the action. Be specific, referencing any user information and their fields and element ids in your reasoning. Mention why you chose the action type, and why you chose the element id. Keep the reasoning short and to the point.
|
||||||
"confidence_float": float, // The confidence of the action. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence
|
"confidence_float": float, // The confidence of the action. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence
|
||||||
"action_type": str, // It's a string enum: "CLICK", "INPUT_TEXT", "UPLOAD_FILE", "DOWNLOAD_FILE", "SELECT_OPTION", "WAIT", "SOLVE_CAPTCHA", "COMPLETE", "TERMINATE". "CLICK" is an element you'd like to click. "INPUT_TEXT" is an element you'd like to input text into. "UPLOAD_FILE" is an element you'd like to upload a file into. "DOWNLOAD_FILE" is an element you'd like to download a file from, and the file will be saved with the name provided in the "file_name" field. You can download multiple files in one action by returning multiple "DOWNLOAD_FILE" actions in one step. The "file_name" field should be unique for each file download action. Do not download the same file multiple times, check action history to see if the file has already been downloaded. "SELECT_OPTION" is an element you'd like to select an option from. "WAIT" action should be used if there are no actions to take and there is some indication on screen that waiting could yield more actions. "WAIT" should not be used if there are actions to take. "SOLVE_CAPTCHA" should be used if there's a captcha to solve on the screen. "COMPLETE" is used when the user goal has been achieved AND if there's any data extraction goal, you should be able to get data from the page. Never return a COMPLETE action unless the user goal is achieved. "TERMINATE" is used to terminate the whole task with a failure when it doesn't seem like the user goal can be achieved. Do not use "TERMINATE" if waiting could lead the user towards the goal. Only return "TERMINATE" if you are on a page where the user goal cannot be achieved. All other actions are ignored when "TERMINATE" is returned.
|
"action_type": str, // It's a string enum: "CLICK", "INPUT_TEXT", "UPLOAD_FILE", "SELECT_OPTION", "WAIT", "SOLVE_CAPTCHA", "COMPLETE", "TERMINATE". "CLICK" is an element you'd like to click. "INPUT_TEXT" is an element you'd like to input text into. "UPLOAD_FILE" is an element you'd like to upload a file into. "SELECT_OPTION" is an element you'd like to select an option from. "WAIT" action should be used if there are no actions to take and there is some indication on screen that waiting could yield more actions. "WAIT" should not be used if there are actions to take. "SOLVE_CAPTCHA" should be used if there's a captcha to solve on the screen. "COMPLETE" is used when the user goal has been achieved AND if there's any data extraction goal, you should be able to get data from the page. Never return a COMPLETE action unless the user goal is achieved. "TERMINATE" is used to terminate the whole task with a failure when it doesn't seem like the user goal can be achieved. Do not use "TERMINATE" if waiting could lead the user towards the goal. Only return "TERMINATE" if you are on a page where the user goal cannot be achieved. All other actions are ignored when "TERMINATE" is returned.
|
||||||
"id": int, // The id of the element to take action on. The id has to be one from the elements list
|
"id": int, // The id of the element to take action on. The id has to be one from the elements list
|
||||||
"text": str, // Text for INPUT_TEXT action only
|
"text": str, // Text for INPUT_TEXT action only
|
||||||
"file_url": str, // The url of the file to upload if applicable. This field must be present for UPLOAD_FILE but can also be present for CLICK only if the click is to upload the file. It should be null otherwise.
|
"file_url": str, // The url of the file to upload if applicable. This field must be present for UPLOAD_FILE but can also be present for CLICK only if the click is to upload the file. It should be null otherwise.
|
||||||
"file_name": str, // The name of the file to save the downloaded file as. This field must be present for DOWNLOAD_FILE only. It should be null otherwise.
|
|
||||||
"option": { // The option to select for SELECT_OPTION action only. null if not SELECT_OPTION action
|
"option": { // The option to select for SELECT_OPTION action only. null if not SELECT_OPTION action
|
||||||
"label": str, // the label of the option if any. MAKE SURE YOU USE THIS LABEL TO SELECT THE OPTION. DO NOT PUT ANYTHING OTHER THAN A VALID OPTION LABEL HERE
|
"label": str, // the label of the option if any. MAKE SURE YOU USE THIS LABEL TO SELECT THE OPTION. DO NOT PUT ANYTHING OTHER THAN A VALID OPTION LABEL HERE
|
||||||
"index": int, // the id corresponding to the optionIndex under the select element.
|
"index": int, // the id corresponding to the optionIndex under the select element.
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ from urllib.parse import urlparse
|
|||||||
import aiohttp
|
import aiohttp
|
||||||
import structlog
|
import structlog
|
||||||
|
|
||||||
from skyvern.constants import SKYVERN_DIR
|
from skyvern.constants import REPO_ROOT_DIR
|
||||||
from skyvern.exceptions import DownloadFileMaxSizeExceeded
|
from skyvern.exceptions import DownloadFileMaxSizeExceeded
|
||||||
|
|
||||||
LOG = structlog.get_logger()
|
LOG = structlog.get_logger()
|
||||||
@@ -67,4 +67,4 @@ def zip_files(files_path: str, zip_file_path: str) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def get_path_for_workflow_download_directory(workflow_run_id: str) -> Path:
|
def get_path_for_workflow_download_directory(workflow_run_id: str) -> Path:
|
||||||
return Path(f"{SKYVERN_DIR}/downloads/{workflow_run_id}/")
|
return Path(f"{REPO_ROOT_DIR}/downloads/{workflow_run_id}/")
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ from enum import StrEnum
|
|||||||
from typing import Any, Dict, List
|
from typing import Any, Dict, List
|
||||||
|
|
||||||
import structlog
|
import structlog
|
||||||
|
from deprecation import deprecated
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
from skyvern.forge.sdk.schemas.tasks import Task
|
from skyvern.forge.sdk.schemas.tasks import Task
|
||||||
@@ -14,7 +15,10 @@ class ActionType(StrEnum):
|
|||||||
CLICK = "click"
|
CLICK = "click"
|
||||||
INPUT_TEXT = "input_text"
|
INPUT_TEXT = "input_text"
|
||||||
UPLOAD_FILE = "upload_file"
|
UPLOAD_FILE = "upload_file"
|
||||||
|
|
||||||
|
# This action is not used in the current implementation. Click actions are used instead.
|
||||||
DOWNLOAD_FILE = "download_file"
|
DOWNLOAD_FILE = "download_file"
|
||||||
|
|
||||||
SELECT_OPTION = "select_option"
|
SELECT_OPTION = "select_option"
|
||||||
CHECKBOX = "checkbox"
|
CHECKBOX = "checkbox"
|
||||||
WAIT = "wait"
|
WAIT = "wait"
|
||||||
@@ -70,6 +74,7 @@ class UploadFileAction(WebAction):
|
|||||||
return f"UploadFileAction(element_id={self.element_id}, file={self.file_url}, is_upload_file_tag={self.is_upload_file_tag})"
|
return f"UploadFileAction(element_id={self.element_id}, file={self.file_url}, is_upload_file_tag={self.is_upload_file_tag})"
|
||||||
|
|
||||||
|
|
||||||
|
@deprecated("This action is not used in the current implementation. Click actions are used instead.")
|
||||||
class DownloadFileAction(WebAction):
|
class DownloadFileAction(WebAction):
|
||||||
action_type: ActionType = ActionType.DOWNLOAD_FILE
|
action_type: ActionType = ActionType.DOWNLOAD_FILE
|
||||||
file_name: str
|
file_name: str
|
||||||
@@ -158,6 +163,7 @@ def parse_actions(task: Task, json_response: List[Dict[str, Any]]) -> List[Actio
|
|||||||
# TODO: see if the element is a file input element. if it's not, convert this action into a click action
|
# TODO: see if the element is a file input element. if it's not, convert this action into a click action
|
||||||
|
|
||||||
actions.append(UploadFileAction(element_id=element_id, file_url=action["file_url"], reasoning=reasoning))
|
actions.append(UploadFileAction(element_id=element_id, file_url=action["file_url"], reasoning=reasoning))
|
||||||
|
# This action is not used in the current implementation. Click actions are used instead.
|
||||||
elif action_type == ActionType.DOWNLOAD_FILE:
|
elif action_type == ActionType.DOWNLOAD_FILE:
|
||||||
actions.append(
|
actions.append(
|
||||||
DownloadFileAction(element_id=element_id, file_name=action["file_name"], reasoning=reasoning)
|
DownloadFileAction(element_id=element_id, file_name=action["file_name"], reasoning=reasoning)
|
||||||
@@ -214,7 +220,8 @@ ActionTypeUnion = (
|
|||||||
ClickAction
|
ClickAction
|
||||||
| InputTextAction
|
| InputTextAction
|
||||||
| UploadFileAction
|
| UploadFileAction
|
||||||
| DownloadFileAction
|
# Deprecated
|
||||||
|
# | DownloadFileAction
|
||||||
| SelectOptionAction
|
| SelectOptionAction
|
||||||
| CheckboxAction
|
| CheckboxAction
|
||||||
| WaitAction
|
| WaitAction
|
||||||
|
|||||||
@@ -6,9 +6,10 @@ import uuid
|
|||||||
from typing import Any, Awaitable, Callable, List
|
from typing import Any, Awaitable, Callable, List
|
||||||
|
|
||||||
import structlog
|
import structlog
|
||||||
|
from deprecation import deprecated
|
||||||
from playwright.async_api import Locator, Page
|
from playwright.async_api import Locator, Page
|
||||||
|
|
||||||
from skyvern.constants import SKYVERN_DIR
|
from skyvern.constants import REPO_ROOT_DIR
|
||||||
from skyvern.exceptions import ImaginaryFileUrl, MissingElement, MissingFileUrl, MultipleElementsFound
|
from skyvern.exceptions import ImaginaryFileUrl, MissingElement, MissingFileUrl, MultipleElementsFound
|
||||||
from skyvern.forge import app
|
from skyvern.forge import app
|
||||||
from skyvern.forge.prompts import prompt_engine
|
from skyvern.forge.prompts import prompt_engine
|
||||||
@@ -205,12 +206,13 @@ async def handle_upload_file_action(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@deprecated("This function is deprecated. Downloads are handled by the click action handler now.")
|
||||||
async def handle_download_file_action(
|
async def handle_download_file_action(
|
||||||
action: actions.DownloadFileAction, page: Page, scraped_page: ScrapedPage, task: Task, step: Step
|
action: actions.DownloadFileAction, page: Page, scraped_page: ScrapedPage, task: Task, step: Step
|
||||||
) -> list[ActionResult]:
|
) -> list[ActionResult]:
|
||||||
xpath = await validate_actions_in_dom(action, page, scraped_page)
|
xpath = await validate_actions_in_dom(action, page, scraped_page)
|
||||||
file_name = f"{action.file_name or uuid.uuid4()}"
|
file_name = f"{action.file_name or uuid.uuid4()}"
|
||||||
full_file_path = f"{SKYVERN_DIR}/downloads/{task.workflow_run_id or task.task_id}/{file_name}"
|
full_file_path = f"{REPO_ROOT_DIR}/downloads/{task.workflow_run_id or task.task_id}/{file_name}"
|
||||||
try:
|
try:
|
||||||
# Start waiting for the download
|
# Start waiting for the download
|
||||||
async with page.expect_download() as download_info:
|
async with page.expect_download() as download_info:
|
||||||
@@ -222,7 +224,7 @@ async def handle_download_file_action(
|
|||||||
download = await download_info.value
|
download = await download_info.value
|
||||||
|
|
||||||
# Create download folders if they don't exist
|
# Create download folders if they don't exist
|
||||||
download_folder = f"{SKYVERN_DIR}/downloads/{task.workflow_run_id or task.task_id}"
|
download_folder = f"{REPO_ROOT_DIR}/downloads/{task.workflow_run_id or task.task_id}"
|
||||||
os.makedirs(download_folder, exist_ok=True)
|
os.makedirs(download_folder, exist_ok=True)
|
||||||
# Wait for the download process to complete and save the downloaded file
|
# Wait for the download process to complete and save the downloaded file
|
||||||
await download.save_as(full_file_path)
|
await download.save_as(full_file_path)
|
||||||
@@ -452,7 +454,7 @@ ActionHandler.register_action_type(ActionType.SOLVE_CAPTCHA, handle_solve_captch
|
|||||||
ActionHandler.register_action_type(ActionType.CLICK, handle_click_action)
|
ActionHandler.register_action_type(ActionType.CLICK, handle_click_action)
|
||||||
ActionHandler.register_action_type(ActionType.INPUT_TEXT, handle_input_text_action)
|
ActionHandler.register_action_type(ActionType.INPUT_TEXT, handle_input_text_action)
|
||||||
ActionHandler.register_action_type(ActionType.UPLOAD_FILE, handle_upload_file_action)
|
ActionHandler.register_action_type(ActionType.UPLOAD_FILE, handle_upload_file_action)
|
||||||
ActionHandler.register_action_type(ActionType.DOWNLOAD_FILE, handle_download_file_action)
|
# ActionHandler.register_action_type(ActionType.DOWNLOAD_FILE, handle_download_file_action)
|
||||||
ActionHandler.register_action_type(ActionType.NULL_ACTION, handle_null_action)
|
ActionHandler.register_action_type(ActionType.NULL_ACTION, handle_null_action)
|
||||||
ActionHandler.register_action_type(ActionType.SELECT_OPTION, handle_select_option_action)
|
ActionHandler.register_action_type(ActionType.SELECT_OPTION, handle_select_option_action)
|
||||||
ActionHandler.register_action_type(ActionType.WAIT, handle_wait_action)
|
ActionHandler.register_action_type(ActionType.WAIT, handle_wait_action)
|
||||||
@@ -525,8 +527,18 @@ async def chain_click(
|
|||||||
|
|
||||||
fc_func = lambda fc: fc.set_files(files=file)
|
fc_func = lambda fc: fc.set_files(files=file)
|
||||||
page.on("filechooser", fc_func)
|
page.on("filechooser", fc_func)
|
||||||
|
|
||||||
LOG.info("Registered file chooser listener", action=action, path=file)
|
LOG.info("Registered file chooser listener", action=action, path=file)
|
||||||
|
|
||||||
|
# If the click triggers a download, we need to let the LLM know via action_results
|
||||||
|
download_triggered = False
|
||||||
|
|
||||||
|
def download_func(download: Any) -> None:
|
||||||
|
nonlocal download_triggered
|
||||||
|
download_triggered = True
|
||||||
|
|
||||||
|
page.on("download", download_func)
|
||||||
|
LOG.info("Registered download listener", action=action)
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Clicks on an element identified by the xpath and its parent if failed.
|
Clicks on an element identified by the xpath and its parent if failed.
|
||||||
:param xpath: xpath of the element to click
|
:param xpath: xpath of the element to click
|
||||||
@@ -535,12 +547,15 @@ async def chain_click(
|
|||||||
try:
|
try:
|
||||||
await page.click(f"xpath={xpath}", timeout=timeout)
|
await page.click(f"xpath={xpath}", timeout=timeout)
|
||||||
LOG.info("Chain click: main element click succeeded", action=action, xpath=xpath)
|
LOG.info("Chain click: main element click succeeded", action=action, xpath=xpath)
|
||||||
return [ActionSuccess(javascript_triggered=javascript_triggered)]
|
return [ActionSuccess(javascript_triggered=javascript_triggered, download_triggered=download_triggered)]
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
action_results: list[ActionResult] = [ActionFailure(e, javascript_triggered=javascript_triggered)]
|
action_results: list[ActionResult] = [
|
||||||
|
ActionFailure(e, javascript_triggered=javascript_triggered, download_triggered=download_triggered)
|
||||||
|
]
|
||||||
if await is_input_element(page.locator(xpath)):
|
if await is_input_element(page.locator(xpath)):
|
||||||
LOG.info("Chain click: it's an input element. going to try sibling click", action=action, xpath=xpath)
|
LOG.info("Chain click: it's an input element. going to try sibling click", action=action, xpath=xpath)
|
||||||
sibling_action_result = await click_sibling_of_input(page.locator(xpath), timeout=timeout)
|
sibling_action_result = await click_sibling_of_input(page.locator(xpath), timeout=timeout)
|
||||||
|
sibling_action_result.download_triggered = download_triggered
|
||||||
action_results.append(sibling_action_result)
|
action_results.append(sibling_action_result)
|
||||||
if type(sibling_action_result) == ActionSuccess:
|
if type(sibling_action_result) == ActionSuccess:
|
||||||
return action_results
|
return action_results
|
||||||
@@ -556,6 +571,7 @@ async def chain_click(
|
|||||||
ActionSuccess(
|
ActionSuccess(
|
||||||
javascript_triggered=javascript_triggered,
|
javascript_triggered=javascript_triggered,
|
||||||
interacted_with_parent=True,
|
interacted_with_parent=True,
|
||||||
|
download_triggered=download_triggered,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
except Exception as pe:
|
except Exception as pe:
|
||||||
@@ -575,6 +591,7 @@ async def chain_click(
|
|||||||
if file:
|
if file:
|
||||||
await asyncio.sleep(10)
|
await asyncio.sleep(10)
|
||||||
page.remove_listener("filechooser", fc_func)
|
page.remove_listener("filechooser", fc_func)
|
||||||
|
page.remove_listener("download", download_func)
|
||||||
|
|
||||||
|
|
||||||
def get_anchor_to_click(scraped_page: ScrapedPage, element_id: int) -> str | None:
|
def get_anchor_to_click(scraped_page: ScrapedPage, element_id: int) -> str | None:
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ class ActionResult(BaseModel):
|
|||||||
step_retry_number: int | None = None
|
step_retry_number: int | None = None
|
||||||
step_order: int | None = None
|
step_order: int | None = None
|
||||||
javascript_triggered: bool = False
|
javascript_triggered: bool = False
|
||||||
|
download_triggered: bool | None = None
|
||||||
# None marks old data, so that it can be told apart from new data, which only ever holds a boolean
|
# None marks old data, so that it can be told apart from new data, which only ever holds a boolean
|
||||||
interacted_with_sibling: bool | None = None
|
interacted_with_sibling: bool | None = None
|
||||||
interacted_with_parent: bool | None = None
|
interacted_with_parent: bool | None = None
|
||||||
@@ -32,6 +33,7 @@ class ActionSuccess(ActionResult):
|
|||||||
self,
|
self,
|
||||||
data: dict[str, Any] | list | str | None = None,
|
data: dict[str, Any] | list | str | None = None,
|
||||||
javascript_triggered: bool = False,
|
javascript_triggered: bool = False,
|
||||||
|
download_triggered: bool | None = None,
|
||||||
interacted_with_sibling: bool = False,
|
interacted_with_sibling: bool = False,
|
||||||
interacted_with_parent: bool = False,
|
interacted_with_parent: bool = False,
|
||||||
):
|
):
|
||||||
@@ -39,6 +41,7 @@ class ActionSuccess(ActionResult):
|
|||||||
success=True,
|
success=True,
|
||||||
data=data,
|
data=data,
|
||||||
javascript_triggered=javascript_triggered,
|
javascript_triggered=javascript_triggered,
|
||||||
|
download_triggered=download_triggered,
|
||||||
interacted_with_sibling=interacted_with_sibling,
|
interacted_with_sibling=interacted_with_sibling,
|
||||||
interacted_with_parent=interacted_with_parent,
|
interacted_with_parent=interacted_with_parent,
|
||||||
)
|
)
|
||||||
@@ -49,6 +52,7 @@ class ActionFailure(ActionResult):
|
|||||||
self,
|
self,
|
||||||
exception: Exception,
|
exception: Exception,
|
||||||
javascript_triggered: bool = False,
|
javascript_triggered: bool = False,
|
||||||
|
download_triggered: bool | None = None,
|
||||||
interacted_with_sibling: bool = False,
|
interacted_with_sibling: bool = False,
|
||||||
interacted_with_parent: bool = False,
|
interacted_with_parent: bool = False,
|
||||||
):
|
):
|
||||||
@@ -57,6 +61,7 @@ class ActionFailure(ActionResult):
|
|||||||
exception_type=type(exception).__name__,
|
exception_type=type(exception).__name__,
|
||||||
exception_message=remove_whitespace(str(exception)),
|
exception_message=remove_whitespace(str(exception)),
|
||||||
javascript_triggered=javascript_triggered,
|
javascript_triggered=javascript_triggered,
|
||||||
|
download_triggered=download_triggered,
|
||||||
interacted_with_sibling=interacted_with_sibling,
|
interacted_with_sibling=interacted_with_sibling,
|
||||||
interacted_with_parent=interacted_with_parent,
|
interacted_with_parent=interacted_with_parent,
|
||||||
)
|
)
|
||||||
@@ -68,12 +73,14 @@ class ActionAbort(ActionResult):
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
javascript_triggered: bool = False,
|
javascript_triggered: bool = False,
|
||||||
|
download_triggered: bool | None = None,
|
||||||
interacted_with_sibling: bool = False,
|
interacted_with_sibling: bool = False,
|
||||||
interacted_with_parent: bool = False,
|
interacted_with_parent: bool = False,
|
||||||
):
|
):
|
||||||
super().__init__(
|
super().__init__(
|
||||||
success=True,
|
success=True,
|
||||||
javascript_triggered=javascript_triggered,
|
javascript_triggered=javascript_triggered,
|
||||||
|
download_triggered=download_triggered,
|
||||||
interacted_with_sibling=interacted_with_sibling,
|
interacted_with_sibling=interacted_with_sibling,
|
||||||
interacted_with_parent=interacted_with_parent,
|
interacted_with_parent=interacted_with_parent,
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user