Workflow Fixes (#156)

2024-04-04 19:09:19 -07:00
parent 8117395d73
commit 0800990627
11 changed files with 350 additions and 108 deletions
--- a/skyvern/webeye/actions/actions.py
+++ b/skyvern/webeye/actions/actions.py
@@ -14,6 +14,7 @@ class ActionType(StrEnum):
    CLICK = "click"
    INPUT_TEXT = "input_text"
    UPLOAD_FILE = "upload_file"
+    DOWNLOAD_FILE = "download_file"
    SELECT_OPTION = "select_option"
    CHECKBOX = "checkbox"
    WAIT = "wait"
@@ -69,6 +70,14 @@ class UploadFileAction(WebAction):
        return f"UploadFileAction(element_id={self.element_id}, file={self.file_url}, is_upload_file_tag={self.is_upload_file_tag})"


+class DownloadFileAction(WebAction):  # handled by clicking the target element and saving the resulting download
+    action_type: ActionType = ActionType.DOWNLOAD_FILE
+    file_name: str  # name the download is saved under; the handler substitutes a UUID when this is empty
+
+    def __repr__(self) -> str:  # compact debug form: element id and target file name only
+        return f"DownloadFileAction(element_id={self.element_id}, file_name={self.file_name})"
+
+
 class NullAction(Action):
    action_type: ActionType = ActionType.NULL_ACTION

@@ -149,6 +158,10 @@ def parse_actions(task: Task, json_response: List[Dict[str, Any]]) -> List[Actio
            # TODO: see if the element is a file input element. if it's not, convert this action into a click action

            actions.append(UploadFileAction(element_id=element_id, file_url=action["file_url"], reasoning=reasoning))
+        elif action_type == ActionType.DOWNLOAD_FILE:
+            actions.append(
+                DownloadFileAction(element_id=element_id, file_name=action["file_name"], reasoning=reasoning)
+            )
        elif action_type == ActionType.SELECT_OPTION:
            actions.append(
                SelectOptionAction(
@@ -166,22 +179,13 @@ def parse_actions(task: Task, json_response: List[Dict[str, Any]]) -> List[Actio
        elif action_type == ActionType.WAIT:
            actions.append(WaitAction(reasoning=reasoning))
        elif action_type == ActionType.COMPLETE:
-            if actions:
-                LOG.info(
-                    "Navigation goal achieved, creating complete action and discarding all other actions except "
-                    "complete action",
-                    task_id=task.task_id,
-                    nav_goal=task.navigation_goal,
-                    actions=actions,
-                    llm_response=json_response,
-                )
-            return [
+            actions.append(
                CompleteAction(
                    reasoning=reasoning,
                    data_extraction_goal=task.data_extraction_goal,
                    errors=action["errors"] if "errors" in action else [],
                )
-            ]
+            )
        elif action_type == "null":
            actions.append(NullAction(reasoning=reasoning))
        elif action_type == ActionType.SOLVE_CAPTCHA:
@@ -210,6 +214,7 @@ ActionTypeUnion = (
    ClickAction
    | InputTextAction
    | UploadFileAction
+    | DownloadFileAction
    | SelectOptionAction
    | CheckboxAction
    | WaitAction
--- a/skyvern/webeye/actions/handler.py
+++ b/skyvern/webeye/actions/handler.py
@@ -1,11 +1,14 @@
 import asyncio
 import json
+import os
 import re
+import uuid
 from typing import Any, Awaitable, Callable, List

 import structlog
 from playwright.async_api import Locator, Page

+from skyvern.constants import SKYVERN_DIR
 from skyvern.exceptions import ImaginaryFileUrl, MissingElement, MissingFileUrl, MultipleElementsFound
 from skyvern.forge import app
 from skyvern.forge.prompts import prompt_engine
@@ -154,6 +157,34 @@ async def handle_upload_file_action(
        )


+async def handle_download_file_action(
+    action: actions.DownloadFileAction, page: Page, scraped_page: ScrapedPage, task: Task, step: Step
+) -> list[ActionResult]:
+    """Click the action's element, wait for the browser download it triggers, and save the file locally.
+
+    The file is written to {SKYVERN_DIR}/downloads/<workflow_run_id or task_id>/<file_name>.
+    Returns [ActionSuccess] whose data holds the saved "file_path", or [ActionFailure] wrapping
+    the exception if clicking, downloading, or saving fails. `step` is not used by this handler.
+    """
+    xpath = await validate_actions_in_dom(action, page, scraped_page)
+    # file_name comes from the parsed LLM response: keep only its last path component so a value such as
+    # "../../x" cannot escape the download folder, and fall back to a random name when nothing usable is left.
+    requested_name = os.path.basename(action.file_name or "")
+    file_name = requested_name if requested_name not in ("", ".", "..") else str(uuid.uuid4())
+    download_folder = f"{SKYVERN_DIR}/downloads/{task.workflow_run_id or task.task_id}"
+    full_file_path = f"{download_folder}/{file_name}"
+    try:
+        # Start waiting for the download before clicking so the download event is not missed
+        async with page.expect_download() as download_info:
+            await asyncio.sleep(0.3)
+            await page.click(f"xpath={xpath}", timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS)
+
+        download = await download_info.value
+
+        # Create download folders if they don't exist
+        os.makedirs(download_folder, exist_ok=True)
+        # Wait for the download process to complete and save the downloaded file
+        await download.save_as(full_file_path)
+    except Exception as e:
+        # LOG.exception already attaches the active exception's traceback
+        LOG.exception("DownloadFileAction: Failed to download file", action=action, full_file_path=full_file_path)
+        return [ActionFailure(e)]
+
+    return [ActionSuccess(data={"file_path": full_file_path})]
+
+
 async def handle_null_action(
    action: actions.NullAction, page: Page, scraped_page: ScrapedPage, task: Task, step: Step
 ) -> list[ActionResult]:
@@ -348,6 +379,7 @@ ActionHandler.register_action_type(ActionType.SOLVE_CAPTCHA, handle_solve_captch
 ActionHandler.register_action_type(ActionType.CLICK, handle_click_action)
 ActionHandler.register_action_type(ActionType.INPUT_TEXT, handle_input_text_action)
 ActionHandler.register_action_type(ActionType.UPLOAD_FILE, handle_upload_file_action)
+ActionHandler.register_action_type(ActionType.DOWNLOAD_FILE, handle_download_file_action)
 ActionHandler.register_action_type(ActionType.NULL_ACTION, handle_null_action)
 ActionHandler.register_action_type(ActionType.SELECT_OPTION, handle_select_option_action)
 ActionHandler.register_action_type(ActionType.WAIT, handle_wait_action)
--- a/skyvern/webeye/scraper/scraper.py
+++ b/skyvern/webeye/scraper/scraper.py
@@ -169,7 +169,12 @@ async def scrape_web_unsafe(
    scroll_y_px_old = -1.0
    scroll_y_px = await scroll_to_top(page, drow_boxes=True)
    # Checking max number of screenshots to prevent infinite loop
-    while scroll_y_px_old != scroll_y_px and len(screenshots) < SettingsManager.get_settings().MAX_NUM_SCREENSHOTS:
+    # Compare the old and new scroll_y_px to detect the end of the page: the loop continues only while they differ
+    # by more than 25, so a difference of 25 or less is treated as having reached the end of the page.
+    while (
+        abs(scroll_y_px_old - scroll_y_px) > 25
+        and len(screenshots) < SettingsManager.get_settings().MAX_NUM_SCREENSHOTS
+    ):
        screenshot = await browser_state.take_screenshot(full_page=False)
        screenshots.append(screenshot)
        scroll_y_px_old = scroll_y_px