reenable the download action (#4299)
This commit is contained in:
@@ -267,6 +267,7 @@ export const ActionTypes = {
|
||||
Hover: "hover",
|
||||
SelectOption: "select_option",
|
||||
UploadFile: "upload_file",
|
||||
DownloadFile: "download_file",
|
||||
complete: "complete",
|
||||
wait: "wait",
|
||||
terminate: "terminate",
|
||||
@@ -294,6 +295,7 @@ export const ReadableActionTypes: {
|
||||
hover: "Hover",
|
||||
select_option: "Select Option",
|
||||
upload_file: "Upload File",
|
||||
download_file: "Download File",
|
||||
complete: "Complete",
|
||||
wait: "Wait",
|
||||
terminate: "Terminate",
|
||||
|
||||
@@ -1,5 +1,10 @@
|
||||
import { ActionType, ReadableActionTypes } from "@/api/types";
|
||||
import { CursorArrowIcon, HandIcon, InputIcon } from "@radix-ui/react-icons";
|
||||
import {
|
||||
CursorArrowIcon,
|
||||
HandIcon,
|
||||
DownloadIcon,
|
||||
InputIcon,
|
||||
} from "@radix-ui/react-icons";
|
||||
|
||||
type Props = {
|
||||
actionType: ActionType;
|
||||
@@ -9,6 +14,7 @@ const icons: Partial<Record<ActionType, React.ReactNode>> = {
|
||||
click: <CursorArrowIcon className="h-4 w-4" />,
|
||||
hover: <HandIcon className="h-4 w-4" />,
|
||||
input_text: <InputIcon className="h-4 w-4" />,
|
||||
download_file: <DownloadIcon className="h-4 w-4" />,
|
||||
};
|
||||
|
||||
function ActionTypePill({ actionType }: Props) {
|
||||
|
||||
@@ -3,6 +3,7 @@ import {
|
||||
CheckCircledIcon,
|
||||
CursorArrowIcon,
|
||||
HandIcon,
|
||||
DownloadIcon,
|
||||
InputIcon,
|
||||
QuestionMarkIcon,
|
||||
} from "@radix-ui/react-icons";
|
||||
@@ -17,6 +18,7 @@ const icons: Partial<Record<ActionType, React.ReactNode>> = {
|
||||
hover: <HandIcon className="h-4 w-4" />,
|
||||
complete: <CheckCircledIcon className="h-4 w-4" />,
|
||||
input_text: <InputIcon className="h-4 w-4" />,
|
||||
download_file: <DownloadIcon className="h-4 w-4" />,
|
||||
};
|
||||
|
||||
function ActionTypePillMinimal({ actionType }: Props) {
|
||||
|
||||
@@ -104,6 +104,14 @@ class ImaginaryFileUrl(SkyvernException):
|
||||
super().__init__(f"File url {file_url} is imaginary.")
|
||||
|
||||
|
||||
class DownloadedFileNotFound(SkyvernException):
    """Raised when a file that was reported as downloaded is missing on disk.

    Args:
        downloaded_path: Path at which the downloaded file was expected.
        download_url: URL the file was fetched from, if known. When truthy it
            is appended to the error message.
    """

    def __init__(self, downloaded_path: str, download_url: str | None = None) -> None:
        message = f"Downloaded file does not exist at path: {downloaded_path}. This may indicate the download failed silently or the file was removed."
        # The URL is optional context; omit the suffix entirely when it is None or empty.
        if download_url:
            message += f" Download URL: {download_url}"
        super().__init__(message)
|
||||
|
||||
|
||||
class MissingBrowserState(SkyvernException):
|
||||
def __init__(self, task_id: str | None = None, workflow_run_id: str | None = None) -> None:
|
||||
task_str = f"task_id={task_id}" if task_id else ""
|
||||
@@ -878,3 +886,19 @@ class InvalidSchemaError(SkyvernException):
|
||||
self.message = message
|
||||
self.validation_errors = validation_errors or []
|
||||
super().__init__(self.message)
|
||||
|
||||
|
||||
class PDFEmbedBase64DecodeError(SkyvernException):
    """Raised when failed to extract or decode base64 data from PDF embed src attribute.

    Args:
        pdf_embed_src: The raw ``src`` value of the PDF embed, if available.
            Stored on the instance untruncated; only a preview of at most
            100 characters is placed in the message.
        reason: Short explanation of what went wrong, if available.
    """

    def __init__(self, pdf_embed_src: str | None = None, reason: str | None = None) -> None:
        self.pdf_embed_src = pdf_embed_src
        self.reason = reason

        message = "Failed to extract or decode base64 data from PDF embed src"
        if reason:
            message += f". Reason: {reason}"
        if pdf_embed_src:
            # Truncate long base64 strings for logging
            src_preview = (pdf_embed_src[:100] + "...") if len(pdf_embed_src) > 100 else pdf_embed_src
            message += f". PDF embed src: {src_preview}"
        super().__init__(message)
|
||||
|
||||
@@ -6,6 +6,7 @@ import os
|
||||
import random
|
||||
import re
|
||||
import string
|
||||
import uuid
|
||||
from asyncio.exceptions import CancelledError
|
||||
from dataclasses import dataclass
|
||||
from datetime import UTC, datetime
|
||||
@@ -51,6 +52,7 @@ from skyvern.exceptions import (
|
||||
MissingBrowserStatePage,
|
||||
MissingExtractActionsResponse,
|
||||
NoTOTPVerificationCodeFound,
|
||||
PDFEmbedBase64DecodeError,
|
||||
ScrapingFailed,
|
||||
SkyvernException,
|
||||
StepTerminationError,
|
||||
@@ -110,6 +112,7 @@ from skyvern.webeye.actions.actions import (
|
||||
CompleteAction,
|
||||
CompleteVerifyResult,
|
||||
DecisiveAction,
|
||||
DownloadFileAction,
|
||||
ExtractAction,
|
||||
GotoUrlAction,
|
||||
ReloadPageAction,
|
||||
@@ -1035,16 +1038,73 @@ class ForgeAgent:
|
||||
if json_response is None:
|
||||
raise MissingExtractActionsResponse()
|
||||
try:
|
||||
otp_json_response, otp_actions = await self.handle_potential_OTP_actions(
|
||||
task, step, scraped_page, browser_state, json_response
|
||||
)
|
||||
if otp_actions:
|
||||
detailed_agent_step_output.llm_response = otp_json_response
|
||||
actions = otp_actions
|
||||
if pdf_embed_src := scraped_page.check_pdf_viewer_embed():
|
||||
LOG.info("Generate DownloadFileAction for PDF viewer page", step_id=step.step_id)
|
||||
pdf_bytes: bytes | None = None
|
||||
download_url: str | None = None
|
||||
|
||||
# Check if the embed src is a data URI with base64 encoded PDF
|
||||
# Format: data:application/pdf[;charset=...];base64,<base64_data>
|
||||
if pdf_embed_src.startswith("data:application/pdf"):
|
||||
# Use more precise regex to extract base64 data after the base64, prefix
|
||||
# This pattern matches: data:application/pdf[;optional_params];base64,<data>
|
||||
m = re.search(r"data:application/pdf[^;]*;base64,(.+)", pdf_embed_src, re.S)
|
||||
if not m:
|
||||
raise PDFEmbedBase64DecodeError(
|
||||
pdf_embed_src=pdf_embed_src,
|
||||
reason="Failed to extract base64 data from PDF embed src. Expected format: data:application/pdf[;charset=...];base64,<data>",
|
||||
)
|
||||
|
||||
base64_data = m.group(1)
|
||||
LOG.info(
|
||||
"Found base64 data in PDF embed src",
|
||||
step_id=step.step_id,
|
||||
base64_data_length=len(base64_data),
|
||||
)
|
||||
|
||||
# Decode base64 data with error handling
|
||||
try:
|
||||
pdf_bytes = base64.b64decode(base64_data, validate=True)
|
||||
except Exception as e:
|
||||
raise PDFEmbedBase64DecodeError(
|
||||
pdf_embed_src=pdf_embed_src,
|
||||
reason=f"Failed to decode base64 data: {str(e)}",
|
||||
) from e
|
||||
else:
|
||||
# If not a data URI, treat it as a URL
|
||||
LOG.info(
|
||||
"Found PDF embed src as URL (not base64 data)",
|
||||
step_id=step.step_id,
|
||||
download_url=pdf_embed_src,
|
||||
)
|
||||
download_url = pdf_embed_src
|
||||
|
||||
actions = [
|
||||
DownloadFileAction(
|
||||
reasoning="Downloading the file from the PDF viewer.",
|
||||
organization_id=task.organization_id,
|
||||
workflow_run_id=task.workflow_run_id,
|
||||
task_id=task.task_id,
|
||||
step_id=step.step_id,
|
||||
step_order=step.order,
|
||||
action_order=0,
|
||||
file_name=f"{uuid.uuid4()}.pdf",
|
||||
byte=pdf_bytes,
|
||||
download_url=download_url,
|
||||
download=True,
|
||||
)
|
||||
]
|
||||
else:
|
||||
actions = parse_actions(
|
||||
task, step.step_id, step.order, scraped_page, json_response["actions"]
|
||||
otp_json_response, otp_actions = await self.handle_potential_OTP_actions(
|
||||
task, step, scraped_page, browser_state, json_response
|
||||
)
|
||||
if otp_actions:
|
||||
detailed_agent_step_output.llm_response = otp_json_response
|
||||
actions = otp_actions
|
||||
else:
|
||||
actions = parse_actions(
|
||||
task, step.step_id, step.order, scraped_page, json_response["actions"]
|
||||
)
|
||||
|
||||
if context:
|
||||
context.pop_totp_code(task.task_id)
|
||||
@@ -1762,6 +1822,11 @@ class ForgeAgent:
|
||||
persist_artifacts=False,
|
||||
)
|
||||
|
||||
if scraped_page.check_pdf_viewer_embed():
|
||||
next_step.is_speculative = False
|
||||
LOG.info("Skipping speculative extract-actions for PDF viewer page", step_id=current_step.step_id)
|
||||
return None
|
||||
|
||||
llm_api_handler = LLMAPIHandlerFactory.get_override_llm_api_handler(
|
||||
task.llm_key,
|
||||
default=app.LLM_API_HANDLER,
|
||||
|
||||
@@ -246,13 +246,15 @@ class UploadFileAction(WebAction):
|
||||
return f"UploadFileAction(element_id={self.element_id}, file={self.file_url}, is_upload_file_tag={self.is_upload_file_tag})"
|
||||
|
||||
|
||||
# this is a deprecated action type
|
||||
class DownloadFileAction(WebAction):
|
||||
# This action is deprecated in 'extract-actions' prompt. Only used for the download action triggered by the code.
|
||||
class DownloadFileAction(Action):
|
||||
action_type: ActionType = ActionType.DOWNLOAD_FILE
|
||||
file_name: str
|
||||
byte: Annotated[bytes | None, Field(exclude=True)] = None # bytes data
|
||||
download_url: str | None = None # URL to download file from
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"DownloadFileAction(element_id={self.element_id}, file_name={self.file_name})"
|
||||
return f"DownloadFileAction(file_name={self.file_name}, download_url={self.download_url}, has_byte={self.byte is not None})"
|
||||
|
||||
|
||||
class NullAction(Action):
|
||||
|
||||
@@ -2,6 +2,7 @@ import asyncio
|
||||
import copy
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import time
|
||||
import urllib.parse
|
||||
import uuid
|
||||
@@ -21,11 +22,11 @@ from skyvern.constants import (
|
||||
BROWSER_DOWNLOAD_MAX_WAIT_TIME,
|
||||
BROWSER_DOWNLOAD_TIMEOUT,
|
||||
DROPDOWN_MENU_MAX_DISTANCE,
|
||||
REPO_ROOT_DIR,
|
||||
SKYVERN_ID_ATTR,
|
||||
)
|
||||
from skyvern.errors.errors import TOTPExpiredError
|
||||
from skyvern.exceptions import (
|
||||
DownloadedFileNotFound,
|
||||
DownloadFileMaxWaitingTime,
|
||||
EmptySelect,
|
||||
ErrEmptyTweakValue,
|
||||
@@ -60,6 +61,7 @@ from skyvern.exceptions import (
|
||||
from skyvern.experimentation.wait_utils import get_or_create_wait_config, get_wait_time
|
||||
from skyvern.forge import app
|
||||
from skyvern.forge.prompts import prompt_engine
|
||||
from skyvern.forge.sdk.api.files import download_file as download_file_api
|
||||
from skyvern.forge.sdk.api.files import (
|
||||
get_download_dir,
|
||||
list_downloading_files_in_directory,
|
||||
@@ -102,6 +104,7 @@ from skyvern.webeye.actions.actions import (
|
||||
WebAction,
|
||||
)
|
||||
from skyvern.webeye.actions.responses import ActionAbort, ActionFailure, ActionResult, ActionSuccess
|
||||
from skyvern.webeye.browser_factory import initialize_download_dir
|
||||
from skyvern.webeye.scraper.scraped_page import (
|
||||
CleanupElementTreeFunc,
|
||||
ElementTreeBuilder,
|
||||
@@ -1591,7 +1594,8 @@ async def handle_upload_file_action(
|
||||
)
|
||||
|
||||
|
||||
# This function is deprecated. Downloads are handled by the click action handler now.
|
||||
# This function is deprecated in 'extract-actions' prompt. Downloads are handled by the click action handler now.
|
||||
# Currently, it's only used for the download action triggered by the code.
|
||||
@TraceManager.traced_async(ignore_inputs=["scraped_page", "page"])
|
||||
async def handle_download_file_action(
|
||||
action: actions.DownloadFileAction,
|
||||
@@ -1600,42 +1604,72 @@ async def handle_download_file_action(
|
||||
task: Task,
|
||||
step: Step,
|
||||
) -> list[ActionResult]:
|
||||
# Get wait config once for this handler
|
||||
wait_config = await get_or_create_wait_config(task.task_id, task.workflow_run_id, task.organization_id)
|
||||
|
||||
dom = DomUtil(scraped_page=scraped_page, page=page)
|
||||
skyvern_element = await dom.get_skyvern_element_by_id(action.element_id)
|
||||
|
||||
file_name = f"{action.file_name or uuid.uuid4()}"
|
||||
full_file_path = f"{REPO_ROOT_DIR}/downloads/{task.workflow_run_id or task.task_id}/{file_name}"
|
||||
download_folder = initialize_download_dir()
|
||||
full_file_path = f"{download_folder}/{file_name}"
|
||||
|
||||
try:
|
||||
# Start waiting for the download
|
||||
async with page.expect_download() as download_info:
|
||||
await asyncio.sleep(get_wait_time(wait_config, "post_click_delay", default=0.3))
|
||||
# Priority 1: If byte data is provided, save it directly
|
||||
if action.byte is not None:
|
||||
with open(full_file_path, "wb") as f:
|
||||
f.write(action.byte)
|
||||
|
||||
locator = skyvern_element.locator
|
||||
await locator.click(
|
||||
timeout=settings.BROWSER_ACTION_TIMEOUT_MS,
|
||||
modifiers=["Alt"],
|
||||
LOG.info(
|
||||
"DownloadFileAction: Saved file from byte data",
|
||||
action=action,
|
||||
full_file_path=full_file_path,
|
||||
file_size=len(action.byte),
|
||||
)
|
||||
return [ActionSuccess(download_triggered=True)]
|
||||
|
||||
download = await download_info.value
|
||||
# Priority 2: If download_url is provided, download from URL
|
||||
if action.download_url is not None:
|
||||
downloaded_path = await download_file_api(action.download_url)
|
||||
# Check if the downloaded file actually exists
|
||||
if not os.path.exists(downloaded_path):
|
||||
LOG.error(
|
||||
"DownloadFileAction: Downloaded file path does not exist",
|
||||
action=action,
|
||||
downloaded_path=downloaded_path,
|
||||
download_url=action.download_url,
|
||||
full_file_path=full_file_path,
|
||||
)
|
||||
return [ActionFailure(DownloadedFileNotFound(downloaded_path, action.download_url))]
|
||||
|
||||
# Move the downloaded file to the target location
|
||||
# If the downloaded file has a different name, use it; otherwise use the specified file_name
|
||||
if os.path.basename(downloaded_path) != file_name:
|
||||
# Copy to target location with specified file_name
|
||||
shutil.copy2(downloaded_path, full_file_path)
|
||||
# Optionally remove the temporary file
|
||||
try:
|
||||
os.remove(downloaded_path)
|
||||
except Exception:
|
||||
pass # Ignore errors when removing temp file
|
||||
else:
|
||||
# Move to target location
|
||||
shutil.move(downloaded_path, full_file_path)
|
||||
|
||||
LOG.info(
|
||||
"DownloadFileAction: Downloaded file from URL",
|
||||
action=action,
|
||||
full_file_path=full_file_path,
|
||||
download_url=action.download_url,
|
||||
)
|
||||
return [ActionSuccess(download_triggered=True)]
|
||||
|
||||
return [ActionSuccess(download_triggered=False)]
|
||||
|
||||
# Create download folders if they don't exist
|
||||
download_folder = f"{REPO_ROOT_DIR}/downloads/{task.workflow_run_id or task.task_id}"
|
||||
os.makedirs(download_folder, exist_ok=True)
|
||||
# Wait for the download process to complete and save the downloaded file
|
||||
await download.save_as(full_file_path)
|
||||
except Exception as e:
|
||||
LOG.exception(
|
||||
"DownloadFileAction: Failed to download file",
|
||||
action=action,
|
||||
full_file_path=full_file_path,
|
||||
download_url=action.download_url,
|
||||
has_byte=action.byte is not None,
|
||||
)
|
||||
return [ActionFailure(e)]
|
||||
|
||||
return [ActionSuccess(data={"file_path": full_file_path})]
|
||||
|
||||
|
||||
@TraceManager.traced_async(ignore_inputs=["scraped_page", "page"])
|
||||
async def handle_null_action(
|
||||
@@ -2250,7 +2284,7 @@ ActionHandler.register_action_type(ActionType.SOLVE_CAPTCHA, handle_solve_captch
|
||||
ActionHandler.register_action_type(ActionType.CLICK, handle_click_action)
|
||||
ActionHandler.register_action_type(ActionType.INPUT_TEXT, handle_input_text_action)
|
||||
ActionHandler.register_action_type(ActionType.UPLOAD_FILE, handle_upload_file_action)
|
||||
# ActionHandler.register_action_type(ActionType.DOWNLOAD_FILE, handle_download_file_action)
|
||||
ActionHandler.register_action_type(ActionType.DOWNLOAD_FILE, handle_download_file_action)
|
||||
ActionHandler.register_action_type(ActionType.NULL_ACTION, handle_null_action)
|
||||
ActionHandler.register_action_type(ActionType.SELECT_OPTION, handle_select_option_action)
|
||||
ActionHandler.register_action_type(ActionType.WAIT, handle_wait_action)
|
||||
|
||||
@@ -1794,6 +1794,16 @@ async function buildElementTree(
|
||||
elementObj = await buildElementObject(frame, element, interactable);
|
||||
} else if (tagName === "div" && isDOMNodeRepresentDiv(element)) {
|
||||
elementObj = await buildElementObject(frame, element, interactable);
|
||||
} else if (
|
||||
tagName === "embed" &&
|
||||
element.getAttribute("type")?.toLowerCase() === "application/pdf"
|
||||
) {
|
||||
elementObj = await buildElementObject(
|
||||
frame,
|
||||
element,
|
||||
interactable,
|
||||
true,
|
||||
);
|
||||
} else if (
|
||||
getElementText(element).length > 0 &&
|
||||
getElementText(element).length <= 5000
|
||||
|
||||
@@ -178,6 +178,32 @@ class ScrapedPage(BaseModel, ElementTreeBuilder):
|
||||
self._clean_up_func = clean_up_func
|
||||
self._scrape_exclude = scrape_exclude
|
||||
|
||||
def check_pdf_viewer_embed(self) -> str | None:
|
||||
"""
|
||||
Check if the page contains a PDF viewer embed.
|
||||
If found, return the src attribute of the embed.
|
||||
"""
|
||||
if len(self.elements) != 1:
|
||||
return None
|
||||
|
||||
element = self.elements[0]
|
||||
if element.get("tagName", "") != "embed":
|
||||
return None
|
||||
|
||||
attributes: dict = element.get("attributes", {})
|
||||
if not attributes:
|
||||
return None
|
||||
|
||||
type_attr: str | None = attributes.get("type")
|
||||
if not type_attr:
|
||||
return None
|
||||
|
||||
if type_attr.lower() != "application/pdf":
|
||||
return None
|
||||
|
||||
LOG.info("Found a PDF viewer page", element=element)
|
||||
return attributes.get("src", "")
|
||||
|
||||
def support_economy_elements_tree(self) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
Reference in New Issue
Block a user