support pdf printer (#653)

This commit is contained in:
LawyZheng
2024-07-26 18:10:42 +08:00
committed by GitHub
parent 0471ae55a0
commit be75055fd8
3 changed files with 58 additions and 8 deletions

View File

@@ -55,9 +55,10 @@ from skyvern.webeye.actions.actions import (
WebAction,
)
from skyvern.webeye.actions.responses import ActionFailure, ActionResult, ActionSuccess
from skyvern.webeye.browser_factory import BrowserState
from skyvern.webeye.browser_factory import BrowserState, get_download_dir
from skyvern.webeye.scraper.scraper import ElementTreeFormat, ScrapedPage
from skyvern.webeye.utils.dom import AbstractSelectDropdown, DomUtil, SkyvernElement
from skyvern.webeye.utils.page import SkyvernFrame
LOG = structlog.get_logger()
COMMON_INPUT_TAGS = {"input", "textarea", "select"}
@@ -218,7 +219,7 @@ async def handle_click_action(
skyvern_element = await dom.get_skyvern_element_by_id(action.element_id)
await asyncio.sleep(0.3)
if action.download:
results = await handle_click_to_download_file_action(action, page, scraped_page)
results = await handle_click_to_download_file_action(action, page, scraped_page, task)
else:
results = await chain_click(
task,
@@ -248,16 +249,27 @@ async def handle_click_to_download_file_action(
action: actions.ClickAction,
page: Page,
scraped_page: ScrapedPage,
task: Task,
) -> list[ActionResult]:
dom = DomUtil(scraped_page=scraped_page, page=page)
skyvern_element = await dom.get_skyvern_element_by_id(action.element_id)
locator = skyvern_element.locator
try:
await locator.click(
timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS,
modifiers=["Alt"],
)
await locator.click(timeout=SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS)
await page.wait_for_load_state(timeout=SettingsManager.get_settings().BROWSER_LOADING_TIMEOUT_MS)
# TODO: should we navigate back to the previous page?
if await SkyvernFrame.get_print_triggered(page):
path = f"{get_download_dir(task.workflow_run_id, task.task_id)}/{uuid.uuid4()}"
LOG.warning(
"Trying to download the printed PDF",
path=path,
action=action,
)
await page.pdf(format="A4", display_header_footer=True, path=path)
await SkyvernFrame.reset_print_triggered(page)
except Exception as e:
LOG.exception("ClickAction with download failed", action=action, exc_info=True)
return [ActionFailure(e, download_triggered=False)]

View File

@@ -1,6 +1,7 @@
from __future__ import annotations
import asyncio
import os
import tempfile
import time
import uuid
@@ -12,6 +13,7 @@ from playwright.async_api import BrowserContext, Error, Page, Playwright, async_
from pydantic import BaseModel
from skyvern.config import settings
from skyvern.constants import REPO_ROOT_DIR
from skyvern.exceptions import (
FailedToNavigateToUrl,
FailedToReloadPage,
@@ -23,7 +25,7 @@ from skyvern.exceptions import (
from skyvern.forge.sdk.core.skyvern_context import current
from skyvern.forge.sdk.schemas.tasks import ProxyLocation
from skyvern.forge.sdk.settings_manager import SettingsManager
from skyvern.webeye.utils.page import SkyvernFrame
from skyvern.webeye.utils.page import DISABLE_PRINTER_WITH_FLAG, SkyvernFrame
LOG = structlog.get_logger()
@@ -31,6 +33,13 @@ LOG = structlog.get_logger()
BrowserCleanupFunc = Callable[[], None] | None
def get_download_dir(workflow_run_id: str | None, task_id: str | None) -> str:
    """
    Return the download directory for a run, creating it on disk if needed.

    The directory is placed under "<REPO_ROOT_DIR>/downloads/" and is named
    after workflow_run_id when that is truthy, otherwise after task_id.
    """
    run_key = workflow_run_id or task_id
    target_dir = f"{REPO_ROOT_DIR}/downloads/{run_key}"
    LOG.info("Initializing download directory", download_dir=target_dir)
    # exist_ok=True: calling this repeatedly for the same run is not an error.
    os.makedirs(target_dir, exist_ok=True)
    return target_dir
class BrowserContextCreator(Protocol):
def __call__(
self, playwright: Playwright, **kwargs: dict[str, Any]
@@ -101,7 +110,10 @@ class BrowserContextFactory:
creator = cls._creators.get(browser_type)
if not creator:
raise UnknownBrowserType(browser_type)
return await creator(playwright, **kwargs)
browser_context, browser_artifacts, cleanup_func = await creator(playwright, **kwargs)
# overwrite the window.print() to disable the PDF printer
await browser_context.add_init_script(DISABLE_PRINTER_WITH_FLAG)
return browser_context, browser_artifacts, cleanup_func
except UnknownBrowserType as e:
raise e
except Exception as e:

View File

@@ -28,6 +28,16 @@ def load_js_script() -> str:
raise e
# JavaScript snippet that replaces window.print() with a stub. When called, the
# stub only sets window.__printTriggered = true. The flag is initialised to
# false when the snippet runs, so it can be read back later to tell whether the
# page attempted to print.
# NOTE(review): originalPrint is captured but never used or restored by this snippet.
DISABLE_PRINTER_WITH_FLAG = """
(function() {
const originalPrint = window.print;
window.print = function() {
window.__printTriggered = true;
};
window.__printTriggered = false;
})();
"""
JS_FUNCTION_DEFS = load_js_script()
@@ -103,6 +113,22 @@ class SkyvernFrame:
await skyvern_page.scroll_to_top(draw_boxes=False)
return screenshots
@staticmethod
async def get_print_triggered(page: Page) -> bool:
    """
    Read the window.__printTriggered flag from the given page.

    Accepts a Page (not a Frame) because only a Page instance can be printed as a PDF.
    """
    # The flag is expected to exist on "window" already: it is injected through
    # "add_init_script" when the BrowserContext is initialized.
    print_flag = await page.evaluate("window.__printTriggered")
    return print_flag
@staticmethod
async def reset_print_triggered(page: Page) -> bool:
    """
    Reset the print-triggered flag on the page.

    Sets window.__printTriggered back to false so that a later print attempt on
    the same page can be detected again. Returns the value of the in-page
    assignment expression, which is False.
    """
    # The arrow function is evaluated in the page and assigns the flag; the flag itself
    # was injected on the "window" object from "add_init_script" when the BrowserContext initialized.
    return await page.evaluate("() => window.__printTriggered = false")
@classmethod
async def create_instance(cls, frame: Page | Frame) -> SkyvernFrame:
instance = cls(frame=frame)