From 04ed3c8dab84c0dac1a5dec28566643c6264fe96 Mon Sep 17 00:00:00 2001 From: Shuchang Zheng Date: Mon, 12 May 2025 07:05:02 -0700 Subject: [PATCH] revert pdf (#2328) --- poetry.lock | 20 +------------- pyproject.toml | 1 - skyvern/webeye/utils/page.py | 52 +++++------------------------------- 3 files changed, 7 insertions(+), 66 deletions(-) diff --git a/poetry.lock b/poetry.lock index a8c0a129..c6dc972f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -5081,24 +5081,6 @@ files = [ [package.extras] windows-terminal = ["colorama (>=0.4.6)"] -[[package]] -name = "pymupdf" -version = "1.25.5" -description = "A high performance Python library for data extraction, analysis, conversion & manipulation of PDF (and other) documents." -optional = false -python-versions = ">=3.9" -groups = ["main"] -files = [ - {file = "pymupdf-1.25.5-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:cde4e1c9cfb09c0e1e9c2b7f4b787dd6bb34a32cfe141a4675e24af7c0c25dd3"}, - {file = "pymupdf-1.25.5-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:5a35e2725fae0ab57f058dff77615c15eb5961eac50ba04f41ebc792cd8facad"}, - {file = "pymupdf-1.25.5-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d94b800e9501929c42283d39bc241001dd87fdeea297b5cb40d5b5714534452f"}, - {file = "pymupdf-1.25.5-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ee22155d3a634642d76553204867d862ae1bdd9f7cf70c0797d8127ebee6bed5"}, - {file = "pymupdf-1.25.5-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6ed7fc25271004d6d3279c20a80cb2bb4cda3efa9f9088dcc07cd790eca0bc63"}, - {file = "pymupdf-1.25.5-cp39-abi3-win32.whl", hash = "sha256:65e18ddb37fe8ec4edcdbebe9be3a8486b6a2f42609d0a142677e42f3a0614f8"}, - {file = "pymupdf-1.25.5-cp39-abi3-win_amd64.whl", hash = "sha256:7f44bc3d03ea45b2f68c96464f96105e8c7908896f2fb5e8c04f1fb8dae7981e"}, - {file = "pymupdf-1.25.5.tar.gz", hash = "sha256:5f96311cacd13254c905f6654a004a0a2025b71cabc04fda667f5472f72c15a0"}, -] - [[package]] name = "pyotp" version = "2.9.0" @@ -7388,4 +7370,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = "^3.11,<3.12" -content-hash = "762f5581ccfcd1c18787d26dbabb9744ee58dd0ecc9aaaf97e81d5e66d36448c" +content-hash = "81fb59e4ecd1124ab664c84c633ad015acdfc059fda8495a7fe4d40cad33435f" diff --git a/pyproject.toml b/pyproject.toml index ec1291bc..e2f09860 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,7 +57,6 @@ psutil = ">=7.0.0" tiktoken = ">=0.9.0" anthropic = "^0.50.0" google-cloud-aiplatform = "^1.90.0" -pymupdf = "^1.25.5" [tool.poetry.group.dev.dependencies] isort = "^5.13.2" diff --git a/skyvern/webeye/utils/page.py b/skyvern/webeye/utils/page.py index f82b3aef..69c31d7f 100644 --- a/skyvern/webeye/utils/page.py +++ b/skyvern/webeye/utils/page.py @@ -2,12 +2,9 @@ from __future__ import annotations import asyncio import time -from io import BytesIO from typing import Any, Dict, List -import pymupdf import structlog -from PIL import Image from playwright._impl._errors import TimeoutError from playwright.async_api import ElementHandle, Frame, Page @@ -37,6 +34,7 @@ JS_FUNCTION_DEFS = load_js_script() async def _current_viewpoint_screenshot_helper( page: Page, file_path: str | None = None, + full_page: bool = False, timeout: float = settings.BROWSER_SCREENSHOT_TIMEOUT_MS, ) -> bytes: if page.is_closed(): @@ -50,11 +48,13 @@ async def _current_viewpoint_screenshot_helper( screenshot = await page.screenshot( path=file_path, timeout=timeout, + full_page=full_page, animations="disabled", ) else: screenshot = await page.screenshot( timeout=timeout, + full_page=full_page, animations="disabled", ) end_time = time.time() @@ -150,49 +150,9 @@ class SkyvernFrame: file_path: str | None = None, timeout: float = settings.BROWSER_SCREENSHOT_TIMEOUT_MS, ) -> bytes: - if not full_page: - return await _current_viewpoint_screenshot_helper(page=page, file_path=file_path, timeout=timeout) - - LOG.debug("Page is fully loaded, agent is about to generate the full page screenshot") - start_time = time.time() - async with asyncio.timeout(timeout): - pdf_bytes = await page.pdf( - print_background=True, width=f"{settings.BROWSER_WIDTH}px", height=f"{settings.BROWSER_HEIGHT}px" - ) - - with pymupdf.open(stream=pdf_bytes, filetype="pdf") as doc: - images = [] - for pdf_page in doc: - pix = pdf_page.get_pixmap() - img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples) - images.append(img) - - total_height = sum(img.height for img in images) - max_width = max(img.width for img in images) - - merged_img = Image.new("RGB", (max_width, total_height), color=(255, 255, 255)) - - current_y = 0 - for img in images: - merged_img.paste(img, (0, current_y)) - current_y += img.height - - buffer = BytesIO() - merged_img.save(buffer, format="PNG") - buffer.seek(0) - - img_data = buffer.read() - if file_path is not None: - with open(file_path, "wb") as f: - f.write(img_data) - - end_time = time.time() - LOG.debug( - "Full page screenshot taking time", - screenshot_time=end_time - start_time, - file_path=file_path, - ) - return img_data + return await _current_viewpoint_screenshot_helper( + page=page, file_path=file_path, full_page=full_page, timeout=timeout + ) @staticmethod async def take_split_screenshots(