revert pdf (#2328)

This commit is contained in:
Shuchang Zheng
2025-05-12 07:05:02 -07:00
committed by GitHub
parent 7654dc1f1f
commit 04ed3c8dab
3 changed files with 7 additions and 66 deletions

20
poetry.lock generated
View File

@@ -5081,24 +5081,6 @@ files = [
[package.extras] [package.extras]
windows-terminal = ["colorama (>=0.4.6)"] windows-terminal = ["colorama (>=0.4.6)"]
[[package]]
name = "pymupdf"
version = "1.25.5"
description = "A high performance Python library for data extraction, analysis, conversion & manipulation of PDF (and other) documents."
optional = false
python-versions = ">=3.9"
groups = ["main"]
files = [
{file = "pymupdf-1.25.5-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:cde4e1c9cfb09c0e1e9c2b7f4b787dd6bb34a32cfe141a4675e24af7c0c25dd3"},
{file = "pymupdf-1.25.5-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:5a35e2725fae0ab57f058dff77615c15eb5961eac50ba04f41ebc792cd8facad"},
{file = "pymupdf-1.25.5-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d94b800e9501929c42283d39bc241001dd87fdeea297b5cb40d5b5714534452f"},
{file = "pymupdf-1.25.5-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ee22155d3a634642d76553204867d862ae1bdd9f7cf70c0797d8127ebee6bed5"},
{file = "pymupdf-1.25.5-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6ed7fc25271004d6d3279c20a80cb2bb4cda3efa9f9088dcc07cd790eca0bc63"},
{file = "pymupdf-1.25.5-cp39-abi3-win32.whl", hash = "sha256:65e18ddb37fe8ec4edcdbebe9be3a8486b6a2f42609d0a142677e42f3a0614f8"},
{file = "pymupdf-1.25.5-cp39-abi3-win_amd64.whl", hash = "sha256:7f44bc3d03ea45b2f68c96464f96105e8c7908896f2fb5e8c04f1fb8dae7981e"},
{file = "pymupdf-1.25.5.tar.gz", hash = "sha256:5f96311cacd13254c905f6654a004a0a2025b71cabc04fda667f5472f72c15a0"},
]
[[package]] [[package]]
name = "pyotp" name = "pyotp"
version = "2.9.0" version = "2.9.0"
@@ -7388,4 +7370,4 @@ type = ["pytest-mypy"]
[metadata] [metadata]
lock-version = "2.1" lock-version = "2.1"
python-versions = "^3.11,<3.12" python-versions = "^3.11,<3.12"
content-hash = "762f5581ccfcd1c18787d26dbabb9744ee58dd0ecc9aaaf97e81d5e66d36448c" content-hash = "81fb59e4ecd1124ab664c84c633ad015acdfc059fda8495a7fe4d40cad33435f"

View File

@@ -57,7 +57,6 @@ psutil = ">=7.0.0"
tiktoken = ">=0.9.0" tiktoken = ">=0.9.0"
anthropic = "^0.50.0" anthropic = "^0.50.0"
google-cloud-aiplatform = "^1.90.0" google-cloud-aiplatform = "^1.90.0"
pymupdf = "^1.25.5"
[tool.poetry.group.dev.dependencies] [tool.poetry.group.dev.dependencies]
isort = "^5.13.2" isort = "^5.13.2"

View File

@@ -2,12 +2,9 @@ from __future__ import annotations
import asyncio import asyncio
import time import time
from io import BytesIO
from typing import Any, Dict, List from typing import Any, Dict, List
import pymupdf
import structlog import structlog
from PIL import Image
from playwright._impl._errors import TimeoutError from playwright._impl._errors import TimeoutError
from playwright.async_api import ElementHandle, Frame, Page from playwright.async_api import ElementHandle, Frame, Page
@@ -37,6 +34,7 @@ JS_FUNCTION_DEFS = load_js_script()
async def _current_viewpoint_screenshot_helper( async def _current_viewpoint_screenshot_helper(
page: Page, page: Page,
file_path: str | None = None, file_path: str | None = None,
full_page: bool = False,
timeout: float = settings.BROWSER_SCREENSHOT_TIMEOUT_MS, timeout: float = settings.BROWSER_SCREENSHOT_TIMEOUT_MS,
) -> bytes: ) -> bytes:
if page.is_closed(): if page.is_closed():
@@ -50,11 +48,13 @@ async def _current_viewpoint_screenshot_helper(
screenshot = await page.screenshot( screenshot = await page.screenshot(
path=file_path, path=file_path,
timeout=timeout, timeout=timeout,
full_page=full_page,
animations="disabled", animations="disabled",
) )
else: else:
screenshot = await page.screenshot( screenshot = await page.screenshot(
timeout=timeout, timeout=timeout,
full_page=full_page,
animations="disabled", animations="disabled",
) )
end_time = time.time() end_time = time.time()
@@ -150,49 +150,9 @@ class SkyvernFrame:
file_path: str | None = None, file_path: str | None = None,
timeout: float = settings.BROWSER_SCREENSHOT_TIMEOUT_MS, timeout: float = settings.BROWSER_SCREENSHOT_TIMEOUT_MS,
) -> bytes: ) -> bytes:
if not full_page: return await _current_viewpoint_screenshot_helper(
return await _current_viewpoint_screenshot_helper(page=page, file_path=file_path, timeout=timeout) page=page, file_path=file_path, full_page=full_page, timeout=timeout
)
LOG.debug("Page is fully loaded, agent is about to generate the full page screenshot")
start_time = time.time()
async with asyncio.timeout(timeout):
pdf_bytes = await page.pdf(
print_background=True, width=f"{settings.BROWSER_WIDTH}px", height=f"{settings.BROWSER_HEIGHT}px"
)
with pymupdf.open(stream=pdf_bytes, filetype="pdf") as doc:
images = []
for pdf_page in doc:
pix = pdf_page.get_pixmap()
img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
images.append(img)
total_height = sum(img.height for img in images)
max_width = max(img.width for img in images)
merged_img = Image.new("RGB", (max_width, total_height), color=(255, 255, 255))
current_y = 0
for img in images:
merged_img.paste(img, (0, current_y))
current_y += img.height
buffer = BytesIO()
merged_img.save(buffer, format="PNG")
buffer.seek(0)
img_data = buffer.read()
if file_path is not None:
with open(file_path, "wb") as f:
f.write(img_data)
end_time = time.time()
LOG.debug(
"Full page screenshot taking time",
screenshot_time=end_time - start_time,
file_path=file_path,
)
return img_data
@staticmethod @staticmethod
async def take_split_screenshots( async def take_split_screenshots(