revert pdf (#2328)
This commit is contained in:
20
poetry.lock
generated
20
poetry.lock
generated
@@ -5081,24 +5081,6 @@ files = [
|
|||||||
[package.extras]
|
[package.extras]
|
||||||
windows-terminal = ["colorama (>=0.4.6)"]
|
windows-terminal = ["colorama (>=0.4.6)"]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "pymupdf"
|
|
||||||
version = "1.25.5"
|
|
||||||
description = "A high performance Python library for data extraction, analysis, conversion & manipulation of PDF (and other) documents."
|
|
||||||
optional = false
|
|
||||||
python-versions = ">=3.9"
|
|
||||||
groups = ["main"]
|
|
||||||
files = [
|
|
||||||
{file = "pymupdf-1.25.5-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:cde4e1c9cfb09c0e1e9c2b7f4b787dd6bb34a32cfe141a4675e24af7c0c25dd3"},
|
|
||||||
{file = "pymupdf-1.25.5-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:5a35e2725fae0ab57f058dff77615c15eb5961eac50ba04f41ebc792cd8facad"},
|
|
||||||
{file = "pymupdf-1.25.5-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d94b800e9501929c42283d39bc241001dd87fdeea297b5cb40d5b5714534452f"},
|
|
||||||
{file = "pymupdf-1.25.5-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ee22155d3a634642d76553204867d862ae1bdd9f7cf70c0797d8127ebee6bed5"},
|
|
||||||
{file = "pymupdf-1.25.5-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6ed7fc25271004d6d3279c20a80cb2bb4cda3efa9f9088dcc07cd790eca0bc63"},
|
|
||||||
{file = "pymupdf-1.25.5-cp39-abi3-win32.whl", hash = "sha256:65e18ddb37fe8ec4edcdbebe9be3a8486b6a2f42609d0a142677e42f3a0614f8"},
|
|
||||||
{file = "pymupdf-1.25.5-cp39-abi3-win_amd64.whl", hash = "sha256:7f44bc3d03ea45b2f68c96464f96105e8c7908896f2fb5e8c04f1fb8dae7981e"},
|
|
||||||
{file = "pymupdf-1.25.5.tar.gz", hash = "sha256:5f96311cacd13254c905f6654a004a0a2025b71cabc04fda667f5472f72c15a0"},
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pyotp"
|
name = "pyotp"
|
||||||
version = "2.9.0"
|
version = "2.9.0"
|
||||||
@@ -7388,4 +7370,4 @@ type = ["pytest-mypy"]
|
|||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.1"
|
lock-version = "2.1"
|
||||||
python-versions = "^3.11,<3.12"
|
python-versions = "^3.11,<3.12"
|
||||||
content-hash = "762f5581ccfcd1c18787d26dbabb9744ee58dd0ecc9aaaf97e81d5e66d36448c"
|
content-hash = "81fb59e4ecd1124ab664c84c633ad015acdfc059fda8495a7fe4d40cad33435f"
|
||||||
|
|||||||
@@ -57,7 +57,6 @@ psutil = ">=7.0.0"
|
|||||||
tiktoken = ">=0.9.0"
|
tiktoken = ">=0.9.0"
|
||||||
anthropic = "^0.50.0"
|
anthropic = "^0.50.0"
|
||||||
google-cloud-aiplatform = "^1.90.0"
|
google-cloud-aiplatform = "^1.90.0"
|
||||||
pymupdf = "^1.25.5"
|
|
||||||
|
|
||||||
[tool.poetry.group.dev.dependencies]
|
[tool.poetry.group.dev.dependencies]
|
||||||
isort = "^5.13.2"
|
isort = "^5.13.2"
|
||||||
|
|||||||
@@ -2,12 +2,9 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import time
|
import time
|
||||||
from io import BytesIO
|
|
||||||
from typing import Any, Dict, List
|
from typing import Any, Dict, List
|
||||||
|
|
||||||
import pymupdf
|
|
||||||
import structlog
|
import structlog
|
||||||
from PIL import Image
|
|
||||||
from playwright._impl._errors import TimeoutError
|
from playwright._impl._errors import TimeoutError
|
||||||
from playwright.async_api import ElementHandle, Frame, Page
|
from playwright.async_api import ElementHandle, Frame, Page
|
||||||
|
|
||||||
@@ -37,6 +34,7 @@ JS_FUNCTION_DEFS = load_js_script()
|
|||||||
async def _current_viewpoint_screenshot_helper(
|
async def _current_viewpoint_screenshot_helper(
|
||||||
page: Page,
|
page: Page,
|
||||||
file_path: str | None = None,
|
file_path: str | None = None,
|
||||||
|
full_page: bool = False,
|
||||||
timeout: float = settings.BROWSER_SCREENSHOT_TIMEOUT_MS,
|
timeout: float = settings.BROWSER_SCREENSHOT_TIMEOUT_MS,
|
||||||
) -> bytes:
|
) -> bytes:
|
||||||
if page.is_closed():
|
if page.is_closed():
|
||||||
@@ -50,11 +48,13 @@ async def _current_viewpoint_screenshot_helper(
|
|||||||
screenshot = await page.screenshot(
|
screenshot = await page.screenshot(
|
||||||
path=file_path,
|
path=file_path,
|
||||||
timeout=timeout,
|
timeout=timeout,
|
||||||
|
full_page=full_page,
|
||||||
animations="disabled",
|
animations="disabled",
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
screenshot = await page.screenshot(
|
screenshot = await page.screenshot(
|
||||||
timeout=timeout,
|
timeout=timeout,
|
||||||
|
full_page=full_page,
|
||||||
animations="disabled",
|
animations="disabled",
|
||||||
)
|
)
|
||||||
end_time = time.time()
|
end_time = time.time()
|
||||||
@@ -150,49 +150,9 @@ class SkyvernFrame:
|
|||||||
file_path: str | None = None,
|
file_path: str | None = None,
|
||||||
timeout: float = settings.BROWSER_SCREENSHOT_TIMEOUT_MS,
|
timeout: float = settings.BROWSER_SCREENSHOT_TIMEOUT_MS,
|
||||||
) -> bytes:
|
) -> bytes:
|
||||||
if not full_page:
|
return await _current_viewpoint_screenshot_helper(
|
||||||
return await _current_viewpoint_screenshot_helper(page=page, file_path=file_path, timeout=timeout)
|
page=page, file_path=file_path, full_page=full_page, timeout=timeout
|
||||||
|
)
|
||||||
LOG.debug("Page is fully loaded, agent is about to generate the full page screenshot")
|
|
||||||
start_time = time.time()
|
|
||||||
async with asyncio.timeout(timeout):
|
|
||||||
pdf_bytes = await page.pdf(
|
|
||||||
print_background=True, width=f"{settings.BROWSER_WIDTH}px", height=f"{settings.BROWSER_HEIGHT}px"
|
|
||||||
)
|
|
||||||
|
|
||||||
with pymupdf.open(stream=pdf_bytes, filetype="pdf") as doc:
|
|
||||||
images = []
|
|
||||||
for pdf_page in doc:
|
|
||||||
pix = pdf_page.get_pixmap()
|
|
||||||
img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
|
|
||||||
images.append(img)
|
|
||||||
|
|
||||||
total_height = sum(img.height for img in images)
|
|
||||||
max_width = max(img.width for img in images)
|
|
||||||
|
|
||||||
merged_img = Image.new("RGB", (max_width, total_height), color=(255, 255, 255))
|
|
||||||
|
|
||||||
current_y = 0
|
|
||||||
for img in images:
|
|
||||||
merged_img.paste(img, (0, current_y))
|
|
||||||
current_y += img.height
|
|
||||||
|
|
||||||
buffer = BytesIO()
|
|
||||||
merged_img.save(buffer, format="PNG")
|
|
||||||
buffer.seek(0)
|
|
||||||
|
|
||||||
img_data = buffer.read()
|
|
||||||
if file_path is not None:
|
|
||||||
with open(file_path, "wb") as f:
|
|
||||||
f.write(img_data)
|
|
||||||
|
|
||||||
end_time = time.time()
|
|
||||||
LOG.debug(
|
|
||||||
"Full page screenshot taking time",
|
|
||||||
screenshot_time=end_time - start_time,
|
|
||||||
file_path=file_path,
|
|
||||||
)
|
|
||||||
return img_data
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
async def take_split_screenshots(
|
async def take_split_screenshots(
|
||||||
|
|||||||
Reference in New Issue
Block a user