scroll back after screenshot (#2851)

This commit is contained in:
Shuchang Zheng
2025-07-01 15:00:54 +09:00
committed by GitHub
parent eb0e8a21ee
commit bba1755ff1

View File

@@ -101,13 +101,12 @@ async def _current_viewpoint_screenshot_helper(
async def _scrolling_screenshots_helper( async def _scrolling_screenshots_helper(
page: Page, skyvern_page: SkyvernFrame,
url: str | None = None, url: str | None = None,
draw_boxes: bool = False, draw_boxes: bool = False,
max_number: int = settings.MAX_NUM_SCREENSHOTS, max_number: int = settings.MAX_NUM_SCREENSHOTS,
mode: ScreenshotMode = ScreenshotMode.DETAILED, mode: ScreenshotMode = ScreenshotMode.DETAILED,
) -> tuple[list[bytes], list[int]]: ) -> tuple[list[bytes], list[int]]:
skyvern_page = await SkyvernFrame.create_instance(frame=page)
# page is the main frame and the index must be 0 # page is the main frame and the index must be 0
assert isinstance(skyvern_page.frame, Page) assert isinstance(skyvern_page.frame, Page)
frame = "main.frame" frame = "main.frame"
@@ -251,35 +250,40 @@ class SkyvernFrame:
# use split screenshot with lite mode, instead of fullpage screenshot from playwright # use split screenshot with lite mode, instead of fullpage screenshot from playwright
LOG.debug("Page is fully loaded, agent is about to generate the full page screenshot") LOG.debug("Page is fully loaded, agent is about to generate the full page screenshot")
start_time = time.time() start_time = time.time()
async with asyncio.timeout(timeout): skyvern_frame = await SkyvernFrame.create_instance(frame=page)
screenshots, positions = await _scrolling_screenshots_helper( x, y = await skyvern_frame.get_scroll_x_y()
page=page, mode=mode, max_number=scrolling_number try:
) async with asyncio.timeout(timeout):
images = [] screenshots, positions = await _scrolling_screenshots_helper(
skyvern_page=skyvern_frame, mode=mode, max_number=scrolling_number
)
images = []
for screenshot in screenshots: for screenshot in screenshots:
with Image.open(BytesIO(screenshot)) as img: with Image.open(BytesIO(screenshot)) as img:
img.load() img.load()
images.append(img) images.append(img)
merged_img = _merge_images_by_position(images, positions) merged_img = _merge_images_by_position(images, positions)
buffer = BytesIO() buffer = BytesIO()
merged_img.save(buffer, format="PNG") merged_img.save(buffer, format="PNG")
buffer.seek(0) buffer.seek(0)
img_data = buffer.read() img_data = buffer.read()
if file_path is not None: if file_path is not None:
with open(file_path, "wb") as f: with open(file_path, "wb") as f:
f.write(img_data) f.write(img_data)
end_time = time.time() end_time = time.time()
LOG.debug( LOG.debug(
"Full page screenshot taking time", "Full page screenshot taking time",
screenshot_time=end_time - start_time, screenshot_time=end_time - start_time,
file_path=file_path, file_path=file_path,
) )
return img_data return img_data
finally:
await skyvern_frame.scroll_to_x_y(x, y)
@staticmethod @staticmethod
async def take_split_screenshots( async def take_split_screenshots(
@@ -292,8 +296,13 @@ class SkyvernFrame:
if not scroll: if not scroll:
return [await _current_viewpoint_screenshot_helper(page=page, mode=ScreenshotMode.DETAILED)] return [await _current_viewpoint_screenshot_helper(page=page, mode=ScreenshotMode.DETAILED)]
skyvern_frame = await SkyvernFrame.create_instance(frame=page)
screenshots, _ = await _scrolling_screenshots_helper( screenshots, _ = await _scrolling_screenshots_helper(
page=page, url=url, max_number=max_number, draw_boxes=draw_boxes, mode=ScreenshotMode.DETAILED skyvern_page=skyvern_frame,
url=url,
max_number=max_number,
draw_boxes=draw_boxes,
mode=ScreenshotMode.DETAILED,
) )
return screenshots return screenshots