diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py index a291c166..93973985 100644 --- a/skyvern/forge/agent.py +++ b/skyvern/forge/agent.py @@ -57,7 +57,8 @@ from skyvern.webeye.actions.handler import ActionHandler from skyvern.webeye.actions.models import AgentStepOutput, DetailedAgentStepOutput from skyvern.webeye.actions.responses import ActionResult from skyvern.webeye.browser_factory import BrowserState -from skyvern.webeye.scraper.scraper import ElementTreeFormat, ScrapedPage, get_page_content, scrape_website +from skyvern.webeye.scraper.scraper import ElementTreeFormat, ScrapedPage, scrape_website +from skyvern.webeye.utils.page import SkyvernFrame LOG = structlog.get_logger() @@ -797,7 +798,8 @@ class ForgeAgent: ) try: - html = await get_page_content(browser_state.page) + skyvern_frame = await SkyvernFrame.create_instance(frame=browser_state.page) + html = await skyvern_frame.get_content() await app.ARTIFACT_MANAGER.create_artifact( step=step, artifact_type=ArtifactType.HTML_ACTION, diff --git a/skyvern/webeye/browser_factory.py b/skyvern/webeye/browser_factory.py index be44486d..ac768964 100644 --- a/skyvern/webeye/browser_factory.py +++ b/skyvern/webeye/browser_factory.py @@ -8,7 +8,6 @@ from datetime import datetime from typing import Any, Awaitable, Callable, Protocol import structlog -from playwright._impl._errors import TimeoutError from playwright.async_api import BrowserContext, Error, Page, Playwright, async_playwright from pydantic import BaseModel @@ -17,7 +16,6 @@ from skyvern.exceptions import ( FailedToNavigateToUrl, FailedToReloadPage, FailedToStopLoadingPage, - FailedToTakeScreenshot, MissingBrowserStatePage, UnknownBrowserType, UnknownErrorWhileCreatingBrowserContext, @@ -25,6 +23,7 @@ from skyvern.exceptions import ( from skyvern.forge.sdk.core.skyvern_context import current from skyvern.forge.sdk.schemas.tasks import ProxyLocation from skyvern.forge.sdk.settings_manager import SettingsManager +from skyvern.webeye.utils.page 
import SkyvernFrame LOG = structlog.get_logger() @@ -319,40 +318,6 @@ class BrowserState: await self.pw.stop() LOG.info("Playwright is stopped") - @staticmethod - async def take_screenshot_from_page(page: Page, full_page: bool = False, file_path: str | None = None) -> bytes: - try: - await page.wait_for_load_state(timeout=SettingsManager.get_settings().BROWSER_LOADING_TIMEOUT_MS) - LOG.info("Page is fully loaded, agent is about to take screenshots") - start_time = time.time() - screenshot: bytes = bytes() - if file_path: - screenshot = await page.screenshot( - path=file_path, - full_page=full_page, - timeout=SettingsManager.get_settings().BROWSER_SCREENSHOT_TIMEOUT_MS, - ) - else: - screenshot = await page.screenshot( - full_page=full_page, - timeout=SettingsManager.get_settings().BROWSER_SCREENSHOT_TIMEOUT_MS, - animations="disabled", - ) - end_time = time.time() - LOG.info( - "Screenshot taking time", - screenshot_time=end_time - start_time, - full_page=full_page, - file_path=file_path, - ) - return screenshot - except TimeoutError as e: - LOG.exception(f"Timeout error while taking screenshot: {str(e)}") - raise FailedToTakeScreenshot(error_message=str(e)) from e - except Exception as e: - LOG.exception(f"Unknown error while taking screenshot: {str(e)}") - raise FailedToTakeScreenshot(error_message=str(e)) from e - async def take_screenshot(self, full_page: bool = False, file_path: str | None = None) -> bytes: page = self.__assert_page() - return await self.take_screenshot_from_page(page, full_page, file_path) + return await SkyvernFrame.take_screenshot(page=page, full_page=full_page, file_path=file_path) diff --git a/skyvern/webeye/scraper/scraper.py b/skyvern/webeye/scraper/scraper.py index 7d59d498..f479d8f9 100644 --- a/skyvern/webeye/scraper/scraper.py +++ b/skyvern/webeye/scraper/scraper.py @@ -6,13 +6,14 @@ from enum import StrEnum from typing import Any, Awaitable, Callable import structlog -from playwright.async_api import ElementHandle, Frame, Page 
+from playwright.async_api import Frame, Page from pydantic import BaseModel -from skyvern.constants import PAGE_CONTENT_TIMEOUT, SKYVERN_DIR, SKYVERN_ID_ATTR +from skyvern.constants import SKYVERN_DIR, SKYVERN_ID_ATTR from skyvern.exceptions import FailedToTakeScreenshot, UnknownElementTreeFormat from skyvern.forge.sdk.settings_manager import SettingsManager from skyvern.webeye.browser_factory import BrowserState +from skyvern.webeye.utils.page import SkyvernFrame LOG = structlog.get_logger() @@ -257,28 +258,7 @@ async def scrape_web_unsafe( LOG.info("Waiting for 5 seconds before scraping the website.") await asyncio.sleep(5) - screenshots: list[bytes] = [] - scroll_y_px_old = -30.0 - scroll_y_px = await scroll_to_top(page, drow_boxes=True) - # Checking max number of screenshots to prevent infinite loop - # We are checking the difference between the old and new scroll_y_px to determine if we have reached the end of the - # page. If the difference is less than 25, we assume we have reached the end of the page. 
- while ( - abs(scroll_y_px_old - scroll_y_px) > 25 - and len(screenshots) < SettingsManager.get_settings().MAX_NUM_SCREENSHOTS - ): - screenshot = await browser_state.take_screenshot(full_page=False) - screenshots.append(screenshot) - scroll_y_px_old = scroll_y_px - LOG.info("Scrolling to next page", url=url, num_screenshots=len(screenshots)) - scroll_y_px = await scroll_to_next_page(page, drow_boxes=True) - LOG.info( - "Scrolled to next page", - scroll_y_px=scroll_y_px, - scroll_y_px_old=scroll_y_px_old, - ) - await remove_bounding_boxes(page) - await scroll_to_top(page, drow_boxes=False) + screenshots = await SkyvernFrame.take_split_screenshots(page=page, url=url, draw_boxes=True) elements, element_tree = await get_interactable_element_tree(page, scrape_exclude) element_tree = cleanup_elements(copy.deepcopy(element_tree)) @@ -300,7 +280,8 @@ async def scrape_web_unsafe( html = "" try: - html = await get_page_content(page) + skyvern_frame = await SkyvernFrame.create_instance(frame=page) + html = await skyvern_frame.get_content() except Exception: LOG.error( "Failed out to get HTML content", @@ -322,23 +303,6 @@ async def scrape_web_unsafe( ) -async def get_page_content(page: Page, timeout: float = PAGE_CONTENT_TIMEOUT) -> str: - async with asyncio.timeout(timeout): - return await page.content() - - -async def get_select2_options(frame: Page | Frame, element: ElementHandle) -> list[dict[str, Any]]: - await frame.evaluate(JS_FUNCTION_DEFS) - js_script = "async (element) => await getSelect2Options(element)" - return await frame.evaluate(js_script, element) - - -async def get_combobox_options(frame: Page | Frame, element: ElementHandle) -> list[dict[str, Any]]: - await frame.evaluate(JS_FUNCTION_DEFS) - js_script = "async (element) => await getListboxOptions(element)" - return await frame.evaluate(js_script, element) - - async def get_interactable_element_tree_in_frame( frames: list[Frame], elements: list[dict], @@ -413,41 +377,6 @@ async def 
get_interactable_element_tree( return elements, element_tree -async def scroll_to_top(page: Page, drow_boxes: bool) -> float: - """ - Scroll to the top of the page and take a screenshot. - :param drow_boxes: If True, draw bounding boxes around the elements. - :param page: Page instance to take the screenshot from. - :return: Screenshot of the page. - """ - await page.evaluate(JS_FUNCTION_DEFS) - js_script = f"async () => await scrollToTop({str(drow_boxes).lower()})" - scroll_y_px = await page.evaluate(js_script) - return scroll_y_px - - -async def scroll_to_next_page(page: Page, drow_boxes: bool) -> bool: - """ - Scroll to the next page and take a screenshot. - :param drow_boxes: If True, draw bounding boxes around the elements. - :param page: Page instance to take the screenshot from. - :return: Screenshot of the page. - """ - await page.evaluate(JS_FUNCTION_DEFS) - js_script = f"async () => await scrollToNextPage({str(drow_boxes).lower()})" - scroll_y_px = await page.evaluate(js_script) - return scroll_y_px - - -async def remove_bounding_boxes(page: Page) -> None: - """ - Remove the bounding boxes from the page. - :param page: Page instance to remove the bounding boxes from. - """ - js_script = "() => removeBoundingBoxes()" - await page.evaluate(js_script) - - def cleanup_elements(elements: list[dict]) -> list[dict]: """ Remove rect and attribute.unique_id from the elements. 
diff --git a/skyvern/webeye/utils/dom.py b/skyvern/webeye/utils/dom.py index 5ccaf239..611df7b3 100644 --- a/skyvern/webeye/utils/dom.py +++ b/skyvern/webeye/utils/dom.py @@ -24,7 +24,8 @@ from skyvern.exceptions import ( SkyvernException, ) from skyvern.forge.sdk.settings_manager import SettingsManager -from skyvern.webeye.scraper.scraper import ScrapedPage, get_combobox_options, get_select2_options +from skyvern.webeye.scraper.scraper import ScrapedPage +from skyvern.webeye.utils.page import SkyvernFrame LOG = structlog.get_logger() @@ -164,13 +165,15 @@ class SkyvernElement: if not await self.is_select2_dropdown(): raise ElementIsNotSelect2Dropdown(self.get_id(), self.__static_element) - return Select2Dropdown(self.get_frame(), self) + frame = await SkyvernFrame.create_instance(self.get_frame()) + return Select2Dropdown(frame, self) async def get_combobox_dropdown(self) -> ComboboxDropdown: if not await self.is_combobox_dropdown(): raise ElementIsNotComboboxDropdown(self.get_id(), self.__static_element) - return ComboboxDropdown(self.get_frame(), self) + frame = await SkyvernFrame.create_instance(self.get_frame()) + return ComboboxDropdown(frame, self) def find_element_id_in_label_children(self, element_type: InteractiveElement) -> str | None: tag_name = self.get_tag_name() @@ -344,12 +347,12 @@ class AbstractSelectDropdown(ABC): class Select2Dropdown(AbstractSelectDropdown): - def __init__(self, frame: Page | Frame, skyvern_element: SkyvernElement) -> None: + def __init__(self, skyvern_frame: SkyvernFrame, skyvern_element: SkyvernElement) -> None: self.skyvern_element = skyvern_element - self.frame = frame + self.skyvern_frame = skyvern_frame async def __find_anchor(self, timeout: float) -> Locator: - locator = self.frame.locator("[id='select2-drop']") + locator = self.skyvern_element.get_frame().locator("[id='select2-drop']") await locator.wait_for(state="visible", timeout=timeout) cnt = await locator.count() if cnt == 0: @@ -397,7 +400,7 @@ class 
Select2Dropdown(AbstractSelectDropdown): ) -> typing.List[SkyvernOptionType]: anchor = await self.__find_anchor(timeout=timeout) element_handler = await anchor.element_handle(timeout=timeout) - options = await get_select2_options(self.frame, element_handler) + options = await self.skyvern_frame.get_select2_options(element_handler) return typing.cast(typing.List[SkyvernOptionType], options) async def select_by_index( @@ -409,13 +412,13 @@ class Select2Dropdown(AbstractSelectDropdown): class ComboboxDropdown(AbstractSelectDropdown): - def __init__(self, frame: Page | Frame, skyvern_element: SkyvernElement) -> None: + def __init__(self, skyvern_frame: SkyvernFrame, skyvern_element: SkyvernElement) -> None: self.skyvern_element = skyvern_element - self.frame = frame + self.skyvern_frame = skyvern_frame async def __find_anchor(self, timeout: float) -> Locator: control_id = await self.skyvern_element.get_attr("aria-controls", timeout=timeout) - locator = self.frame.locator(f"[id='{control_id}']") + locator = self.skyvern_element.get_frame().locator(f"[id='{control_id}']") await locator.wait_for(state="visible", timeout=timeout) cnt = await locator.count() if cnt == 0: @@ -445,7 +448,7 @@ class ComboboxDropdown(AbstractSelectDropdown): ) -> typing.List[SkyvernOptionType]: anchor = await self.__find_anchor(timeout=timeout) element_handler = await anchor.element_handle() - options = await get_combobox_options(self.frame, element_handler) + options = await self.skyvern_frame.get_combobox_options(element_handler) return typing.cast(typing.List[SkyvernOptionType], options) async def select_by_index( diff --git a/skyvern/webeye/utils/page.py b/skyvern/webeye/utils/page.py new file mode 100644 index 00000000..fa18947b --- /dev/null +++ b/skyvern/webeye/utils/page.py @@ -0,0 +1,160 @@ +from __future__ import annotations + +import asyncio +import time +from typing import Any, Dict, List + +import structlog +from playwright._impl._errors import TimeoutError +from 
from playwright.async_api import ElementHandle, Frame, Page

from skyvern.constants import PAGE_CONTENT_TIMEOUT, SKYVERN_DIR
from skyvern.exceptions import FailedToTakeScreenshot
from skyvern.forge.sdk.settings_manager import SettingsManager

LOG = structlog.get_logger()


def load_js_script() -> str:
    """
    Read the DOM utility script shipped with Skyvern and return its source text.

    :return: Contents of ``{SKYVERN_DIR}/webeye/scraper/domUtils.js``.
    :raises FileNotFoundError: If the script does not exist (logged, then re-raised).
    """
    # TODO: Handle file location better. This is a hacky way to find the file location.
    path = f"{SKYVERN_DIR}/webeye/scraper/domUtils.js"
    try:
        # TODO: Implement TS of domUtils.js and use the compiled JS file instead of the raw JS file.
        # This will allow our code to be type safe.
        with open(path, "r") as f:
            return f.read()
    except FileNotFoundError:
        LOG.exception("Failed to load the JS script", path=path)
        raise


# Loaded once at import time; evaluated inside a page/frame to define the JS helpers used below.
JS_FUNCTION_DEFS = load_js_script()


class SkyvernFrame:
    """
    Wrapper around a Playwright ``Page`` or ``Frame`` that calls the JS helpers in ``JS_FUNCTION_DEFS``.

    Prefer :meth:`create_instance` over the bare constructor: it evaluates ``JS_FUNCTION_DEFS`` in the
    frame first, which the scroll / bounding-box methods rely on (they do not inject the script
    themselves).
    """

    def __init__(self, frame: Page | Frame) -> None:
        """
        :param frame: Playwright page or frame to operate on. No JS is injected by the constructor.
        """
        self.frame = frame

    @classmethod
    async def create_instance(cls, frame: Page | Frame) -> SkyvernFrame:
        """
        Build a ``SkyvernFrame`` and evaluate ``JS_FUNCTION_DEFS`` in the wrapped frame.

        :param frame: Playwright page or frame to wrap.
        :return: The new instance.
        """
        instance = cls(frame=frame)
        await instance.frame.evaluate(JS_FUNCTION_DEFS)
        return instance

    @staticmethod
    async def take_screenshot(
        page: Page,
        full_page: bool = False,
        file_path: str | None = None,
        timeout: float | None = None,
    ) -> bytes:
        """
        Wait for the page to finish loading, then capture a screenshot.

        :param page: Page to capture.
        :param full_page: Forwarded to ``page.screenshot(full_page=...)``.
        :param file_path: If given, forwarded as ``path=`` so Playwright also writes the image to disk.
        :param timeout: Timeout passed to ``page.screenshot``. When ``None`` the current
            ``BROWSER_SCREENSHOT_TIMEOUT_MS`` setting is used.
        :return: The screenshot bytes returned by Playwright.
        :raises FailedToTakeScreenshot: On any error, including a timeout while waiting for the load
            state or while capturing.
        """
        try:
            # Settings are read per call, not in the signature: a default argument would be
            # evaluated once at import time and go stale if the settings change later.
            settings = SettingsManager.get_settings()
            if timeout is None:
                # The capture uses the screenshot timeout; the loading timeout only applies to
                # wait_for_load_state below.
                timeout = settings.BROWSER_SCREENSHOT_TIMEOUT_MS

            await page.wait_for_load_state(timeout=settings.BROWSER_LOADING_TIMEOUT_MS)
            LOG.info("Page is fully loaded, agent is about to take screenshots")
            start_time = time.time()
            if file_path:
                # NOTE(review): animations are only disabled in the in-memory branch below;
                # kept as-is, confirm whether that difference is intentional.
                screenshot: bytes = await page.screenshot(
                    path=file_path,
                    full_page=full_page,
                    timeout=timeout,
                )
            else:
                screenshot = await page.screenshot(
                    full_page=full_page,
                    timeout=timeout,
                    animations="disabled",
                )
            end_time = time.time()
            LOG.info(
                "Screenshot taking time",
                screenshot_time=end_time - start_time,
                full_page=full_page,
                file_path=file_path,
            )
            return screenshot
        except TimeoutError as e:
            # TimeoutError here is Playwright's error class (imported at the top of this module).
            LOG.exception(f"Timeout error while taking screenshot: {str(e)}")
            raise FailedToTakeScreenshot(error_message=str(e)) from e
        except Exception as e:
            LOG.exception(f"Unknown error while taking screenshot: {str(e)}")
            raise FailedToTakeScreenshot(error_message=str(e)) from e

    @staticmethod
    async def take_split_screenshots(
        page: Page,
        url: str,
        draw_boxes: bool = False,
        max_number: int | None = None,
    ) -> List[bytes]:
        """
        Scroll through the page from the top and capture one viewport screenshot per scroll step.

        :param page: Page to scroll and capture.
        :param url: Only used as context in log messages.
        :param draw_boxes: Forwarded to the JS scroll helpers; when True the bounding boxes are
            removed again once the loop has finished.
        :param max_number: Upper bound on the number of screenshots. When ``None`` the current
            ``MAX_NUM_SCREENSHOTS`` setting is used.
        :return: Screenshots in capture order.
        :raises FailedToTakeScreenshot: Propagated from :meth:`take_screenshot`.
        """
        if max_number is None:
            # Resolved per call so a changed setting is honoured (see take_screenshot).
            max_number = SettingsManager.get_settings().MAX_NUM_SCREENSHOTS

        skyvern_page = await SkyvernFrame.create_instance(frame=page)

        screenshots: List[bytes] = []
        # Seed value chosen so that a reported offset of 0 still differs by more than 25 and the
        # first screenshot is always attempted.
        scroll_y_px_old = -30.0
        scroll_y_px = await skyvern_page.scroll_to_top(draw_boxes=draw_boxes)
        # Checking max number of screenshots to prevent infinite loop
        # We are checking the difference between the old and new scroll_y_px to determine if we have reached the end of
        # the page. If the difference is 25 or less, we assume we have reached the end of the page.
        while abs(scroll_y_px_old - scroll_y_px) > 25 and len(screenshots) < max_number:
            screenshot = await SkyvernFrame.take_screenshot(page=page, full_page=False)
            screenshots.append(screenshot)
            scroll_y_px_old = scroll_y_px
            LOG.info("Scrolling to next page", url=url, num_screenshots=len(screenshots))
            scroll_y_px = await skyvern_page.scroll_to_next_page(draw_boxes=draw_boxes)
            LOG.info(
                "Scrolled to next page",
                scroll_y_px=scroll_y_px,
                scroll_y_px_old=scroll_y_px_old,
            )
        if draw_boxes:
            await skyvern_page.remove_bounding_boxes()
        # Return to the top without drawing boxes so later steps start from a known position.
        await skyvern_page.scroll_to_top(draw_boxes=False)
        return screenshots

    def get_frame(self) -> Page | Frame:
        """Return the wrapped Playwright page or frame."""
        return self.frame

    async def get_content(self, timeout: float = PAGE_CONTENT_TIMEOUT) -> str:
        """
        Return the frame's HTML via ``frame.content()``, bounded by ``asyncio.timeout``.

        :param timeout: Passed to ``asyncio.timeout``.
        :return: The HTML content string.
        :raises TimeoutError: The built-in one raised by ``asyncio.timeout`` on expiry -- not the
            Playwright ``TimeoutError`` this module imports under the same name.
        """
        async with asyncio.timeout(timeout):
            return await self.frame.content()

    async def get_select2_options(self, element: ElementHandle) -> List[Dict[str, Any]]:
        """
        Call the JS helper ``getSelect2Options`` on ``element`` and return its result.

        :param element: Handle passed as the single argument to the JS function.
        :return: Whatever ``getSelect2Options`` resolves to (annotated as a list of dicts).
        """
        # Re-evaluate the definitions before the call, as the code this replaced did.
        await self.frame.evaluate(JS_FUNCTION_DEFS)
        js_script = "async (element) => await getSelect2Options(element)"
        return await self.frame.evaluate(js_script, element)

    async def get_combobox_options(self, element: ElementHandle) -> List[Dict[str, Any]]:
        """
        Call the JS helper ``getListboxOptions`` on ``element`` and return its result.

        :param element: Handle passed as the single argument to the JS function.
        :return: Whatever ``getListboxOptions`` resolves to (annotated as a list of dicts).
        """
        # Re-evaluate the definitions before the call, as the code this replaced did.
        await self.frame.evaluate(JS_FUNCTION_DEFS)
        js_script = "async (element) => await getListboxOptions(element)"
        return await self.frame.evaluate(js_script, element)

    async def scroll_to_top(self, draw_boxes: bool) -> float:
        """
        Call the JS helper ``scrollToTop`` in the frame.

        Requires ``JS_FUNCTION_DEFS`` to have been evaluated already (see ``create_instance``).

        :param draw_boxes: Passed to ``scrollToTop`` as a JS boolean literal.
        :return: The value returned by ``scrollToTop``; callers treat it as the vertical scroll
            offset in pixels.
        """
        js_script = f"async () => await scrollToTop({str(draw_boxes).lower()})"
        scroll_y_px = await self.frame.evaluate(js_script)
        return scroll_y_px

    async def scroll_to_next_page(self, draw_boxes: bool) -> float:
        """
        Call the JS helper ``scrollToNextPage`` in the frame.

        Requires ``JS_FUNCTION_DEFS`` to have been evaluated already (see ``create_instance``).

        :param draw_boxes: Passed to ``scrollToNextPage`` as a JS boolean literal.
        :return: The value returned by ``scrollToNextPage``; callers treat it as the vertical scroll
            offset in pixels.
        """
        js_script = f"async () => await scrollToNextPage({str(draw_boxes).lower()})"
        scroll_y_px = await self.frame.evaluate(js_script)
        return scroll_y_px

    async def remove_bounding_boxes(self) -> None:
        """
        Call the JS helper ``removeBoundingBoxes`` in the frame.

        Requires ``JS_FUNCTION_DEFS`` to have been evaluated already (see ``create_instance``).
        """
        js_script = "() => removeBoundingBoxes()"
        await self.frame.evaluate(js_script)