add skyvern frame (#610)
This commit is contained in:
@@ -57,7 +57,8 @@ from skyvern.webeye.actions.handler import ActionHandler
|
||||
from skyvern.webeye.actions.models import AgentStepOutput, DetailedAgentStepOutput
|
||||
from skyvern.webeye.actions.responses import ActionResult
|
||||
from skyvern.webeye.browser_factory import BrowserState
|
||||
from skyvern.webeye.scraper.scraper import ElementTreeFormat, ScrapedPage, get_page_content, scrape_website
|
||||
from skyvern.webeye.scraper.scraper import ElementTreeFormat, ScrapedPage, scrape_website
|
||||
from skyvern.webeye.utils.page import SkyvernFrame
|
||||
|
||||
LOG = structlog.get_logger()
|
||||
|
||||
@@ -797,7 +798,8 @@ class ForgeAgent:
|
||||
)
|
||||
|
||||
try:
|
||||
html = await get_page_content(browser_state.page)
|
||||
skyvern_frame = await SkyvernFrame.create_instance(frame=browser_state.page)
|
||||
html = await skyvern_frame.get_content()
|
||||
await app.ARTIFACT_MANAGER.create_artifact(
|
||||
step=step,
|
||||
artifact_type=ArtifactType.HTML_ACTION,
|
||||
|
||||
@@ -8,7 +8,6 @@ from datetime import datetime
|
||||
from typing import Any, Awaitable, Callable, Protocol
|
||||
|
||||
import structlog
|
||||
from playwright._impl._errors import TimeoutError
|
||||
from playwright.async_api import BrowserContext, Error, Page, Playwright, async_playwright
|
||||
from pydantic import BaseModel
|
||||
|
||||
@@ -17,7 +16,6 @@ from skyvern.exceptions import (
|
||||
FailedToNavigateToUrl,
|
||||
FailedToReloadPage,
|
||||
FailedToStopLoadingPage,
|
||||
FailedToTakeScreenshot,
|
||||
MissingBrowserStatePage,
|
||||
UnknownBrowserType,
|
||||
UnknownErrorWhileCreatingBrowserContext,
|
||||
@@ -25,6 +23,7 @@ from skyvern.exceptions import (
|
||||
from skyvern.forge.sdk.core.skyvern_context import current
|
||||
from skyvern.forge.sdk.schemas.tasks import ProxyLocation
|
||||
from skyvern.forge.sdk.settings_manager import SettingsManager
|
||||
from skyvern.webeye.utils.page import SkyvernFrame
|
||||
|
||||
LOG = structlog.get_logger()
|
||||
|
||||
@@ -319,40 +318,6 @@ class BrowserState:
|
||||
await self.pw.stop()
|
||||
LOG.info("Playwright is stopped")
|
||||
|
||||
@staticmethod
async def take_screenshot_from_page(page: Page, full_page: bool = False, file_path: str | None = None) -> bytes:
    """
    Wait for ``page`` to reach its load state, then capture a screenshot of it.

    :param page: Playwright page to capture.
    :param full_page: Forwarded to ``page.screenshot`` as ``full_page``.
    :param file_path: When truthy, forwarded to ``page.screenshot`` as ``path``. When falsy, no path
        is passed and the capture is requested with ``animations="disabled"`` instead.
    :return: The bytes returned by ``page.screenshot``.
    :raises FailedToTakeScreenshot: For any exception raised while waiting or capturing; the original
        exception is chained as the cause.
    """
    try:
        await page.wait_for_load_state(timeout=SettingsManager.get_settings().BROWSER_LOADING_TIMEOUT_MS)
        LOG.info("Page is fully loaded, agent is about to take screenshots")

        started_at = time.time()
        # Both capture variants share full_page/timeout; only path vs. animations differs.
        capture_options: dict[str, Any] = {
            "full_page": full_page,
            "timeout": SettingsManager.get_settings().BROWSER_SCREENSHOT_TIMEOUT_MS,
        }
        if file_path:
            capture_options["path"] = file_path
        else:
            capture_options["animations"] = "disabled"
        image: bytes = await page.screenshot(**capture_options)
        finished_at = time.time()

        LOG.info(
            "Screenshot taking time",
            screenshot_time=finished_at - started_at,
            full_page=full_page,
            file_path=file_path,
        )
        return image
    except TimeoutError as e:
        # Timeouts only differ from other failures in the logged message.
        LOG.exception(f"Timeout error while taking screenshot: {str(e)}")
        raise FailedToTakeScreenshot(error_message=str(e)) from e
    except Exception as e:
        LOG.exception(f"Unknown error while taking screenshot: {str(e)}")
        raise FailedToTakeScreenshot(error_message=str(e)) from e
|
||||
|
||||
async def take_screenshot(self, full_page: bool = False, file_path: str | None = None) -> bytes:
|
||||
page = self.__assert_page()
|
||||
return await self.take_screenshot_from_page(page, full_page, file_path)
|
||||
return await SkyvernFrame.take_screenshot(page=page, full_page=full_page, file_path=file_path)
|
||||
|
||||
@@ -6,13 +6,14 @@ from enum import StrEnum
|
||||
from typing import Any, Awaitable, Callable
|
||||
|
||||
import structlog
|
||||
from playwright.async_api import ElementHandle, Frame, Page
|
||||
from playwright.async_api import Frame, Page
|
||||
from pydantic import BaseModel
|
||||
|
||||
from skyvern.constants import PAGE_CONTENT_TIMEOUT, SKYVERN_DIR, SKYVERN_ID_ATTR
|
||||
from skyvern.constants import SKYVERN_DIR, SKYVERN_ID_ATTR
|
||||
from skyvern.exceptions import FailedToTakeScreenshot, UnknownElementTreeFormat
|
||||
from skyvern.forge.sdk.settings_manager import SettingsManager
|
||||
from skyvern.webeye.browser_factory import BrowserState
|
||||
from skyvern.webeye.utils.page import SkyvernFrame
|
||||
|
||||
LOG = structlog.get_logger()
|
||||
|
||||
@@ -257,28 +258,7 @@ async def scrape_web_unsafe(
|
||||
LOG.info("Waiting for 5 seconds before scraping the website.")
|
||||
await asyncio.sleep(5)
|
||||
|
||||
screenshots: list[bytes] = []
|
||||
scroll_y_px_old = -30.0
|
||||
scroll_y_px = await scroll_to_top(page, drow_boxes=True)
|
||||
# Checking max number of screenshots to prevent infinite loop
|
||||
# We are checking the difference between the old and new scroll_y_px to determine if we have reached the end of the
|
||||
# page. If the difference is less than 25, we assume we have reached the end of the page.
|
||||
while (
|
||||
abs(scroll_y_px_old - scroll_y_px) > 25
|
||||
and len(screenshots) < SettingsManager.get_settings().MAX_NUM_SCREENSHOTS
|
||||
):
|
||||
screenshot = await browser_state.take_screenshot(full_page=False)
|
||||
screenshots.append(screenshot)
|
||||
scroll_y_px_old = scroll_y_px
|
||||
LOG.info("Scrolling to next page", url=url, num_screenshots=len(screenshots))
|
||||
scroll_y_px = await scroll_to_next_page(page, drow_boxes=True)
|
||||
LOG.info(
|
||||
"Scrolled to next page",
|
||||
scroll_y_px=scroll_y_px,
|
||||
scroll_y_px_old=scroll_y_px_old,
|
||||
)
|
||||
await remove_bounding_boxes(page)
|
||||
await scroll_to_top(page, drow_boxes=False)
|
||||
screenshots = await SkyvernFrame.take_split_screenshots(page=page, url=url, draw_boxes=True)
|
||||
|
||||
elements, element_tree = await get_interactable_element_tree(page, scrape_exclude)
|
||||
element_tree = cleanup_elements(copy.deepcopy(element_tree))
|
||||
@@ -300,7 +280,8 @@ async def scrape_web_unsafe(
|
||||
|
||||
html = ""
|
||||
try:
|
||||
html = await get_page_content(page)
|
||||
skyvern_frame = await SkyvernFrame.create_instance(frame=page)
|
||||
html = await skyvern_frame.get_content()
|
||||
except Exception:
|
||||
LOG.error(
|
||||
"Failed out to get HTML content",
|
||||
@@ -322,23 +303,6 @@ async def scrape_web_unsafe(
|
||||
)
|
||||
|
||||
|
||||
async def get_page_content(page: Page, timeout: float = PAGE_CONTENT_TIMEOUT) -> str:
    """
    Return ``page.content()``, bounded by an ``asyncio.timeout`` deadline.

    :param page: Page whose content is read.
    :param timeout: Delay handed to ``asyncio.timeout``; defaults to ``PAGE_CONTENT_TIMEOUT``.
    :return: The string produced by ``page.content()``.
    """
    async with asyncio.timeout(timeout):
        html = await page.content()
    return html
|
||||
|
||||
|
||||
async def get_select2_options(frame: Page | Frame, element: ElementHandle) -> list[dict[str, Any]]:
    """
    Evaluate the JS helper definitions in ``frame`` and call ``getSelect2Options`` on ``element``.

    :param frame: Page or frame in which the scripts are evaluated.
    :param element: Element handle passed as the argument of the JS call.
    :return: Whatever the JS ``getSelect2Options`` call resolves to (annotated as a list of dicts).
    """
    # The helper definitions are evaluated on every call before the helper is invoked.
    await frame.evaluate(JS_FUNCTION_DEFS)
    return await frame.evaluate("async (element) => await getSelect2Options(element)", element)
|
||||
|
||||
|
||||
async def get_combobox_options(frame: Page | Frame, element: ElementHandle) -> list[dict[str, Any]]:
    """
    Evaluate the JS helper definitions in ``frame`` and call ``getListboxOptions`` on ``element``.

    :param frame: Page or frame in which the scripts are evaluated.
    :param element: Element handle passed as the argument of the JS call.
    :return: Whatever the JS ``getListboxOptions`` call resolves to (annotated as a list of dicts).
    """
    # The helper definitions are evaluated on every call before the helper is invoked.
    await frame.evaluate(JS_FUNCTION_DEFS)
    return await frame.evaluate("async (element) => await getListboxOptions(element)", element)
|
||||
|
||||
|
||||
async def get_interactable_element_tree_in_frame(
|
||||
frames: list[Frame],
|
||||
elements: list[dict],
|
||||
@@ -413,41 +377,6 @@ async def get_interactable_element_tree(
|
||||
return elements, element_tree
|
||||
|
||||
|
||||
async def scroll_to_top(page: Page, drow_boxes: bool) -> float:
    """
    Evaluate the JS helper definitions in the page, then run the JS ``scrollToTop`` helper.

    No screenshot is taken here; the function only scrolls.

    :param page: Page instance to scroll.
    :param drow_boxes: If True, draw bounding boxes around the elements (forwarded to the JS
        helper as the literal ``true`` / ``false``).
    :return: The value resolved by ``scrollToTop`` (the scroll offset the caller compares between
        iterations).
    """
    await page.evaluate(JS_FUNCTION_DEFS)
    return await page.evaluate(f"async () => await scrollToTop({str(drow_boxes).lower()})")
|
||||
|
||||
|
||||
async def scroll_to_next_page(page: Page, drow_boxes: bool) -> float:
    """
    Evaluate the JS helper definitions in the page, then run the JS ``scrollToNextPage`` helper.

    No screenshot is taken here; the function only scrolls. The return annotation is ``float``
    (not ``bool``): the result is the scroll offset, which the caller subtracts from the previous
    offset to detect the end of the page.

    :param page: Page instance to scroll.
    :param drow_boxes: If True, draw bounding boxes around the elements (forwarded to the JS
        helper as the literal ``true`` / ``false``).
    :return: The value resolved by ``scrollToNextPage``, i.e. the new scroll offset.
    """
    await page.evaluate(JS_FUNCTION_DEFS)
    js_script = f"async () => await scrollToNextPage({str(drow_boxes).lower()})"
    scroll_y_px = await page.evaluate(js_script)
    return scroll_y_px
|
||||
|
||||
|
||||
async def remove_bounding_boxes(page: Page) -> None:
    """
    Run the JS ``removeBoundingBoxes`` helper in the page.

    The helper definitions are not evaluated here, so ``removeBoundingBoxes`` must already be
    defined in the page when this is called.

    :param page: Page instance to remove the bounding boxes from.
    """
    await page.evaluate("() => removeBoundingBoxes()")
|
||||
|
||||
|
||||
def cleanup_elements(elements: list[dict]) -> list[dict]:
|
||||
"""
|
||||
Remove rect and attribute.unique_id from the elements.
|
||||
|
||||
@@ -24,7 +24,8 @@ from skyvern.exceptions import (
|
||||
SkyvernException,
|
||||
)
|
||||
from skyvern.forge.sdk.settings_manager import SettingsManager
|
||||
from skyvern.webeye.scraper.scraper import ScrapedPage, get_combobox_options, get_select2_options
|
||||
from skyvern.webeye.scraper.scraper import ScrapedPage
|
||||
from skyvern.webeye.utils.page import SkyvernFrame
|
||||
|
||||
LOG = structlog.get_logger()
|
||||
|
||||
@@ -164,13 +165,15 @@ class SkyvernElement:
|
||||
if not await self.is_select2_dropdown():
|
||||
raise ElementIsNotSelect2Dropdown(self.get_id(), self.__static_element)
|
||||
|
||||
return Select2Dropdown(self.get_frame(), self)
|
||||
frame = await SkyvernFrame.create_instance(self.get_frame())
|
||||
return Select2Dropdown(frame, self)
|
||||
|
||||
async def get_combobox_dropdown(self) -> ComboboxDropdown:
|
||||
if not await self.is_combobox_dropdown():
|
||||
raise ElementIsNotComboboxDropdown(self.get_id(), self.__static_element)
|
||||
|
||||
return ComboboxDropdown(self.get_frame(), self)
|
||||
frame = await SkyvernFrame.create_instance(self.get_frame())
|
||||
return ComboboxDropdown(frame, self)
|
||||
|
||||
def find_element_id_in_label_children(self, element_type: InteractiveElement) -> str | None:
|
||||
tag_name = self.get_tag_name()
|
||||
@@ -344,12 +347,12 @@ class AbstractSelectDropdown(ABC):
|
||||
|
||||
|
||||
class Select2Dropdown(AbstractSelectDropdown):
|
||||
def __init__(self, frame: Page | Frame, skyvern_element: SkyvernElement) -> None:
|
||||
def __init__(self, skyvern_frame: SkyvernFrame, skyvern_element: SkyvernElement) -> None:
|
||||
self.skyvern_element = skyvern_element
|
||||
self.frame = frame
|
||||
self.skyvern_frame = skyvern_frame
|
||||
|
||||
async def __find_anchor(self, timeout: float) -> Locator:
|
||||
locator = self.frame.locator("[id='select2-drop']")
|
||||
locator = self.skyvern_element.get_frame().locator("[id='select2-drop']")
|
||||
await locator.wait_for(state="visible", timeout=timeout)
|
||||
cnt = await locator.count()
|
||||
if cnt == 0:
|
||||
@@ -397,7 +400,7 @@ class Select2Dropdown(AbstractSelectDropdown):
|
||||
) -> typing.List[SkyvernOptionType]:
|
||||
anchor = await self.__find_anchor(timeout=timeout)
|
||||
element_handler = await anchor.element_handle(timeout=timeout)
|
||||
options = await get_select2_options(self.frame, element_handler)
|
||||
options = await self.skyvern_frame.get_select2_options(element_handler)
|
||||
return typing.cast(typing.List[SkyvernOptionType], options)
|
||||
|
||||
async def select_by_index(
|
||||
@@ -409,13 +412,13 @@ class Select2Dropdown(AbstractSelectDropdown):
|
||||
|
||||
|
||||
class ComboboxDropdown(AbstractSelectDropdown):
|
||||
def __init__(self, frame: Page | Frame, skyvern_element: SkyvernElement) -> None:
|
||||
def __init__(self, skyvern_frame: SkyvernFrame, skyvern_element: SkyvernElement) -> None:
|
||||
self.skyvern_element = skyvern_element
|
||||
self.frame = frame
|
||||
self.skyvern_frame = skyvern_frame
|
||||
|
||||
async def __find_anchor(self, timeout: float) -> Locator:
|
||||
control_id = await self.skyvern_element.get_attr("aria-controls", timeout=timeout)
|
||||
locator = self.frame.locator(f"[id='{control_id}']")
|
||||
locator = self.skyvern_element.get_frame().locator(f"[id='{control_id}']")
|
||||
await locator.wait_for(state="visible", timeout=timeout)
|
||||
cnt = await locator.count()
|
||||
if cnt == 0:
|
||||
@@ -445,7 +448,7 @@ class ComboboxDropdown(AbstractSelectDropdown):
|
||||
) -> typing.List[SkyvernOptionType]:
|
||||
anchor = await self.__find_anchor(timeout=timeout)
|
||||
element_handler = await anchor.element_handle()
|
||||
options = await get_combobox_options(self.frame, element_handler)
|
||||
options = await self.skyvern_frame.get_combobox_options(element_handler)
|
||||
return typing.cast(typing.List[SkyvernOptionType], options)
|
||||
|
||||
async def select_by_index(
|
||||
|
||||
160
skyvern/webeye/utils/page.py
Normal file
160
skyvern/webeye/utils/page.py
Normal file
@@ -0,0 +1,160 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import structlog
|
||||
from playwright._impl._errors import TimeoutError
|
||||
from playwright.async_api import ElementHandle, Frame, Page
|
||||
|
||||
from skyvern.constants import PAGE_CONTENT_TIMEOUT, SKYVERN_DIR
|
||||
from skyvern.exceptions import FailedToTakeScreenshot
|
||||
from skyvern.forge.sdk.settings_manager import SettingsManager
|
||||
|
||||
LOG = structlog.get_logger()
|
||||
|
||||
|
||||
def load_js_script() -> str:
    """
    Read the ``domUtils.js`` helper script from the Skyvern package directory.

    :return: The full text of ``{SKYVERN_DIR}/webeye/scraper/domUtils.js``.
    :raises FileNotFoundError: If the script is missing; the failure is logged and re-raised.
    """
    # TODO: Handle file location better. This is a hacky way to find the file location.
    path = f"{SKYVERN_DIR}/webeye/scraper/domUtils.js"
    try:
        # TODO: Implement TS of domUtils.js and use the compiled JS file instead of the raw JS file.
        # This will allow our code to be type safe.
        # Decode explicitly as UTF-8 so the result does not depend on the platform's locale encoding.
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    except FileNotFoundError:
        LOG.exception("Failed to load the JS script", path=path)
        # Bare raise re-raises the active exception without adding a traceback frame.
        raise
|
||||
|
||||
|
||||
JS_FUNCTION_DEFS = load_js_script()
|
||||
|
||||
|
||||
class SkyvernFrame:
    """
    Thin wrapper around a Playwright ``Page`` or ``Frame`` that runs the JS helper functions in it.

    Build instances with :meth:`create_instance`, which evaluates ``JS_FUNCTION_DEFS`` in the
    wrapped frame. The scroll and bounding-box methods call the JS helpers by name without
    re-evaluating the definitions.
    """

    @staticmethod
    async def take_screenshot(
        page: Page,
        full_page: bool = False,
        file_path: str | None = None,
        timeout: float = SettingsManager.get_settings().BROWSER_LOADING_TIMEOUT_MS,
    ) -> bytes:
        """
        Wait for ``page`` to reach its load state, then capture a screenshot of it.

        :param page: Playwright page to capture.
        :param full_page: Forwarded to ``page.screenshot`` as ``full_page``.
        :param file_path: When truthy, forwarded to ``page.screenshot`` as ``path``. When falsy, no
            path is passed and the capture is requested with ``animations="disabled"`` instead.
        :param timeout: Timeout handed to ``page.screenshot`` only. The load-state wait always uses
            the ``BROWSER_LOADING_TIMEOUT_MS`` setting. The default is read from the settings once,
            when the class body is executed.
        :return: The bytes returned by ``page.screenshot``.
        :raises FailedToTakeScreenshot: For any exception raised while waiting or capturing; the
            original exception is chained as the cause.
        """
        try:
            await page.wait_for_load_state(timeout=SettingsManager.get_settings().BROWSER_LOADING_TIMEOUT_MS)
            LOG.info("Page is fully loaded, agent is about to take screenshots")

            started_at = time.time()
            # Both capture variants share full_page/timeout; only path vs. animations differs.
            capture_options: Dict[str, Any] = {"full_page": full_page, "timeout": timeout}
            if file_path:
                capture_options["path"] = file_path
            else:
                capture_options["animations"] = "disabled"
            image: bytes = await page.screenshot(**capture_options)
            finished_at = time.time()

            LOG.info(
                "Screenshot taking time",
                screenshot_time=finished_at - started_at,
                full_page=full_page,
                file_path=file_path,
            )
            return image
        except TimeoutError as e:
            # Timeouts only differ from other failures in the logged message.
            LOG.exception(f"Timeout error while taking screenshot: {str(e)}")
            raise FailedToTakeScreenshot(error_message=str(e)) from e
        except Exception as e:
            LOG.exception(f"Unknown error while taking screenshot: {str(e)}")
            raise FailedToTakeScreenshot(error_message=str(e)) from e

    @staticmethod
    async def take_split_screenshots(
        page: Page,
        url: str,
        draw_boxes: bool = False,
        max_number: int = SettingsManager.get_settings().MAX_NUM_SCREENSHOTS,
    ) -> List[bytes]:
        """
        Capture the page as a series of viewport screenshots, scrolling between captures.

        :param page: Page to capture.
        :param url: Only used as context in the log output.
        :param draw_boxes: Forwarded to the scroll helpers; when True the bounding boxes are removed
            again once capturing is finished.
        :param max_number: Upper bound on the number of screenshots. The default is read from the
            ``MAX_NUM_SCREENSHOTS`` setting once, when the class body is executed.
        :return: The screenshots in capture order.
        """
        wrapped = await SkyvernFrame.create_instance(frame=page)
        assert isinstance(wrapped.frame, Page)

        captured: List[bytes] = []
        # Start the "previous" offset more than 25px away from 0 so that a page whose first offset
        # is 0 still gets its first screenshot.
        previous_y = -30.0
        current_y = await wrapped.scroll_to_top(draw_boxes=draw_boxes)
        # Checking max number of screenshots to prevent infinite loop
        # We are checking the difference between the old and new scroll offset to determine if we have reached the
        # end of the page. If the difference is 25 or less, we assume we have reached the end of the page.
        while abs(previous_y - current_y) > 25 and len(captured) < max_number:
            captured.append(await SkyvernFrame.take_screenshot(page=wrapped.frame, full_page=False))
            previous_y = current_y
            LOG.info("Scrolling to next page", url=url, num_screenshots=len(captured))
            current_y = await wrapped.scroll_to_next_page(draw_boxes=draw_boxes)
            LOG.info(
                "Scrolled to next page",
                scroll_y_px=current_y,
                scroll_y_px_old=previous_y,
            )

        if draw_boxes:
            await wrapped.remove_bounding_boxes()
        # Leave the page scrolled back to the top, without boxes.
        await wrapped.scroll_to_top(draw_boxes=False)
        return captured

    @classmethod
    async def create_instance(cls, frame: Page | Frame) -> SkyvernFrame:
        """
        Wrap ``frame`` and evaluate the JS helper definitions in it.

        :param frame: Page or frame to wrap.
        :return: The new wrapper instance.
        """
        wrapper = cls(frame=frame)
        await wrapper.frame.evaluate(JS_FUNCTION_DEFS)
        return wrapper

    def __init__(self, frame: Page | Frame) -> None:
        """Store ``frame``; no JS is evaluated here (see ``create_instance``)."""
        self.frame = frame

    def get_frame(self) -> Page | Frame:
        """Return the wrapped page or frame."""
        return self.frame

    async def get_content(self, timeout: float = PAGE_CONTENT_TIMEOUT) -> str:
        """
        Return ``self.frame.content()``, bounded by an ``asyncio.timeout`` deadline.

        :param timeout: Delay handed to ``asyncio.timeout``; defaults to ``PAGE_CONTENT_TIMEOUT``.
        :return: The string produced by ``content()``.
        """
        async with asyncio.timeout(timeout):
            html = await self.frame.content()
        return html

    async def get_select2_options(self, element: ElementHandle) -> List[Dict[str, Any]]:
        """
        Evaluate the JS helper definitions again and call ``getSelect2Options`` on ``element``.

        :param element: Element handle passed as the argument of the JS call.
        :return: Whatever the JS call resolves to (annotated as a list of dicts).
        """
        await self.frame.evaluate(JS_FUNCTION_DEFS)
        return await self.frame.evaluate("async (element) => await getSelect2Options(element)", element)

    async def get_combobox_options(self, element: ElementHandle) -> List[Dict[str, Any]]:
        """
        Evaluate the JS helper definitions again and call ``getListboxOptions`` on ``element``.

        :param element: Element handle passed as the argument of the JS call.
        :return: Whatever the JS call resolves to (annotated as a list of dicts).
        """
        await self.frame.evaluate(JS_FUNCTION_DEFS)
        return await self.frame.evaluate("async (element) => await getListboxOptions(element)", element)

    async def scroll_to_top(self, draw_boxes: bool) -> float:
        """
        Run the JS ``scrollToTop`` helper in the wrapped frame.

        :param draw_boxes: If True, draw bounding boxes around the elements (forwarded to the JS
            helper as the literal ``true`` / ``false``).
        :return: The value resolved by ``scrollToTop`` (used by callers as the scroll offset).
        """
        return await self.frame.evaluate(f"async () => await scrollToTop({str(draw_boxes).lower()})")

    async def scroll_to_next_page(self, draw_boxes: bool) -> float:
        """
        Run the JS ``scrollToNextPage`` helper in the wrapped frame.

        :param draw_boxes: If True, draw bounding boxes around the elements (forwarded to the JS
            helper as the literal ``true`` / ``false``).
        :return: The value resolved by ``scrollToNextPage`` (used by callers as the scroll offset).
        """
        return await self.frame.evaluate(f"async () => await scrollToNextPage({str(draw_boxes).lower()})")

    async def remove_bounding_boxes(self) -> None:
        """
        Run the JS ``removeBoundingBoxes`` helper in the wrapped frame.
        """
        await self.frame.evaluate("() => removeBoundingBoxes()")
|
||||
Reference in New Issue
Block a user