Make SkyvernPage extend Playwright (#3934)

This commit is contained in:
Stanislav Novosad
2025-11-10 09:02:37 -07:00
committed by GitHub
parent 2e3879bc37
commit 23ebd72d4a
4 changed files with 88 additions and 234 deletions

View File

@@ -123,8 +123,6 @@ class ScriptSkyvernPage(SkyvernPage):
action: ActionType, action: ActionType,
*args: Any, *args: Any,
prompt: str = "", prompt: str = "",
data: str | dict[str, Any] = "",
intention: str = "",
**kwargs: Any, **kwargs: Any,
) -> Any: ) -> Any:
""" """
@@ -150,9 +148,11 @@ class ScriptSkyvernPage(SkyvernPage):
} }
# Backward compatibility: use intention if provided and prompt is empty # Backward compatibility: use intention if provided and prompt is empty
intention = kwargs.get("intention", None)
if intention and not prompt: if intention and not prompt:
prompt = intention prompt = intention
data = kwargs.get("data", None)
meta = ActionMetadata(prompt, data) meta = ActionMetadata(prompt, data)
call = ActionCall(action, args, kwargs, meta) call = ActionCall(action, args, kwargs, meta)
@@ -199,7 +199,6 @@ class ScriptSkyvernPage(SkyvernPage):
action_type=action, action_type=action,
intention=prompt, intention=prompt,
status=action_status, status=action_status,
data=data,
kwargs=kwargs, kwargs=kwargs,
call_result=call.result, call_result=call.result,
) )
@@ -262,7 +261,6 @@ class ScriptSkyvernPage(SkyvernPage):
action_type: ActionType, action_type: ActionType,
intention: str = "", intention: str = "",
status: ActionStatus = ActionStatus.pending, status: ActionStatus = ActionStatus.pending,
data: str | dict[str, Any] = "",
kwargs: dict[str, Any] | None = None, kwargs: dict[str, Any] | None = None,
call_result: Any | None = None, call_result: Any | None = None,
) -> Action | None: ) -> Action | None:
@@ -396,7 +394,7 @@ class ScriptSkyvernPage(SkyvernPage):
# If screenshot creation fails, don't block execution # If screenshot creation fails, don't block execution
pass pass
async def goto(self, url: str, timeout: float = settings.BROWSER_LOADING_TIMEOUT_MS) -> None: async def goto(self, url: str, **kwargs: Any) -> None:
url = render_template(url) url = render_template(url)
url = prepend_scheme_and_validate_url(url) url = prepend_scheme_and_validate_url(url)
@@ -405,10 +403,8 @@ class ScriptSkyvernPage(SkyvernPage):
if context and context.script_mode: if context and context.script_mode:
print(f"🌐 Navigating to: {url}") print(f"🌐 Navigating to: {url}")
await self.page.goto( timeout = kwargs.pop("timeout", settings.BROWSER_ACTION_TIMEOUT_MS)
url, await self.page.goto(url, timeout=timeout, **kwargs)
timeout=timeout,
)
if context and context.script_mode: if context and context.script_mode:
print(" ✓ Page loaded") print(" ✓ Page loaded")

View File

@@ -3,7 +3,6 @@ from __future__ import annotations
import asyncio import asyncio
import copy import copy
from dataclasses import dataclass from dataclasses import dataclass
from enum import StrEnum
from typing import Any, Callable, Literal, overload from typing import Any, Callable, Literal, overload
import structlog import structlog
@@ -19,10 +18,6 @@ from skyvern.webeye.actions.action_types import ActionType
LOG = structlog.get_logger() LOG = structlog.get_logger()
class Driver(StrEnum):
PLAYWRIGHT = "playwright"
@dataclass @dataclass
class ActionMetadata: class ActionMetadata:
prompt: str = "" prompt: str = ""
@@ -41,7 +36,7 @@ class ActionCall:
error: Exception | None = None # populated if failed error: Exception | None = None # populated if failed
class SkyvernPage: class SkyvernPage(Page):
""" """
A lightweight adapter for the selected driver that: A lightweight adapter for the selected driver that:
1. Executes actual browser commands 1. Executes actual browser commands
@@ -54,21 +49,32 @@ class SkyvernPage:
page: Page, page: Page,
ai: SkyvernPageAi, ai: SkyvernPageAi,
) -> None: ) -> None:
super().__init__(page)
self.page = page self.page = page
self.current_label: str | None = None self.current_label: str | None = None
self._ai = ai self._ai = ai
def __getattribute__(self, name: str) -> Any:
    """Resolve ``name`` on this wrapper or on the wrapped ``page`` object.

    Lookup order:
        1. If the wrapped ``page`` has no attribute ``name``, use the normal
           lookup on this object.
        2. Otherwise, if any class in this object's MRO that comes before
           ``Page`` defines ``name`` in its own ``__dict__``, use the normal
           lookup so that definition wins.
        3. Otherwise return the attribute from the wrapped ``page``.

    Only class-level definitions are checked in step 2; instance attributes
    are not consulted there.

    NOTE(review): step 3 also applies to dunder names the wrapped page
    exposes (e.g. ``__class__``) -- confirm that is intended.
    """
    # Read ``page`` through ``object`` so this hook does not call itself.
    wrapped = object.__getattribute__(self, "page")
    if not hasattr(wrapped, name):
        return object.__getattribute__(self, name)

    # Walk only the classes layered on top of ``Page``.
    for klass in type(self).__mro__:
        if klass is Page:
            break
        if name in klass.__dict__:
            return object.__getattribute__(self, name)
    return getattr(wrapped, name)
async def _decorate_call( async def _decorate_call(
self, self,
fn: Callable, fn: Callable,
action: ActionType, action: ActionType,
*args: Any, *args: Any,
prompt: str = "", prompt: str = "",
data: str | dict[str, Any] = "",
intention: str = "", # backward compatibility
**kwargs: Any, **kwargs: Any,
) -> Any: ) -> Any:
return await fn(self, *args, prompt=prompt, data=data, intention=intention, **kwargs) return await fn(self, *args, prompt=prompt, **kwargs)
@staticmethod @staticmethod
def action_wrap( def action_wrap(
@@ -79,20 +85,17 @@ class SkyvernPage:
skyvern_page: SkyvernPage, skyvern_page: SkyvernPage,
*args: Any, *args: Any,
prompt: str = "", prompt: str = "",
data: str | dict[str, Any] = "",
intention: str = "", # backward compatibility
**kwargs: Any, **kwargs: Any,
) -> Any: ) -> Any:
return await skyvern_page._decorate_call( return await skyvern_page._decorate_call(fn, action, *args, prompt=prompt, **kwargs)
fn, action, *args, prompt=prompt, data=data, intention=intention, **kwargs
)
return wrapper return wrapper
return decorator return decorator
async def goto(self, url: str, timeout: float = settings.BROWSER_LOADING_TIMEOUT_MS) -> None: async def goto(self, url: str, **kwargs: Any) -> None:
await self.page.goto(url, timeout=timeout) timeout = kwargs.pop("timeout", settings.BROWSER_ACTION_TIMEOUT_MS)
await self.page.goto(url, timeout=timeout, **kwargs)
######### Public Interfaces ######### ######### Public Interfaces #########
@@ -103,9 +106,6 @@ class SkyvernPage:
*, *,
prompt: str | None = None, prompt: str | None = None,
ai: str | None = "fallback", ai: str | None = "fallback",
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
intention: str | None = None, # backward compatibility
**kwargs: Any, **kwargs: Any,
) -> str | None: ... ) -> str | None: ...
@@ -115,9 +115,6 @@ class SkyvernPage:
*, *,
prompt: str, prompt: str,
ai: str | None = "fallback", ai: str | None = "fallback",
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
intention: str | None = None, # backward compatibility
**kwargs: Any, **kwargs: Any,
) -> str | None: ... ) -> str | None: ...
@@ -128,9 +125,6 @@ class SkyvernPage:
*, *,
prompt: str | None = None, prompt: str | None = None,
ai: str | None = "fallback", ai: str | None = "fallback",
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
intention: str | None = None, # backward compatibility
**kwargs: Any, **kwargs: Any,
) -> str | None: ) -> str | None:
"""Click an element using a CSS selector, AI-powered prompt matching, or both. """Click an element using a CSS selector, AI-powered prompt matching, or both.
@@ -144,8 +138,7 @@ class SkyvernPage:
selector: CSS selector for the target element. selector: CSS selector for the target element.
prompt: Natural language description of which element to click. prompt: Natural language description of which element to click.
ai: AI behavior mode. Defaults to "fallback" which tries selector first, then AI. ai: AI behavior mode. Defaults to "fallback" which tries selector first, then AI.
data: Additional context data for AI processing. **kwargs: All Playwright click parameters (timeout, force, modifiers, etc.)
timeout: Maximum time to wait for the click action in milliseconds.
Returns: Returns:
The selector string that was successfully used to click the element, or None. The selector string that was successfully used to click the element, or None.
@@ -163,12 +156,16 @@ class SkyvernPage:
``` ```
""" """
# Backward compatibility # Backward compatibility
intention = kwargs.pop("intention", None)
if intention is not None and prompt is None: if intention is not None and prompt is None:
prompt = intention prompt = intention
if not selector and not prompt: if not selector and not prompt:
raise ValueError("Missing input: pass a selector and/or a prompt.") raise ValueError("Missing input: pass a selector and/or a prompt.")
timeout = kwargs.pop("timeout", settings.BROWSER_ACTION_TIMEOUT_MS)
data = kwargs.pop("data", None)
context = skyvern_context.current() context = skyvern_context.current()
if context and context.ai_mode_override: if context and context.ai_mode_override:
ai = context.ai_mode_override ai = context.ai_mode_override
@@ -206,8 +203,8 @@ class SkyvernPage:
) )
if selector: if selector:
locator = self.page.locator(selector, **kwargs) locator = self.page.locator(selector)
await locator.click(timeout=timeout) await locator.click(timeout=timeout, **kwargs)
return selector return selector
@@ -219,11 +216,9 @@ class SkyvernPage:
*, *,
prompt: str | None = None, prompt: str | None = None,
ai: str | None = "fallback", ai: str | None = "fallback",
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
totp_identifier: str | None = None, totp_identifier: str | None = None,
totp_url: str | None = None, totp_url: str | None = None,
intention: str | None = None, # backward compatibility **kwargs: Any,
) -> str: ... ) -> str: ...
@overload @overload
@@ -234,11 +229,9 @@ class SkyvernPage:
value: str | None = None, value: str | None = None,
selector: str | None = None, selector: str | None = None,
ai: str | None = "fallback", ai: str | None = "fallback",
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
totp_identifier: str | None = None, totp_identifier: str | None = None,
totp_url: str | None = None, totp_url: str | None = None,
intention: str | None = None, # backward compatibility **kwargs: Any,
) -> str: ... ) -> str: ...
@action_wrap(ActionType.INPUT_TEXT) @action_wrap(ActionType.INPUT_TEXT)
@@ -249,11 +242,9 @@ class SkyvernPage:
*, *,
prompt: str | None = None, prompt: str | None = None,
ai: str | None = "fallback", ai: str | None = "fallback",
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
totp_identifier: str | None = None, totp_identifier: str | None = None,
totp_url: str | None = None, totp_url: str | None = None,
intention: str | None = None, # backward compatibility **kwargs: Any,
) -> str: ) -> str:
"""Fill an input field using a CSS selector, AI-powered prompt matching, or both. """Fill an input field using a CSS selector, AI-powered prompt matching, or both.
@@ -267,8 +258,6 @@ class SkyvernPage:
value: The text value to input into the field. value: The text value to input into the field.
prompt: Natural language description of which field to fill and what value. prompt: Natural language description of which field to fill and what value.
ai: AI behavior mode. Defaults to "fallback" which tries selector first, then AI. ai: AI behavior mode. Defaults to "fallback" which tries selector first, then AI.
data: Additional context data for AI processing.
timeout: Maximum time to wait for the fill action in milliseconds.
totp_identifier: TOTP identifier for time-based one-time password fields. totp_identifier: TOTP identifier for time-based one-time password fields.
totp_url: URL to fetch TOTP codes from for authentication. totp_url: URL to fetch TOTP codes from for authentication.
@@ -293,12 +282,16 @@ class SkyvernPage:
""" """
# Backward compatibility # Backward compatibility
intention = kwargs.pop("intention", None)
if intention is not None and prompt is None: if intention is not None and prompt is None:
prompt = intention prompt = intention
if not selector and not prompt: if not selector and not prompt:
raise ValueError("Missing input: pass a selector and/or a prompt.") raise ValueError("Missing input: pass a selector and/or a prompt.")
timeout = kwargs.pop("timeout", settings.BROWSER_ACTION_TIMEOUT_MS)
data = kwargs.pop("data", None)
return await self._input_text( return await self._input_text(
selector=selector, selector=selector,
value=value or "", value=value or "",
@@ -317,19 +310,21 @@ class SkyvernPage:
value: str, value: str,
ai: str | None = "fallback", ai: str | None = "fallback",
prompt: str | None = None, prompt: str | None = None,
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
totp_identifier: str | None = None, totp_identifier: str | None = None,
totp_url: str | None = None, totp_url: str | None = None,
intention: str | None = None, # backward compatibility **kwargs: Any,
) -> str: ) -> str:
# Backward compatibility # Backward compatibility
intention = kwargs.pop("intention", None)
if intention is not None and prompt is None: if intention is not None and prompt is None:
prompt = intention prompt = intention
if not selector and not prompt: if not selector and not prompt:
raise ValueError("Missing input: pass a selector and/or a prompt.") raise ValueError("Missing input: pass a selector and/or a prompt.")
timeout = kwargs.pop("timeout", settings.BROWSER_ACTION_TIMEOUT_MS)
data = kwargs.pop("data", None)
return await self._input_text( return await self._input_text(
selector=selector, selector=selector,
value=value, value=value,
@@ -419,8 +414,7 @@ class SkyvernPage:
*, *,
prompt: str | None = None, prompt: str | None = None,
ai: str | None = "fallback", ai: str | None = "fallback",
data: str | dict[str, Any] | None = None, **kwargs: Any,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
) -> str: ... ) -> str: ...
@overload @overload
@@ -431,8 +425,7 @@ class SkyvernPage:
files: str | None = None, files: str | None = None,
selector: str | None = None, selector: str | None = None,
ai: str | None = "fallback", ai: str | None = "fallback",
data: str | dict[str, Any] | None = None, **kwargs: Any,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
) -> str: ... ) -> str: ...
@action_wrap(ActionType.UPLOAD_FILE) @action_wrap(ActionType.UPLOAD_FILE)
@@ -443,17 +436,19 @@ class SkyvernPage:
*, *,
prompt: str | None = None, prompt: str | None = None,
ai: str | None = "fallback", ai: str | None = "fallback",
data: str | dict[str, Any] | None = None, **kwargs: Any,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
intention: str | None = None, # backward compatibility
) -> str: ) -> str:
# Backward compatibility # Backward compatibility
intention = kwargs.pop("intention", None)
if intention is not None and prompt is None: if intention is not None and prompt is None:
prompt = intention prompt = intention
if not selector and not prompt: if not selector and not prompt:
raise ValueError("Missing input: pass a selector and/or a prompt.") raise ValueError("Missing input: pass a selector and/or a prompt.")
timeout = kwargs.pop("timeout", settings.BROWSER_ACTION_TIMEOUT_MS)
data = kwargs.pop("data", None)
context = skyvern_context.current() context = skyvern_context.current()
if context and context.ai_mode_override: if context and context.ai_mode_override:
ai = context.ai_mode_override ai = context.ai_mode_override
@@ -466,7 +461,7 @@ class SkyvernPage:
try: try:
file_path = await download_file(files) file_path = await download_file(files)
locator = self.page.locator(selector) locator = self.page.locator(selector)
await locator.set_input_files(file_path) await locator.set_input_files(file_path, **kwargs)
except Exception as e: except Exception as e:
error_to_raise = e error_to_raise = e
selector = None selector = None
@@ -501,7 +496,7 @@ class SkyvernPage:
file_path = await download_file(files) file_path = await download_file(files)
locator = self.page.locator(selector) locator = self.page.locator(selector)
await locator.set_input_files(file_path, timeout=timeout) await locator.set_input_files(file_path, timeout=timeout, **kwargs)
return files return files
@overload @overload
@@ -512,9 +507,6 @@ class SkyvernPage:
*, *,
prompt: str | None = None, prompt: str | None = None,
ai: str | None = "fallback", ai: str | None = "fallback",
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
intention: str | None = None, # backward compatibility
**kwargs: Any, **kwargs: Any,
) -> str | None: ... ) -> str | None: ...
@@ -526,9 +518,6 @@ class SkyvernPage:
value: str | None = None, value: str | None = None,
selector: str | None = None, selector: str | None = None,
ai: str | None = "fallback", ai: str | None = "fallback",
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
intention: str | None = None, # backward compatibility
**kwargs: Any, **kwargs: Any,
) -> str | None: ... ) -> str | None: ...
@@ -540,9 +529,6 @@ class SkyvernPage:
*, *,
prompt: str | None = None, prompt: str | None = None,
ai: str | None = "fallback", ai: str | None = "fallback",
data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
intention: str | None = None, # backward compatibility
**kwargs: Any, **kwargs: Any,
) -> str | None: ) -> str | None:
"""Select an option from a dropdown using a CSS selector, AI-powered prompt matching, or both. """Select an option from a dropdown using a CSS selector, AI-powered prompt matching, or both.
@@ -557,8 +543,6 @@ class SkyvernPage:
value: The option value to select. value: The option value to select.
prompt: Natural language description of which option to select. prompt: Natural language description of which option to select.
ai: AI behavior mode. Defaults to "fallback" which tries selector first, then AI. ai: AI behavior mode. Defaults to "fallback" which tries selector first, then AI.
data: Additional context data for AI processing.
timeout: Maximum time to wait for the select action in milliseconds.
Returns: Returns:
The value that was successfully selected. The value that was successfully selected.
@@ -581,12 +565,16 @@ class SkyvernPage:
""" """
# Backward compatibility # Backward compatibility
intention = kwargs.pop("intention", None)
if intention is not None and prompt is None: if intention is not None and prompt is None:
prompt = intention prompt = intention
if not selector and not prompt: if not selector and not prompt:
raise ValueError("Missing input: pass a selector and/or a prompt.") raise ValueError("Missing input: pass a selector and/or a prompt.")
timeout = kwargs.pop("timeout", settings.BROWSER_ACTION_TIMEOUT_MS)
data = kwargs.pop("data", None)
context = skyvern_context.current() context = skyvern_context.current()
if context and context.ai_mode_override: if context and context.ai_mode_override:
ai = context.ai_mode_override ai = context.ai_mode_override
@@ -631,46 +619,30 @@ class SkyvernPage:
async def wait( async def wait(
self, self,
seconds: float, seconds: float,
prompt: str | None = None, **kwargs: Any,
data: str | dict[str, Any] | None = None,
intention: str | None = None,
) -> None: ) -> None:
await asyncio.sleep(seconds) await asyncio.sleep(seconds)
@action_wrap(ActionType.NULL_ACTION) @action_wrap(ActionType.NULL_ACTION)
async def null_action( async def null_action(self, **kwargs: Any) -> None:
self, prompt: str | None = None, data: str | dict[str, Any] | None = None, intention: str | None = None
) -> None:
return return
@action_wrap(ActionType.SOLVE_CAPTCHA) @action_wrap(ActionType.SOLVE_CAPTCHA)
async def solve_captcha( async def solve_captcha(self, prompt: str | None = None) -> None:
self, prompt: str | None = None, data: str | dict[str, Any] | None = None, intention: str | None = None
) -> None:
raise NotImplementedError("Solve captcha is not supported outside server context") raise NotImplementedError("Solve captcha is not supported outside server context")
@action_wrap(ActionType.TERMINATE) @action_wrap(ActionType.TERMINATE)
async def terminate( async def terminate(self, errors: list[str], **kwargs: Any) -> None:
self,
errors: list[str],
prompt: str | None = None,
data: str | dict[str, Any] | None = None,
intention: str | None = None,
) -> None:
# TODO: update the workflow run status to terminated # TODO: update the workflow run status to terminated
return return
@action_wrap(ActionType.COMPLETE) @action_wrap(ActionType.COMPLETE)
async def complete( async def complete(self, prompt: str | None = None) -> None:
self, prompt: str | None = None, data: str | dict[str, Any] | None = None, intention: str | None = None
) -> None:
"""Stub for complete. Override in subclasses for specific behavior.""" """Stub for complete. Override in subclasses for specific behavior."""
@action_wrap(ActionType.RELOAD_PAGE) @action_wrap(ActionType.RELOAD_PAGE)
async def reload_page( async def reload_page(self, **kwargs: Any) -> None:
self, prompt: str | None = None, data: str | dict[str, Any] | None = None, intention: str | None = None await self.page.reload(**kwargs)
) -> None:
await self.page.reload()
return return
@action_wrap(ActionType.EXTRACT) @action_wrap(ActionType.EXTRACT)
@@ -680,7 +652,7 @@ class SkyvernPage:
schema: dict[str, Any] | list | str | None = None, schema: dict[str, Any] | list | str | None = None,
error_code_mapping: dict[str, str] | None = None, error_code_mapping: dict[str, str] | None = None,
intention: str | None = None, intention: str | None = None,
data: str | dict[str, Any] | None = None, **kwargs: Any,
) -> dict[str, Any] | list | str | None: ) -> dict[str, Any] | list | str | None:
"""Extract structured data from the page using AI. """Extract structured data from the page using AI.
@@ -689,7 +661,6 @@ class SkyvernPage:
schema: JSON Schema defining the structure of data to extract. schema: JSON Schema defining the structure of data to extract.
error_code_mapping: Mapping of error codes to custom error messages. error_code_mapping: Mapping of error codes to custom error messages.
intention: Additional context about the extraction intent. intention: Additional context about the extraction intent.
data: Additional context data for AI processing.
Returns: Returns:
Extracted data matching the provided schema, or None if extraction fails. Extracted data matching the provided schema, or None if extraction fails.
@@ -711,12 +682,11 @@ class SkyvernPage:
# Returns: {"name": "...", "price": 29.99} # Returns: {"name": "...", "price": 29.99}
``` ```
""" """
data = kwargs.pop("data", None)
return await self._ai.ai_extract(prompt, schema, error_code_mapping, intention, data) return await self._ai.ai_extract(prompt, schema, error_code_mapping, intention, data)
@action_wrap(ActionType.VERIFICATION_CODE) @action_wrap(ActionType.VERIFICATION_CODE)
async def verification_code( async def verification_code(self, prompt: str | None = None) -> None:
self, prompt: str | None = None, data: str | dict[str, Any] | None = None, intention: str | None = None
) -> None:
return return
@action_wrap(ActionType.SCROLL) @action_wrap(ActionType.SCROLL)
@@ -724,9 +694,7 @@ class SkyvernPage:
self, self,
scroll_x: int, scroll_x: int,
scroll_y: int, scroll_y: int,
prompt: str | None = None, **kwargs: Any,
data: str | dict[str, Any] | None = None,
intention: str | None = None,
) -> None: ) -> None:
await self.page.evaluate(f"window.scrollBy({scroll_x}, {scroll_y})") await self.page.evaluate(f"window.scrollBy({scroll_x}, {scroll_y})")
@@ -736,9 +704,7 @@ class SkyvernPage:
keys: list[str], keys: list[str],
hold: bool = False, hold: bool = False,
duration: float = 0, duration: float = 0,
prompt: str | None = None, **kwargs: Any,
data: str | dict[str, Any] | None = None,
intention: str | None = None, # backward compatibility
) -> None: ) -> None:
await handler_utils.keypress(self.page, keys, hold=hold, duration=duration) await handler_utils.keypress(self.page, keys, hold=hold, duration=duration)
@@ -747,9 +713,7 @@ class SkyvernPage:
self, self,
x: int, x: int,
y: int, y: int,
prompt: str | None = None, **kwargs: Any,
data: str | dict[str, Any] | None = None,
intention: str | None = None,
) -> None: ) -> None:
await self.page.mouse.move(x, y) await self.page.mouse.move(x, y)
@@ -759,9 +723,7 @@ class SkyvernPage:
start_x: int, start_x: int,
start_y: int, start_y: int,
path: list[tuple[int, int]], path: list[tuple[int, int]],
prompt: str | None = None, **kwargs: Any,
data: str | dict[str, Any] | None = None,
intention: str | None = None, # backward compatibility
) -> None: ) -> None:
await handler_utils.drag(self.page, start_x, start_y, path) await handler_utils.drag(self.page, start_x, start_y, path)
@@ -771,9 +733,7 @@ class SkyvernPage:
x: int, x: int,
y: int, y: int,
direction: Literal["down", "up"], direction: Literal["down", "up"],
prompt: str | None = None, **kwargs: Any,
data: str | dict[str, Any] | None = None,
intention: str | None = None, # backward compatibility
) -> None: ) -> None:
await handler_utils.left_mouse(self.page, x, y, direction) await handler_utils.left_mouse(self.page, x, y, direction)

View File

@@ -1,4 +1,4 @@
from typing import TYPE_CHECKING from typing import TYPE_CHECKING, Any
from playwright.async_api import BrowserContext, Page from playwright.async_api import BrowserContext, Page
@@ -9,10 +9,10 @@ if TYPE_CHECKING:
from skyvern.library.skyvern_sdk import SkyvernSdk from skyvern.library.skyvern_sdk import SkyvernSdk
class SkyvernBrowser: class SkyvernBrowser(BrowserContext):
"""A browser context wrapper that creates Skyvern-enabled pages. """A browser context wrapper that creates Skyvern-enabled pages.
This class wraps a Playwright BrowserContext and provides methods to create This class extends Playwright BrowserContext and provides methods to create
SkyvernBrowserPage instances that combine traditional browser automation with SkyvernBrowserPage instances that combine traditional browser automation with
AI-powered task execution capabilities. It manages browser session state and AI-powered task execution capabilities. It manages browser session state and
enables persistent browser sessions across multiple pages. enables persistent browser sessions across multiple pages.
@@ -44,6 +44,7 @@ class SkyvernBrowser:
browser_session_id: str | None = None, browser_session_id: str | None = None,
browser_address: str | None = None, browser_address: str | None = None,
): ):
super().__init__(browser_context)
self._sdk = sdk self._sdk = sdk
self._browser_context = browser_context self._browser_context = browser_context
self._browser_session_id = browser_session_id self._browser_session_id = browser_session_id
@@ -51,6 +52,18 @@ class SkyvernBrowser:
self.workflow_run_id: None | str = None self.workflow_run_id: None | str = None
def __getattribute__(self, name: str) -> Any:
    """Resolve ``name`` on this wrapper or on the wrapped browser context.

    Lookup order:
        1. If the wrapped ``_browser_context`` has no attribute ``name``, use
           the normal lookup on this object.
        2. Otherwise, if any class in this object's MRO that comes before
           ``BrowserContext`` defines ``name`` in its own ``__dict__``, use
           the normal lookup so that definition wins.
        3. Otherwise return the attribute from the wrapped context.

    Only class-level definitions are checked in step 2; instance attributes
    are not consulted there.

    NOTE(review): step 3 also applies to dunder names the wrapped context
    exposes (e.g. ``__class__``) -- confirm that is intended.
    """
    # Read ``_browser_context`` through ``object`` so this hook does not call itself.
    wrapped = object.__getattribute__(self, "_browser_context")
    if not hasattr(wrapped, name):
        return object.__getattribute__(self, name)

    # Walk only the classes layered on top of ``BrowserContext``.
    for klass in type(self).__mro__:
        if klass is BrowserContext:
            break
        if name in klass.__dict__:
            return object.__getattribute__(self, name)
    return getattr(wrapped, name)
@property @property
def browser_session_id(self) -> str | None: def browser_session_id(self) -> str | None:
return self._browser_session_id return self._browser_session_id

View File

@@ -1,5 +1,5 @@
import asyncio import asyncio
from typing import TYPE_CHECKING, Any, Pattern from typing import TYPE_CHECKING, Any
from playwright.async_api import Page from playwright.async_api import Page
@@ -8,7 +8,6 @@ from skyvern.client.types.workflow_run_response import WorkflowRunResponse
from skyvern.core.script_generations.skyvern_page import SkyvernPage from skyvern.core.script_generations.skyvern_page import SkyvernPage
from skyvern.library.constants import DEFAULT_AGENT_HEARTBEAT_INTERVAL, DEFAULT_AGENT_TIMEOUT from skyvern.library.constants import DEFAULT_AGENT_HEARTBEAT_INTERVAL, DEFAULT_AGENT_TIMEOUT
from skyvern.library.skyvern_browser_page_ai import SdkSkyvernPageAi from skyvern.library.skyvern_browser_page_ai import SdkSkyvernPageAi
from skyvern.library.skyvern_locator import SkyvernLocator
if TYPE_CHECKING: if TYPE_CHECKING:
from skyvern.library.skyvern_browser import SkyvernBrowser from skyvern.library.skyvern_browser import SkyvernBrowser
@@ -249,117 +248,3 @@ class SkyvernBrowserPage(SkyvernPage):
``` ```
""" """
return await self._ai.ai_act(prompt) return await self._ai.ai_act(prompt)
async def reload(self, **kwargs: Any) -> None:
    """Reload the wrapped page.

    Args:
        **kwargs: Options forwarded unchanged to ``self.page.reload``
            (e.g. ``timeout``, ``wait_until``).
    """
    target = self.page
    await target.reload(**kwargs)
async def screenshot(self, **kwargs: Any) -> bytes:
    """Capture a screenshot through the wrapped page.

    Args:
        **kwargs: Options forwarded unchanged to ``self.page.screenshot``
            (e.g. ``path``, ``full_page``, ``clip``, ``type``, ``quality``).

    Returns:
        Whatever ``self.page.screenshot`` returns (annotated as ``bytes``).
    """
    image = await self.page.screenshot(**kwargs)
    return image
def locator(self, selector: str, **kwargs: Any) -> SkyvernLocator:
    """Build a locator for ``selector`` on the wrapped page.

    Args:
        selector: Selector string passed to ``self.page.locator``
            (CSS, ``xpath=``, ``text=``, etc.).
        **kwargs: Options forwarded unchanged to ``self.page.locator``
            (e.g. ``has``, ``has_text``, ``has_not``).

    Returns:
        The page's locator wrapped in a ``SkyvernLocator``.
    """
    raw_locator = self.page.locator(selector, **kwargs)
    return SkyvernLocator(raw_locator)
def get_by_label(self, text: str | Pattern[str], **kwargs: Any) -> SkyvernLocator:
    """Locate an input element via its associated label text.

    Args:
        text: Label text, or a compiled pattern, passed to
            ``self.page.get_by_label``.
        **kwargs: Options forwarded unchanged (e.g. ``exact``).

    Returns:
        The page's locator wrapped in a ``SkyvernLocator``.
    """
    raw_locator = self.page.get_by_label(text, **kwargs)
    return SkyvernLocator(raw_locator)
def get_by_text(self, text: str | Pattern[str], **kwargs: Any) -> SkyvernLocator:
    """Locate an element by the text it contains.

    Args:
        text: Text content, or a compiled pattern, passed to
            ``self.page.get_by_text``.
        **kwargs: Options forwarded unchanged (e.g. ``exact``).

    Returns:
        The page's locator wrapped in a ``SkyvernLocator``.
    """
    raw_locator = self.page.get_by_text(text, **kwargs)
    return SkyvernLocator(raw_locator)
def get_by_title(self, text: str | Pattern[str], **kwargs: Any) -> SkyvernLocator:
    """Locate an element by its ``title`` attribute.

    Args:
        text: Title value, or a compiled pattern, passed to
            ``self.page.get_by_title``.
        **kwargs: Options forwarded unchanged (e.g. ``exact``).

    Returns:
        The page's locator wrapped in a ``SkyvernLocator``.
    """
    raw_locator = self.page.get_by_title(text, **kwargs)
    return SkyvernLocator(raw_locator)
def get_by_role(self, role: str, **kwargs: Any) -> SkyvernLocator:
    """Locate an element by its ARIA role.

    Args:
        role: Role name passed to ``self.page.get_by_role``
            (e.g. ``"button"``, ``"textbox"``, ``"link"``).
        **kwargs: Options forwarded unchanged
            (e.g. ``name``, ``checked``, ``pressed``).

    Returns:
        The page's locator wrapped in a ``SkyvernLocator``.
    """
    raw_locator = self.page.get_by_role(role, **kwargs)
    return SkyvernLocator(raw_locator)
def get_by_placeholder(self, text: str | Pattern[str], **kwargs: Any) -> SkyvernLocator:
    """Locate an input element by its placeholder text.

    Args:
        text: Placeholder text, or a compiled pattern, passed to
            ``self.page.get_by_placeholder``.
        **kwargs: Options forwarded unchanged (e.g. ``exact``).

    Returns:
        The page's locator wrapped in a ``SkyvernLocator``.
    """
    raw_locator = self.page.get_by_placeholder(text, **kwargs)
    return SkyvernLocator(raw_locator)
def get_by_alt_text(self, text: str | Pattern[str], **kwargs: Any) -> SkyvernLocator:
    """Locate an element (typically an image) by its alt text.

    Args:
        text: Alt text, or a compiled pattern, passed to
            ``self.page.get_by_alt_text``.
        **kwargs: Options forwarded unchanged (e.g. ``exact``).

    Returns:
        The page's locator wrapped in a ``SkyvernLocator``.
    """
    raw_locator = self.page.get_by_alt_text(text, **kwargs)
    return SkyvernLocator(raw_locator)
def get_by_test_id(self, test_id: str) -> SkyvernLocator:
    """Locate an element by its test ID attribute.

    Args:
        test_id: Test ID value passed to ``self.page.get_by_test_id``.

    Returns:
        The page's locator wrapped in a ``SkyvernLocator``.
    """
    raw_locator = self.page.get_by_test_id(test_id)
    return SkyvernLocator(raw_locator)