SDK: support actions skeleton (#3817)
Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
d2d7b8e4b0
commit
33ad4cfcd1
144
skyvern/library/SdkSkyvernPageAi.py
Normal file
144
skyvern/library/SdkSkyvernPageAi.py
Normal file
@@ -0,0 +1,144 @@
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from playwright.async_api import Page
|
||||
|
||||
from skyvern.config import settings
|
||||
from skyvern.core.script_generations.skyvern_page_ai import SkyvernPageAi
|
||||
from skyvern.forge.sdk.schemas.sdk_actions import (
|
||||
ClickAction,
|
||||
ExtractAction,
|
||||
InputTextAction,
|
||||
SelectOptionAction,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from skyvern.library.skyvern_browser import SkyvernBrowser
|
||||
|
||||
|
||||
class SdkSkyvernPageAi(SkyvernPageAi):
|
||||
"""Implementation of SkyvernPageAi that makes API calls to the server."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
browser: "SkyvernBrowser",
|
||||
page: Page,
|
||||
):
|
||||
self._browser = browser
|
||||
self._page = page
|
||||
|
||||
async def ai_click(
|
||||
self,
|
||||
selector: str,
|
||||
intention: str,
|
||||
data: str | dict[str, Any] | None = None,
|
||||
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
|
||||
) -> str:
|
||||
"""Click an element using AI via API call."""
|
||||
|
||||
action = ClickAction(
|
||||
selector=selector,
|
||||
intention=intention,
|
||||
data=data,
|
||||
timeout=timeout,
|
||||
)
|
||||
response = await self._browser.client.run_sdk_action(
|
||||
url=self._page.url,
|
||||
browser_session_id=self._browser.browser_session_id,
|
||||
browser_address=self._browser.browser_address,
|
||||
workflow_run_id=self._browser.workflow_run_id,
|
||||
action=action,
|
||||
)
|
||||
self._browser.workflow_run_id = response.workflow_run_id
|
||||
return response.result if response.result else selector
|
||||
|
||||
async def ai_input_text(
|
||||
self,
|
||||
selector: str,
|
||||
value: str,
|
||||
intention: str,
|
||||
data: str | dict[str, Any] | None = None,
|
||||
totp_identifier: str | None = None,
|
||||
totp_url: str | None = None,
|
||||
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
|
||||
) -> str:
|
||||
"""Input text into an element using AI via API call."""
|
||||
|
||||
response = await self._browser.client.run_sdk_action(
|
||||
url=self._page.url,
|
||||
action=InputTextAction(
|
||||
selector=selector,
|
||||
value=value,
|
||||
intention=intention,
|
||||
data=data,
|
||||
totp_identifier=totp_identifier,
|
||||
totp_url=totp_url,
|
||||
timeout=timeout,
|
||||
),
|
||||
browser_session_id=self._browser.browser_session_id,
|
||||
browser_address=self._browser.browser_address,
|
||||
workflow_run_id=self._browser.workflow_run_id,
|
||||
)
|
||||
self._browser.workflow_run_id = response.workflow_run_id
|
||||
return response.result if response.result else value
|
||||
|
||||
async def ai_select_option(
|
||||
self,
|
||||
selector: str,
|
||||
value: str,
|
||||
intention: str,
|
||||
data: str | dict[str, Any] | None = None,
|
||||
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
|
||||
) -> str:
|
||||
"""Select an option from a dropdown using AI via API call."""
|
||||
|
||||
response = await self._browser.client.run_sdk_action(
|
||||
url=self._page.url,
|
||||
action=SelectOptionAction(
|
||||
selector=selector,
|
||||
value=value,
|
||||
intention=intention,
|
||||
data=data,
|
||||
timeout=timeout,
|
||||
),
|
||||
browser_session_id=self._browser.browser_session_id,
|
||||
browser_address=self._browser.browser_address,
|
||||
workflow_run_id=self._browser.workflow_run_id,
|
||||
)
|
||||
self._browser.workflow_run_id = response.workflow_run_id
|
||||
return response.result if response.result else value
|
||||
|
||||
async def ai_upload_file(
|
||||
self,
|
||||
selector: str,
|
||||
files: str,
|
||||
intention: str,
|
||||
data: str | dict[str, Any] | None = None,
|
||||
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
|
||||
) -> str:
|
||||
raise NotImplementedError("Upload is not supported yet")
|
||||
|
||||
async def ai_extract(
|
||||
self,
|
||||
prompt: str,
|
||||
schema: dict[str, Any] | list | str | None = None,
|
||||
error_code_mapping: dict[str, str] | None = None,
|
||||
intention: str | None = None,
|
||||
data: str | dict[str, Any] | None = None,
|
||||
) -> dict[str, Any] | list | str | None:
|
||||
"""Extract information from the page using AI via API call."""
|
||||
|
||||
response = await self._browser.client.run_sdk_action(
|
||||
url=self._page.url,
|
||||
action=ExtractAction(
|
||||
prompt=prompt,
|
||||
extract_schema=schema,
|
||||
error_code_mapping=error_code_mapping,
|
||||
intention=intention,
|
||||
data=data,
|
||||
),
|
||||
browser_session_id=self._browser.browser_session_id,
|
||||
browser_address=self._browser.browser_address,
|
||||
workflow_run_id=self._browser.workflow_run_id,
|
||||
)
|
||||
self._browser.workflow_run_id = response.workflow_run_id
|
||||
return response.result if response.result else None
|
||||
@@ -1,7 +1,7 @@
|
||||
from playwright.async_api import BrowserContext, Page
|
||||
|
||||
from skyvern.client import AsyncSkyvern
|
||||
from skyvern.library.skyvern_browser_page import SkyvernBrowserPage, SkyvernPageRun
|
||||
from skyvern.library.skyvern_browser_page import SkyvernBrowserPage
|
||||
|
||||
|
||||
class SkyvernBrowser:
|
||||
@@ -44,6 +44,20 @@ class SkyvernBrowser:
|
||||
self._browser_address = browser_address
|
||||
self._client = client
|
||||
|
||||
self.workflow_run_id: None | str = None
|
||||
|
||||
@property
|
||||
def browser_session_id(self) -> str | None:
|
||||
return self._browser_session_id
|
||||
|
||||
@property
|
||||
def browser_address(self) -> str | None:
|
||||
return self._browser_address
|
||||
|
||||
@property
|
||||
def client(self) -> AsyncSkyvern:
|
||||
return self._client
|
||||
|
||||
async def get_working_page(self) -> SkyvernBrowserPage:
|
||||
"""Get the most recent page or create a new one if none exists.
|
||||
|
||||
@@ -73,5 +87,4 @@ class SkyvernBrowser:
|
||||
return await self._create_skyvern_page(page)
|
||||
|
||||
async def _create_skyvern_page(self, page: Page) -> SkyvernBrowserPage:
|
||||
page_ai = SkyvernPageRun(page, self._browser_session_id, self._browser_address, self._client)
|
||||
return SkyvernBrowserPage(page, page_ai)
|
||||
return SkyvernBrowserPage(self, page)
|
||||
|
||||
@@ -1,11 +1,18 @@
|
||||
import asyncio
|
||||
from typing import Any
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from playwright.async_api import Page
|
||||
|
||||
from skyvern.client import AsyncSkyvern, GetRunResponse
|
||||
from skyvern.client import GetRunResponse
|
||||
from skyvern.client.types.workflow_run_response import WorkflowRunResponse
|
||||
from skyvern.config import settings
|
||||
from skyvern.library.constants import DEFAULT_AGENT_HEARTBEAT_INTERVAL, DEFAULT_AGENT_TIMEOUT
|
||||
from skyvern.library.SdkSkyvernPageAi import SdkSkyvernPageAi
|
||||
from skyvern.webeye.actions import handler_utils
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from skyvern.library.skyvern_browser import SkyvernBrowser
|
||||
|
||||
from skyvern.schemas.run_blocks import CredentialType
|
||||
from skyvern.schemas.runs import RunEngine, RunStatus, TaskRunResponse
|
||||
|
||||
@@ -18,13 +25,9 @@ class SkyvernPageRun:
|
||||
and pre-defined workflows with automatic waiting for completion.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self, page: Page, browser_session_id: str | None, browser_address: str | None, client: AsyncSkyvern
|
||||
) -> None:
|
||||
def __init__(self, browser: "SkyvernBrowser", page: Page) -> None:
|
||||
self._browser = browser
|
||||
self._page = page
|
||||
self._browser_session_id = browser_session_id
|
||||
self._browser_address = browser_address
|
||||
self._client = client
|
||||
|
||||
async def run_task(
|
||||
self,
|
||||
@@ -63,7 +66,7 @@ class SkyvernPageRun:
|
||||
TaskRunResponse containing the task execution results.
|
||||
"""
|
||||
|
||||
task_run = await self._client.run_task(
|
||||
task_run = await self._browser.client.run_task(
|
||||
prompt=prompt,
|
||||
engine=engine,
|
||||
model=model,
|
||||
@@ -75,8 +78,8 @@ class SkyvernPageRun:
|
||||
error_code_mapping=error_code_mapping,
|
||||
data_extraction_schema=data_extraction_schema,
|
||||
max_steps=max_steps,
|
||||
browser_session_id=self._browser_session_id,
|
||||
browser_address=self._browser_address,
|
||||
browser_session_id=self._browser.browser_session_id,
|
||||
browser_address=self._browser.browser_address,
|
||||
user_agent=user_agent,
|
||||
)
|
||||
|
||||
@@ -121,7 +124,7 @@ class SkyvernPageRun:
|
||||
WorkflowRunResponse containing the login workflow execution results.
|
||||
"""
|
||||
|
||||
workflow_run = await self._client.login(
|
||||
workflow_run = await self._browser.client.login(
|
||||
credential_type=credential_type,
|
||||
url=url or self._get_page_url(),
|
||||
credential_id=credential_id,
|
||||
@@ -133,8 +136,8 @@ class SkyvernPageRun:
|
||||
webhook_url=webhook_url,
|
||||
totp_identifier=totp_identifier,
|
||||
totp_url=totp_url,
|
||||
browser_session_id=self._browser_session_id,
|
||||
browser_address=self._browser_address,
|
||||
browser_session_id=self._browser.browser_session_id,
|
||||
browser_address=self._browser.browser_address,
|
||||
extra_http_headers=extra_http_headers,
|
||||
)
|
||||
|
||||
@@ -167,7 +170,7 @@ class SkyvernPageRun:
|
||||
Returns:
|
||||
WorkflowRunResponse containing the workflow execution results.
|
||||
"""
|
||||
workflow_run = await self._client.run_workflow(
|
||||
workflow_run = await self._browser.client.run_workflow(
|
||||
workflow_id=workflow_id,
|
||||
parameters=parameters,
|
||||
template=template,
|
||||
@@ -175,8 +178,8 @@ class SkyvernPageRun:
|
||||
webhook_url=webhook_url,
|
||||
totp_url=totp_url,
|
||||
totp_identifier=totp_identifier,
|
||||
browser_session_id=self._browser_session_id,
|
||||
browser_address=self._browser_address,
|
||||
browser_session_id=self._browser.browser_session_id,
|
||||
browser_address=self._browser.browser_address,
|
||||
)
|
||||
|
||||
workflow_run = await self._wait_for_run_completion(workflow_run.run_id, timeout)
|
||||
@@ -185,7 +188,7 @@ class SkyvernPageRun:
|
||||
async def _wait_for_run_completion(self, run_id: str, timeout: float) -> GetRunResponse:
|
||||
async with asyncio.timeout(timeout):
|
||||
while True:
|
||||
task_run = await self._client.get_run(run_id)
|
||||
task_run = await self._browser.client.get_run(run_id)
|
||||
if RunStatus(task_run.status).is_final():
|
||||
break
|
||||
await asyncio.sleep(DEFAULT_AGENT_HEARTBEAT_INTERVAL)
|
||||
@@ -221,28 +224,150 @@ class SkyvernBrowserPage:
|
||||
run: SkyvernPageRun instance for executing AI-powered tasks and workflows.
|
||||
"""
|
||||
|
||||
def __init__(self, page: Page, run: SkyvernPageRun):
|
||||
self.run = run
|
||||
self._playwright_page = page
|
||||
def __init__(self, browser: "SkyvernBrowser", page: Page):
|
||||
self._browser = browser
|
||||
self._page = page
|
||||
self._ai = SdkSkyvernPageAi(browser, page)
|
||||
self.run = SkyvernPageRun(browser, page)
|
||||
|
||||
async def click(self, selector: str, **kwargs: Any) -> None:
|
||||
"""Click an element matching the selector.
|
||||
async def click(
|
||||
self,
|
||||
*,
|
||||
selector: str | None = None,
|
||||
intention: str | None = None,
|
||||
ai: str | None = "fallback",
|
||||
data: str | dict[str, Any] | None = None,
|
||||
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
|
||||
) -> str | None:
|
||||
"""Click an element identified by ``selector``.
|
||||
|
||||
Args:
|
||||
selector: A selector to search for an element to click.
|
||||
**kwargs: Additional options like timeout, force, position, etc.
|
||||
When ``intention`` and ``data`` are provided a new click action is
|
||||
generated via the ``single-click-action`` prompt. The model returns a
|
||||
fresh "xpath=..." selector based on the current DOM and the updated data for this run.
|
||||
The browser then clicks the element using this newly generated xpath selector.
|
||||
|
||||
If the prompt generation or parsing fails for any reason we fall back to
|
||||
clicking the originally supplied ``selector``.
|
||||
"""
|
||||
await self._playwright_page.click(selector, **kwargs)
|
||||
|
||||
async def fill(self, selector: str, value: str, **kwargs: Any) -> None:
|
||||
"""Fill an input field with the given value.
|
||||
if ai == "fallback":
|
||||
# try to click the element with the original selector first
|
||||
error_to_raise = None
|
||||
if selector:
|
||||
try:
|
||||
locator = self._page.locator(selector)
|
||||
await locator.click(timeout=timeout)
|
||||
return selector
|
||||
except Exception as e:
|
||||
error_to_raise = e
|
||||
|
||||
Args:
|
||||
selector: A selector to search for an element to fill.
|
||||
value: Value to fill for the input field.
|
||||
**kwargs: Additional options like timeout, force, no_wait_after, etc.
|
||||
# if the original selector doesn't work, try to click the element with the ai generated selector
|
||||
if intention:
|
||||
return await self._ai.ai_click(
|
||||
selector=selector or "",
|
||||
intention=intention,
|
||||
data=data,
|
||||
timeout=timeout,
|
||||
)
|
||||
if error_to_raise:
|
||||
raise error_to_raise
|
||||
else:
|
||||
return selector
|
||||
elif ai == "proactive":
|
||||
if intention:
|
||||
return await self._ai.ai_click(
|
||||
selector=selector or "",
|
||||
intention=intention,
|
||||
data=data,
|
||||
timeout=timeout,
|
||||
)
|
||||
|
||||
if selector:
|
||||
locator = self._page.locator(selector)
|
||||
await locator.click(timeout=timeout)
|
||||
return selector
|
||||
|
||||
async def _input_text(
|
||||
self,
|
||||
selector: str,
|
||||
value: str,
|
||||
ai: str | None = "fallback",
|
||||
intention: str | None = None,
|
||||
data: str | dict[str, Any] | None = None,
|
||||
totp_identifier: str | None = None,
|
||||
totp_url: str | None = None,
|
||||
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
|
||||
) -> str:
|
||||
"""Input text into an element identified by ``selector``.
|
||||
|
||||
When ``intention`` and ``data`` are provided a new input text action is
|
||||
generated via the `script-generation-input-text-generation` prompt. The model returns a
|
||||
fresh text based on the current DOM and the updated data for this run.
|
||||
The browser then inputs the text using this newly generated text.
|
||||
|
||||
If the prompt generation or parsing fails for any reason we fall back to
|
||||
inputting the originally supplied ``value``.
|
||||
"""
|
||||
await self._playwright_page.fill(selector, value, **kwargs)
|
||||
|
||||
# format the text with the actual value of the parameter if it's a secret when running a workflow
|
||||
if ai == "fallback":
|
||||
error_to_raise = None
|
||||
try:
|
||||
locator = self._page.locator(selector)
|
||||
await handler_utils.input_sequentially(locator, value, timeout=timeout)
|
||||
return value
|
||||
except Exception as e:
|
||||
error_to_raise = e
|
||||
|
||||
if intention:
|
||||
return await self._ai.ai_input_text(
|
||||
selector=selector,
|
||||
value=value,
|
||||
intention=intention,
|
||||
data=data,
|
||||
totp_identifier=totp_identifier,
|
||||
totp_url=totp_url,
|
||||
timeout=timeout,
|
||||
)
|
||||
if error_to_raise:
|
||||
raise error_to_raise
|
||||
else:
|
||||
return value
|
||||
elif ai == "proactive" and intention:
|
||||
return await self._ai.ai_input_text(
|
||||
selector=selector,
|
||||
value=value,
|
||||
intention=intention,
|
||||
data=data,
|
||||
totp_identifier=totp_identifier,
|
||||
totp_url=totp_url,
|
||||
timeout=timeout,
|
||||
)
|
||||
locator = self._page.locator(selector)
|
||||
await handler_utils.input_sequentially(locator, value, timeout=timeout)
|
||||
return value
|
||||
|
||||
async def fill(
|
||||
self,
|
||||
selector: str,
|
||||
value: str,
|
||||
ai: str | None = "fallback",
|
||||
intention: str | None = None,
|
||||
data: str | dict[str, Any] | None = None,
|
||||
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
|
||||
totp_identifier: str | None = None,
|
||||
totp_url: str | None = None,
|
||||
) -> str:
|
||||
return await self._input_text(
|
||||
selector=selector,
|
||||
value=value,
|
||||
ai=ai,
|
||||
intention=intention,
|
||||
data=data,
|
||||
timeout=timeout,
|
||||
totp_identifier=totp_identifier,
|
||||
totp_url=totp_url,
|
||||
)
|
||||
|
||||
async def goto(self, url: str, **kwargs: Any) -> None:
|
||||
"""Navigate to the given URL.
|
||||
@@ -251,7 +376,7 @@ class SkyvernBrowserPage:
|
||||
url: URL to navigate page to.
|
||||
**kwargs: Additional options like timeout, wait_until, referer, etc.
|
||||
"""
|
||||
await self._playwright_page.goto(url, **kwargs)
|
||||
await self._page.goto(url, **kwargs)
|
||||
|
||||
async def type(self, selector: str, text: str, **kwargs: Any) -> None:
|
||||
"""Type text into an element character by character.
|
||||
@@ -261,7 +386,7 @@ class SkyvernBrowserPage:
|
||||
text: Text to type into the element.
|
||||
**kwargs: Additional options like delay, timeout, no_wait_after, etc.
|
||||
"""
|
||||
await self._playwright_page.type(selector, text, **kwargs)
|
||||
await self._page.type(selector, text, **kwargs)
|
||||
|
||||
async def select_option(self, selector: str, value: Any = None, **kwargs: Any) -> list[str]:
|
||||
"""Select option(s) in a <select> element.
|
||||
@@ -274,7 +399,7 @@ class SkyvernBrowserPage:
|
||||
Returns:
|
||||
List of option values that have been successfully selected.
|
||||
"""
|
||||
return await self._playwright_page.select_option(selector, value, **kwargs)
|
||||
return await self._page.select_option(selector, value, **kwargs)
|
||||
|
||||
async def reload(self, **kwargs: Any) -> None:
|
||||
"""Reload the current page.
|
||||
@@ -282,7 +407,7 @@ class SkyvernBrowserPage:
|
||||
Args:
|
||||
**kwargs: Additional options like timeout, wait_until, etc.
|
||||
"""
|
||||
await self._playwright_page.reload(**kwargs)
|
||||
await self._page.reload(**kwargs)
|
||||
|
||||
async def screenshot(self, **kwargs: Any) -> bytes:
|
||||
"""Take a screenshot of the page.
|
||||
@@ -293,4 +418,4 @@ class SkyvernBrowserPage:
|
||||
Returns:
|
||||
bytes: The screenshot as bytes (unless path is specified, then saves to file).
|
||||
"""
|
||||
return await self._playwright_page.screenshot(**kwargs)
|
||||
return await self._page.screenshot(**kwargs)
|
||||
|
||||
Reference in New Issue
Block a user