SDK: Prompt-based locator (#4027)
This commit is contained in:
committed by
GitHub
parent
90f51bcacb
commit
8fb46ef1ca
@@ -550,6 +550,87 @@ class RealSkyvernPageAi(SkyvernPageAi):
|
||||
print(f"{'-' * 50}\n")
|
||||
return result
|
||||
|
||||
async def ai_locate_element(
    self,
    prompt: str,
) -> str | None:
    """Locate an element on the page using AI and return its XPath selector.

    The scraped page is refreshed, the ``single-locate-element`` prompt is
    rendered against it, and the extraction LLM is asked which element matches
    ``prompt``. The XPath stored for the returned ``element_id`` in the
    refreshed page's ``id_to_element_dict`` is preferred; an ``xpath`` value
    supplied directly in the LLM response is used only as a fallback.

    Args:
        prompt: Natural language description of the element to locate (e.g., 'find "download invoices" button')

    Returns:
        XPath selector string (e.g., 'xpath=//button[@id="download"]') or None if not found
    """
    scraped_page_refreshed = await self.scraped_page.refresh()
    context = skyvern_context.ensure_context()

    prompt_rendered = _render_template_with_label(prompt, label=self.current_label)

    locate_element_prompt = load_prompt_with_elements(
        element_tree_builder=scraped_page_refreshed,
        prompt_engine=prompt_engine,
        template_name="single-locate-element",
        html_need_skyvern_attrs=True,
        data_extraction_goal=prompt_rendered,
        current_url=scraped_page_refreshed.url,
        # Use the context's timezone when set, otherwise the machine's local timezone.
        local_datetime=datetime.now(context.tz_info or datetime.now().astimezone().tzinfo).isoformat(),
    )

    # A step is only looked up when the context carries all identifiers for it.
    step = None
    if context.organization_id and context.task_id and context.step_id:
        step = await app.DATABASE.get_step(
            step_id=context.step_id,
            organization_id=context.organization_id,
        )

    result = await app.EXTRACTION_LLM_API_HANDLER(
        prompt=locate_element_prompt,
        step=step,
        screenshots=scraped_page_refreshed.screenshots,
        prompt_name="single-locate-element",
    )

    if not result or not isinstance(result, dict):
        LOG.error(
            "AI locate element failed - invalid result",
            result=result,
            result_type=type(result).__name__,
            prompt=prompt_rendered,
        )
        return None

    element_id = result.get("element_id", None)
    confidence = result.get("confidence_float", 0.0)

    xpath: str | None = None
    if element_id:
        try:
            skyvern_element_data = scraped_page_refreshed.id_to_element_dict.get(element_id)
        except TypeError:
            # The LLM response is free-form JSON: an unhashable element_id
            # (list/dict) cannot be a dictionary key, so treat it as "unknown id"
            # instead of crashing the lookup.
            skyvern_element_data = None
        if skyvern_element_data:
            xpath = skyvern_element_data.get("xpath")

    if not xpath:
        # Fall back to an xpath the LLM may have returned directly.
        xpath = result.get("xpath", None)

    # Reject missing values and anything that is not a string, so callers
    # always receive ``str | None`` as the signature promises.
    if not xpath or not isinstance(xpath, str):
        LOG.error(
            "AI locate element failed - no xpath in element data",
            element_id=element_id,
            result=result,
            prompt=prompt_rendered,
        )
        return None

    LOG.info(
        "AI locate element result",
        element_id=element_id,
        xpath=xpath,
        confidence=confidence,
        prompt=prompt_rendered,
    )

    return xpath
|
||||
|
||||
async def ai_act(
|
||||
self,
|
||||
prompt: str,
|
||||
|
||||
@@ -6,12 +6,13 @@ from dataclasses import dataclass
|
||||
from typing import Any, Callable, Literal, overload
|
||||
|
||||
import structlog
|
||||
from playwright.async_api import Page
|
||||
from playwright.async_api import Locator, Page
|
||||
|
||||
from skyvern.config import settings
|
||||
from skyvern.core.script_generations.skyvern_page_ai import SkyvernPageAi
|
||||
from skyvern.forge.sdk.api.files import download_file
|
||||
from skyvern.forge.sdk.core import skyvern_context
|
||||
from skyvern.library.ai_locator import AILocator
|
||||
from skyvern.webeye.actions import handler_utils
|
||||
from skyvern.webeye.actions.action_types import ActionType
|
||||
|
||||
@@ -683,6 +684,120 @@ class SkyvernPage(Page):
|
||||
data = kwargs.pop("data", None)
|
||||
return await self._ai.ai_extract(prompt, schema, error_code_mapping, intention, data)
|
||||
|
||||
@overload
def locator(
    self,
    selector: str,
    *,
    prompt: str | None = None,
    ai: str | None = "fallback",
    **kwargs: Any,
) -> Locator: ...

@overload
def locator(
    self,
    *,
    prompt: str,
    ai: str | None = "fallback",
    **kwargs: Any,
) -> Locator: ...

def locator(
    self,
    selector: str | None = None,
    *,
    prompt: str | None = None,
    ai: str | None = "fallback",
    **kwargs: Any,
) -> Locator:
    """Get a Playwright locator using a CSS selector, AI-powered prompt, or both.

    This method extends Playwright's locator() with AI capabilities. It supports these modes:
    - **Selector-based**: Get locator using CSS selector (standard Playwright behavior)
    - **AI-powered**: Use natural language to describe the element (returns lazy AILocator)
    - **Fallback mode** (default): Try the selector first, fall back to AI if it fails
    - **Proactive mode**: Try AI first, fall back to the selector if one was given

    The AI-powered locator is lazy - it only calls ai_locate_element when you actually
    use the locator (e.g., when you call .click(), .fill(), etc.). Note that using this
    AI locator lookup with prompt only works for elements you can interact with on the page.

    Args:
        selector: CSS selector for the target element.
        prompt: Natural language description of which element to locate.
        ai: AI behavior mode. Defaults to "fallback" which tries selector first, then AI.
            "proactive" tries AI first, then the selector. Any other value (including
            None) disables AI, so a selector is required. When the current Skyvern
            context defines ``ai_mode_override``, that value replaces this argument.
        **kwargs: All Playwright locator parameters (has_text, has, etc.)

    Returns:
        A Playwright Locator object (or AILocator proxy that acts like one).

    Raises:
        ValueError: If neither ``selector`` nor ``prompt`` is given, or if AI is
            disabled and no ``selector`` was provided.

    Examples:
        ```python
        # Standard Playwright usage - selector only
        download_button = page.locator("#download-btn")
        await download_button.click()

        # AI-powered - prompt only (returns lazy AILocator)
        download_button = page.locator(prompt='find "download invoices" button')
        await download_button.click()  # AI resolves XPath here

        # Fallback mode - try selector first, use AI if it fails
        download_button = page.locator("#download-btn", prompt='find "download invoices" button')
        await download_button.click()

        # With Playwright parameters
        submit_button = page.locator(prompt="find submit button", has_text="Submit")
        await submit_button.click()
        ```
    """
    if not selector and not prompt:
        raise ValueError("Missing input: pass a selector and/or a prompt.")

    context = skyvern_context.current()
    if context and context.ai_mode_override:
        ai = context.ai_mode_override

    if ai == "fallback":
        if selector and prompt:
            # Try selector first, then AI
            return AILocator(
                self.page,
                self._ai,
                prompt,
                selector=selector,
                selector_kwargs=kwargs,
                try_selector_first=True,
            )

        if selector:
            return self.page.locator(selector, **kwargs)

        if prompt:
            return AILocator(
                self.page,
                self._ai,
                prompt,
                selector=None,
                selector_kwargs=kwargs,
            )

    elif ai == "proactive":
        if prompt:
            # Try AI first, then selector
            return AILocator(
                self.page,
                self._ai,
                prompt,
                selector=selector,
                selector_kwargs=kwargs,
                try_selector_first=False,
            )

        if selector:
            return self.page.locator(selector, **kwargs)

    elif selector:
        # AI is disabled (ai=None) or the mode is unrecognised: behave like plain
        # Playwright instead of claiming the selector is missing.
        return self.page.locator(selector, **kwargs)

    # Only reachable when AI is disabled/unrecognised and just a prompt was given.
    raise ValueError("Selector is required but was not provided")
|
||||
|
||||
@action_wrap(ActionType.VERIFICATION_CODE)
async def verification_code(self, prompt: str | None = None) -> None:
    """Verification-code action with an intentionally empty body.

    The method itself does nothing with ``prompt`` and returns ``None``; whatever
    happens for this action type is provided by the ``action_wrap`` decorator,
    whose implementation is not visible here.

    Args:
        prompt: Optional natural language hint; unused by the body.
    """
    return None
|
||||
|
||||
@@ -71,3 +71,10 @@ class SkyvernPageAi(Protocol):
|
||||
) -> None:
|
||||
"""Perform an action on the page using AI based on a natural language prompt."""
|
||||
...
|
||||
|
||||
async def ai_locate_element(
|
||||
self,
|
||||
prompt: str,
|
||||
) -> str | None:
|
||||
"""Locate an element on the page using AI and return its XPath selector."""
|
||||
...
|
||||
|
||||
Reference in New Issue
Block a user