SDK: Prompt-based locator (#4027)

This commit is contained in:
Stanislav Novosad
2025-11-21 19:13:42 -07:00
committed by GitHub
parent 90f51bcacb
commit 8fb46ef1ca
19 changed files with 899 additions and 4 deletions

View File

@@ -199,6 +199,11 @@ async def run_sdk_action(
data=action.data,
)
result = extract_result
elif action.type == "locate_element":
xpath_result = await page_ai.ai_locate_element(
prompt=action.prompt,
)
result = xpath_result
finally:
skyvern_context.reset()

View File

@@ -15,6 +15,7 @@ class SdkActionType(str, Enum):
AI_UPLOAD_FILE = "ai_upload_file"
AI_ACT = "ai_act"
EXTRACT = "extract"
LOCATE_ELEMENT = "locate_element"
# Base action class
@@ -137,9 +138,30 @@ class ExtractAction(SdkActionBase):
return self.data if isinstance(self.data, dict) else None
class LocateElementAction(SdkActionBase):
    """Locate element action parameters."""

    # Discriminator value that identifies this action within the SdkAction union.
    type: Literal["locate_element"] = "locate_element"

    # Free-form description of the element to find; defaults to an empty string.
    prompt: str = Field(
        default="",
        description="Natural language prompt to locate an element",
    )

    def get_navigation_goal(self) -> str | None:
        """Return the locator prompt, which doubles as the navigation goal."""
        return self.prompt

    def get_navigation_payload(self) -> dict[str, Any] | None:
        """Return ``None``: this action has no navigation payload."""
        return None
# Discriminated union of all action types.
# The "type" field of each member (a distinct Literal per action class) is the
# discriminator, so every action class that should be accepted must be listed
# exactly once in the single Union below. The superseded one-line Union that
# lacked LocateElementAction has been dropped: left in place it would be the
# annotated type, and the complete Union would be treated as mere metadata.
SdkAction = Annotated[
    Union[
        ClickAction,
        InputTextAction,
        SelectOptionAction,
        UploadFileAction,
        ActAction,
        ExtractAction,
        LocateElementAction,
    ],
    Field(discriminator="type"),
]