SDK: text prompt (#4214)

This commit is contained in:
Stanislav Novosad
2025-12-05 18:13:25 -07:00
committed by GitHub
parent 0f495f458e
commit b7d08fe906
9 changed files with 156 additions and 0 deletions

View File

@@ -205,6 +205,13 @@ async def run_sdk_action(
prompt=action.prompt,
)
result = xpath_result
elif action.type == "prompt":
prompt_result = await page_ai.ai_prompt(
prompt=action.prompt,
schema=action.schema,
model=action.model,
)
result = prompt_result
await app.DATABASE.update_task(
task_id=task.task_id,
organization_id=organization_id,

View File

@@ -16,6 +16,7 @@ class SdkActionType(str, Enum):
AI_ACT = "ai_act"
EXTRACT = "extract"
LOCATE_ELEMENT = "locate_element"
PROMPT = "prompt"
# Base action class
@@ -151,6 +152,21 @@ class LocateElementAction(SdkActionBase):
return None
class PromptAction(SdkActionBase):
    """Prompt action parameters."""

    # Literal tag for this action; the SdkAction union discriminates on "type".
    type: Literal["prompt"] = "prompt"

    # Required: the free-text prompt itself.
    prompt: str = Field(
        ...,
        description="The prompt to send to the LLM",
    )

    # NOTE(review): if SdkActionBase derives from pydantic's BaseModel, a field
    # named `schema` shadows an inherited attribute of the same name — confirm
    # this is intentional before relying on it; renaming would change the
    # request payload and the `action.schema` access in run_sdk_action.
    schema: dict[str, Any] | None = Field(
        default=None,
        description="Optional JSON schema to structure the response",
    )

    # Passed through to the prompt call unchanged; omitted means None.
    model: dict[str, Any] | None = Field(
        default=None,
        description="Optional model configuration",
    )

    def get_navigation_goal(self) -> str | None:
        """Return the prompt text as this action's navigation goal."""
        return self.prompt

    def get_navigation_payload(self) -> dict[str, Any] | None:
        """Return None; a prompt action supplies no navigation payload."""
        return None
# Discriminated union of all action types
SdkAction = Annotated[
Union[
@@ -161,6 +177,7 @@ SdkAction = Annotated[
ActAction,
ExtractAction,
LocateElementAction,
PromptAction,
],
Field(discriminator="type"),
]