SDK: text prompt (#4214)

This commit is contained in:
Stanislav Novosad
2025-12-05 18:13:25 -07:00
committed by GitHub
parent 0f495f458e
commit b7d08fe906
9 changed files with 156 additions and 0 deletions

View File

@@ -17,6 +17,7 @@ from skyvern.forge.sdk.api.files import validate_download_url
from skyvern.forge.sdk.api.llm.schema_validator import validate_and_fill_extraction_result
from skyvern.forge.sdk.core import skyvern_context
from skyvern.forge.sdk.schemas.totp_codes import OTPType
from skyvern.services import script_service
from skyvern.services.otp_service import poll_otp_value
from skyvern.utils.prompt_engine import load_prompt_with_elements
from skyvern.webeye.actions import handler_utils
@@ -644,6 +645,20 @@ class RealSkyvernPageAi(SkyvernPageAi):
return xpath
async def ai_prompt(
    self,
    prompt: str,
    schema: dict[str, Any] | None = None,
    model: dict[str, Any] | None = None,
) -> dict[str, Any] | list | str | None:
    """Forward a free-form prompt to the LLM via the script service.

    Args:
        prompt: The text to send to the LLM.
        schema: Optional JSON schema used to structure/validate the response.
        model: Optional model configuration dict (e.g. {"model_name": ...}).

    Returns:
        The LLM response, shaped by ``schema`` when one is supplied.
    """
    # Pure delegation — all prompt handling lives in script_service.
    return await script_service.prompt(prompt=prompt, schema=schema, model=model)
async def ai_act(
self,
prompt: str,

View File

@@ -684,6 +684,56 @@ class SkyvernPage(Page):
data = kwargs.pop("data", None)
return await self._ai.ai_extract(prompt, schema, error_code_mapping, intention, data)
async def prompt(
self,
prompt: str,
schema: dict[str, Any] | None = None,
model: dict[str, Any] | str | None = None,
) -> dict[str, Any] | list | str | None:
"""Send a prompt to the LLM and get a response based on the provided schema.
This method allows you to interact with the LLM directly without requiring page context.
It's useful for making decisions, generating text, or processing information using AI.
Args:
prompt: The prompt to send to the LLM
schema: Optional JSON schema to structure the response. If provided, the LLM response
will be validated against this schema.
model: Optional model configuration. Can be either:
- A dict with model configuration (e.g., {"model_name": "gemini-2.5-flash-lite", "max_tokens": 2048})
- A string with just the model name (e.g., "gemini-2.5-flash-lite")
Returns:
LLM response structured according to the schema if provided, or unstructured response otherwise.
Examples:
```python
# Simple unstructured prompt
response = await page.prompt("What is 2 + 2?")
# Returns: {'llm_response': '2 + 2 equals 4.'}
# Structured prompt with schema
response = await page.prompt(
"What is 2 + 2?",
schema={
"type": "object",
"properties": {
"result_number": {"type": "int"},
"confidence": {"type": "number", "minimum": 0, "maximum": 1}
}
}
)
# Returns: {'result_number': 4, 'confidence': 1}
```
"""
normalized_model: dict[str, Any] | None = None
if isinstance(model, str):
normalized_model = {"model_name": model}
elif model is not None:
normalized_model = model
return await self._ai.ai_prompt(prompt=prompt, schema=schema, model=normalized_model)
@overload
def locator(
self,

View File

@@ -78,3 +78,12 @@ class SkyvernPageAi(Protocol):
) -> str | None:
"""Locate an element on the page using AI and return its XPath selector."""
...
async def ai_prompt(
    self,
    prompt: str,
    schema: dict[str, Any] | None = None,
    model: dict[str, Any] | None = None,
) -> dict[str, Any] | list | str | None:
    """Send a prompt to the LLM and get a response based on the provided schema.

    Protocol stub: implementations forward the prompt to the LLM layer.

    Args:
        prompt: The text to send to the LLM.
        schema: Optional JSON schema used to structure the response.
        model: Optional model configuration dict (e.g. {"model_name": ...});
            note this protocol takes a dict only — string model names are
            normalized by the caller before reaching this interface.

    Returns:
        The LLM response — a dict/list when a schema shapes it, a plain
        string otherwise, or None.
    """
    ...