SDK: text prompt (#4214)

This commit is contained in:
Stanislav Novosad
2025-12-05 18:13:25 -07:00
committed by GitHub
parent 0f495f458e
commit b7d08fe906
9 changed files with 156 additions and 0 deletions

View File

@@ -17,6 +17,7 @@ from skyvern.forge.sdk.api.files import validate_download_url
from skyvern.forge.sdk.api.llm.schema_validator import validate_and_fill_extraction_result
from skyvern.forge.sdk.core import skyvern_context
from skyvern.forge.sdk.schemas.totp_codes import OTPType
from skyvern.services import script_service
from skyvern.services.otp_service import poll_otp_value
from skyvern.utils.prompt_engine import load_prompt_with_elements
from skyvern.webeye.actions import handler_utils
@@ -644,6 +645,20 @@ class RealSkyvernPageAi(SkyvernPageAi):
return xpath
async def ai_prompt(
    self,
    prompt: str,
    schema: dict[str, Any] | None = None,
    model: dict[str, Any] | None = None,
) -> dict[str, Any] | list | str | None:
    """Forward a free-form prompt to the LLM via the script service.

    Args:
        prompt: The text to send to the LLM.
        schema: Optional JSON schema used to structure/validate the response.
        model: Optional model configuration dict (e.g. {"model_name": ...}).

    Returns:
        The LLM response, shaped by ``schema`` when one is supplied.
    """
    # Pure delegation — all prompt handling lives in script_service.
    return await script_service.prompt(prompt=prompt, schema=schema, model=model)
async def ai_act(
self,
prompt: str,

View File

@@ -684,6 +684,56 @@ class SkyvernPage(Page):
data = kwargs.pop("data", None)
return await self._ai.ai_extract(prompt, schema, error_code_mapping, intention, data)
async def prompt(
self,
prompt: str,
schema: dict[str, Any] | None = None,
model: dict[str, Any] | str | None = None,
) -> dict[str, Any] | list | str | None:
"""Send a prompt to the LLM and get a response based on the provided schema.
This method allows you to interact with the LLM directly without requiring page context.
It's useful for making decisions, generating text, or processing information using AI.
Args:
prompt: The prompt to send to the LLM
schema: Optional JSON schema to structure the response. If provided, the LLM response
will be validated against this schema.
model: Optional model configuration. Can be either:
- A dict with model configuration (e.g., {"model_name": "gemini-2.5-flash-lite", "max_tokens": 2048})
- A string with just the model name (e.g., "gemini-2.5-flash-lite")
Returns:
LLM response structured according to the schema if provided, or unstructured response otherwise.
Examples:
```python
# Simple unstructured prompt
response = await page.prompt("What is 2 + 2?")
# Returns: {'llm_response': '2 + 2 equals 4.'}
# Structured prompt with schema
response = await page.prompt(
"What is 2 + 2?",
schema={
"type": "object",
"properties": {
"result_number": {"type": "int"},
"confidence": {"type": "number", "minimum": 0, "maximum": 1}
}
}
)
# Returns: {'result_number': 4, 'confidence': 1}
```
"""
normalized_model: dict[str, Any] | None = None
if isinstance(model, str):
normalized_model = {"model_name": model}
elif model is not None:
normalized_model = model
return await self._ai.ai_prompt(prompt=prompt, schema=schema, model=normalized_model)
@overload
def locator(
self,

View File

@@ -78,3 +78,12 @@ class SkyvernPageAi(Protocol):
) -> str | None:
"""Locate an element on the page using AI and return its XPath selector."""
...
async def ai_prompt(
    self,
    prompt: str,
    schema: dict[str, Any] | None = None,
    model: dict[str, Any] | None = None,
) -> dict[str, Any] | list | str | None:
    """Send a prompt to the LLM and get a response based on the provided schema.

    Protocol stub: implementations forward the prompt to the LLM layer.

    Args:
        prompt: The text to send to the LLM.
        schema: Optional JSON schema used to structure the response.
        model: Optional model configuration dict (e.g. {"model_name": ...});
            note this protocol takes a dict only — string model names are
            normalized by the caller before reaching this interface.

    Returns:
        The LLM response — a dict/list when a schema shapes it, a plain
        string otherwise, or None.
    """
    ...