Code gen: Rename 'intention' -> 'prompt' (#3900)

Co-authored-by: Shuchang Zheng <wintonzheng0325@gmail.com>
2025-11-04 15:16:46 -07:00
parent ba99e9ea2c
commit 324c3f921d
2 changed files with 108 additions and 44 deletions
--- a/skyvern/core/script_generations/generate_script.py
+++ b/skyvern/core/script_generations/generate_script.py
@@ -229,7 +229,7 @@ def _action_to_stmt(act: dict[str, Any], task: dict[str, Any], assign_to_output:
    """
    Turn one Action dict into:

-        await page.<method>(selector=..., intention=..., data=context.parameters)
+        await page.<method>(selector=..., prompt=..., data=context.parameters)

    Or if assign_to_output is True for extract actions:

@@ -425,7 +425,7 @@ def _action_to_stmt(act: dict[str, Any], task: dict[str, Any], assign_to_output:
        args.extend(
            [
                cst.Arg(
-                    keyword=cst.Name("intention"),
+                    keyword=cst.Name("prompt"),
                    value=_value(intention),
                    whitespace_after_arg=cst.ParenthesizedWhitespace(indent=True),
                    comma=cst.Comma(),
--- a/skyvern/core/script_generations/skyvern_page.py
+++ b/skyvern/core/script_generations/skyvern_page.py
@@ -45,7 +45,7 @@ class Driver(StrEnum):

@dataclass
 class ActionMetadata:
-    intention: str = ""
+    prompt: str = ""
    data: dict[str, Any] | str | None = None
    timestamp: float | None = None  # filled in by recorder
    screenshot_path: str | None = None  # if enabled
@@ -176,11 +176,16 @@ class SkyvernPage:
            async def wrapper(
                skyvern_page: SkyvernPage,
                *args: Any,
-                intention: str = "",
+                prompt: str = "",
                data: str | dict[str, Any] = "",
+                intention: str = "",  # backward compatibility
                **kwargs: Any,
            ) -> Any:
-                meta = ActionMetadata(intention, data)
+                # Backward compatibility: use intention if provided and prompt is empty
+                if intention and not prompt:
+                    prompt = intention
+
+                meta = ActionMetadata(prompt, data)
                call = ActionCall(action, args, kwargs, meta)

                action_status = ActionStatus.completed
@@ -191,14 +196,14 @@ class SkyvernPage:
                    emoji = ACTION_EMOJIS.get(action, "🔧")
                    action_name = action.value if hasattr(action, "value") else str(action)
                    print(f"{emoji} {action_name.replace('_', ' ').title()}", end="")
-                    if intention:
-                        print(f": {intention}")
+                    if prompt:
+                        print(f": {prompt}")
                    else:
                        print()

                try:
                    call.result = await fn(
-                        skyvern_page, *args, intention=intention, data=data, **kwargs
+                        skyvern_page, *args, prompt=prompt, data=data, intention=intention, **kwargs
                    )  # real driver call

                    # Note: Action status would be updated to completed here if update method existed
@@ -224,7 +229,7 @@ class SkyvernPage:
                    # Auto-create action after execution
                    await skyvern_page._create_action_after_execution(
                        action_type=action,
-                        intention=intention,
+                        intention=prompt,
                        status=action_status,
                        data=data,
                        kwargs=kwargs,
@@ -268,6 +273,7 @@ class SkyvernPage:
        data_extraction_schema: dict[str, Any] | list | str | None = None,
    ) -> str:
        """Generate user-facing reasoning for an action using the secondary LLM."""
+
        reasoning = f"Auto-generated action for {action_type.value}"
        try:
            context = skyvern_context.current()
@@ -275,7 +281,7 @@ class SkyvernPage:
                return f"Auto-generated action for {action_type.value}"

            # Build the prompt with available context
-            prompt = prompt_engine.load_prompt(
+            reasoning_prompt = prompt_engine.load_prompt(
                template="generate-action-reasoning",
                action_type=action_type.value,
                intention=intention,
@@ -288,7 +294,7 @@ class SkyvernPage:

            # Call secondary LLM to generate reasoning
            json_response = await app.SECONDARY_LLM_API_HANDLER(
-                prompt=prompt,
+                prompt=reasoning_prompt,
                prompt_name="generate-action-reasoning",
                organization_id=context.organization_id,
            )
@@ -314,6 +320,7 @@ class SkyvernPage:
        call_result: Any | None = None,
    ) -> Action | None:
        """Create an action record in the database before execution if task_id and step_id are available."""
+
        try:
            context = skyvern_context.current()
            if not context or not context.task_id or not context.step_id:
@@ -447,14 +454,15 @@ class SkyvernPage:
    async def click(
        self,
        selector: str,
-        intention: str | None = None,
+        prompt: str | None = None,
        ai: str | None = "fallback",
        data: str | dict[str, Any] | None = None,
        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
+        intention: str | None = None,  # backward compatibility
    ) -> str:
        """Click an element identified by ``selector``.

-        When ``intention`` and ``data`` are provided a new click action is
+        When ``prompt`` and ``data`` are provided a new click action is
        generated via the ``single-click-action`` prompt.  The model returns a
        fresh "xpath=..." selector based on the current DOM and the updated data for this run.
        The browser then clicks the element using this newly generated xpath selector.
@@ -462,6 +470,10 @@ class SkyvernPage:
        If the prompt generation or parsing fails for any reason we fall back to
        clicking the originally supplied ``selector``.
        """
+        # Backward compatibility
+        if intention is not None and prompt is None:
+            prompt = intention
+
        context = skyvern_context.current()
        if context and context.ai_mode_override:
            ai = context.ai_mode_override
@@ -476,10 +488,10 @@ class SkyvernPage:
                error_to_raise = e

            # if the original selector doesn't work, try to click the element with the ai generated selector
-            if intention:
+            if prompt:
                return await self._ai.ai_click(
                    selector=selector,
-                    intention=intention,
+                    intention=prompt,
                    data=data,
                    timeout=timeout,
                )
@@ -488,10 +500,10 @@ class SkyvernPage:
            else:
                return selector
        elif ai == "proactive":
-            if intention:
+            if prompt:
                return await self._ai.ai_click(
                    selector=selector,
-                    intention=intention,
+                    intention=prompt,
                    data=data,
                    timeout=timeout,
                )
@@ -505,17 +517,22 @@ class SkyvernPage:
        selector: str | None,
        value: str,
        ai: str | None = "fallback",
-        intention: str | None = None,
+        prompt: str | None = None,
        data: str | dict[str, Any] | None = None,
        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
        totp_identifier: str | None = None,
        totp_url: str | None = None,
+        intention: str | None = None,  # backward compatibility
    ) -> str:
+        # Backward compatibility
+        if intention is not None and prompt is None:
+            prompt = intention
+
        return await self._input_text(
            selector=selector,
            value=value,
            ai=ai,
-            intention=intention,
+            intention=prompt,
            data=data,
            timeout=timeout,
            totp_identifier=totp_identifier,
@@ -528,17 +545,22 @@ class SkyvernPage:
        selector: str | None,
        value: str,
        ai: str | None = "fallback",
-        intention: str | None = None,
+        prompt: str | None = None,
        data: str | dict[str, Any] | None = None,
        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
        totp_identifier: str | None = None,
        totp_url: str | None = None,
+        intention: str | None = None,  # backward compatibility
    ) -> str:
+        # Backward compatibility
+        if intention is not None and prompt is None:
+            prompt = intention
+
        return await self._input_text(
            selector=selector,
            value=value,
            ai=ai,
-            intention=intention,
+            intention=prompt,
            data=data,
            timeout=timeout,
            totp_identifier=totp_identifier,
@@ -558,7 +580,7 @@ class SkyvernPage:
    ) -> str:
        """Input text into an element identified by ``selector``.

-        When ``intention`` and ``data`` are provided a new input text action is
+        When ``prompt`` and ``data`` are provided a new input text action is
        generated via the `script-generation-input-text-generation` prompt.  The model returns a
        fresh text based on the current DOM and the updated data for this run.
        The browser then inputs the text using this newly generated text.
@@ -566,6 +588,7 @@ class SkyvernPage:
        If the prompt generation or parsing fails for any reason we fall back to
        inputting the originally supplied ``text``.
        """
+
        context = skyvern_context.current()
        if context and context.ai_mode_override:
            ai = context.ai_mode_override
@@ -619,10 +642,15 @@ class SkyvernPage:
        selector: str | None,
        files: str,
        ai: str | None = "fallback",
-        intention: str | None = None,
+        prompt: str | None = None,
        data: str | dict[str, Any] | None = None,
        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
+        intention: str | None = None,  # backward compatibility
    ) -> str:
+        # Backward compatibility
+        if intention is not None and prompt is None:
+            prompt = intention
+
        context = skyvern_context.current()
        if context and context.ai_mode_override:
            ai = context.ai_mode_override
@@ -636,11 +664,11 @@ class SkyvernPage:
                except Exception as e:
                    error_to_raise = e

-            if intention:
+            if prompt:
                return await self._ai.ai_upload_file(
                    selector=selector,
                    files=files,
-                    intention=intention,
+                    intention=prompt,
                    data=data,
                    timeout=timeout,
                )
@@ -648,11 +676,11 @@ class SkyvernPage:
                raise error_to_raise
            else:
                return files
-        elif ai == "proactive" and intention:
+        elif ai == "proactive" and prompt:
            return await self._ai.ai_upload_file(
                selector=selector,
                files=files,
-                intention=intention,
+                intention=prompt,
                data=data,
                timeout=timeout,
            )
@@ -672,10 +700,15 @@ class SkyvernPage:
        value: str | None = None,
        label: str | None = None,
        ai: str | None = "fallback",
-        intention: str | None = None,
+        prompt: str | None = None,
        data: str | dict[str, Any] | None = None,
        timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
+        intention: str | None = None,  # backward compatibility
    ) -> str:
+        # Backward compatibility
+        if intention is not None and prompt is None:
+            prompt = intention
+
        context = skyvern_context.current()
        if context and context.ai_mode_override:
            ai = context.ai_mode_override
@@ -688,11 +721,11 @@ class SkyvernPage:
                return value
            except Exception as e:
                error_to_raise = e
-            if intention:
+            if prompt:
                return await self._ai.ai_select_option(
                    selector=selector,
                    value=value,
-                    intention=intention,
+                    intention=prompt,
                    data=data,
                    timeout=timeout,
                )
@@ -700,11 +733,11 @@ class SkyvernPage:
                raise error_to_raise
            else:
                return value
-        elif ai == "proactive" and intention:
+        elif ai == "proactive" and prompt:
            return await self._ai.ai_select_option(
                selector=selector,
                value=value,
-                intention=intention,
+                intention=prompt,
                data=data,
                timeout=timeout,
            )
@@ -714,16 +747,24 @@ class SkyvernPage:

    @action_wrap(ActionType.WAIT)
    async def wait(
-        self, seconds: float, intention: str | None = None, data: str | dict[str, Any] | None = None
+        self,
+        seconds: float,
+        prompt: str | None = None,
+        data: str | dict[str, Any] | None = None,
+        intention: str | None = None,
    ) -> None:
        await asyncio.sleep(seconds)

    @action_wrap(ActionType.NULL_ACTION)
-    async def null_action(self, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None:
+    async def null_action(
+        self, prompt: str | None = None, data: str | dict[str, Any] | None = None, intention: str | None = None
+    ) -> None:
        return

    @action_wrap(ActionType.SOLVE_CAPTCHA)
-    async def solve_captcha(self, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None:
+    async def solve_captcha(
+        self, prompt: str | None = None, data: str | dict[str, Any] | None = None, intention: str | None = None
+    ) -> None:
        context = skyvern_context.current()
        if not context or not context.organization_id or not context.task_id or not context.step_id:
            await asyncio.sleep(30)
@@ -744,13 +785,19 @@ class SkyvernPage:

    @action_wrap(ActionType.TERMINATE)
    async def terminate(
-        self, errors: list[str], intention: str | None = None, data: str | dict[str, Any] | None = None
+        self,
+        errors: list[str],
+        prompt: str | None = None,
+        data: str | dict[str, Any] | None = None,
+        intention: str | None = None,
    ) -> None:
        # TODO: update the workflow run status to terminated
        return

    @action_wrap(ActionType.COMPLETE)
-    async def complete(self, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None:
+    async def complete(
+        self, prompt: str | None = None, data: str | dict[str, Any] | None = None, intention: str | None = None
+    ) -> None:
        # TODO: add validation here. if it doesn't pass the validation criteria:
        #  1. terminate the workflow run if fallback to ai is false
        #  2. fallback to ai if fallback to ai is true
@@ -779,7 +826,9 @@ class SkyvernPage:
                raise ScriptTerminationException(result[-1].exception_message)

    @action_wrap(ActionType.RELOAD_PAGE)
-    async def reload_page(self, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None:
+    async def reload_page(
+        self, prompt: str | None = None, data: str | dict[str, Any] | None = None, intention: str | None = None
+    ) -> None:
        await self.page.reload()
        return

@@ -795,12 +844,19 @@ class SkyvernPage:
        return await self._ai.ai_extract(prompt, schema, error_code_mapping, intention, data)

    @action_wrap(ActionType.VERIFICATION_CODE)
-    async def verification_code(self, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None:
+    async def verification_code(
+        self, prompt: str | None = None, data: str | dict[str, Any] | None = None, intention: str | None = None
+    ) -> None:
        return

    @action_wrap(ActionType.SCROLL)
    async def scroll(
-        self, scroll_x: int, scroll_y: int, intention: str | None = None, data: str | dict[str, Any] | None = None
+        self,
+        scroll_x: int,
+        scroll_y: int,
+        prompt: str | None = None,
+        data: str | dict[str, Any] | None = None,
+        intention: str | None = None,
    ) -> None:
        await self.page.evaluate(f"window.scrollBy({scroll_x}, {scroll_y})")

@@ -810,14 +866,20 @@ class SkyvernPage:
        keys: list[str],
        hold: bool = False,
        duration: float = 0,
-        intention: str | None = None,
+        prompt: str | None = None,
        data: str | dict[str, Any] | None = None,
+        intention: str | None = None,  # backward compatibility
    ) -> None:
        await handler_utils.keypress(self.page, keys, hold=hold, duration=duration)

    @action_wrap(ActionType.MOVE)
    async def move(
-        self, x: int, y: int, intention: str | None = None, data: str | dict[str, Any] | None = None
+        self,
+        x: int,
+        y: int,
+        prompt: str | None = None,
+        data: str | dict[str, Any] | None = None,
+        intention: str | None = None,
    ) -> None:
        await self.page.mouse.move(x, y)

@@ -827,8 +889,9 @@ class SkyvernPage:
        start_x: int,
        start_y: int,
        path: list[tuple[int, int]],
-        intention: str | None = None,
+        prompt: str | None = None,
        data: str | dict[str, Any] | None = None,
+        intention: str | None = None,  # backward compatibility
    ) -> None:
        await handler_utils.drag(self.page, start_x, start_y, path)

@@ -838,8 +901,9 @@ class SkyvernPage:
        x: int,
        y: int,
        direction: Literal["down", "up"],
-        intention: str | None = None,
+        prompt: str | None = None,
        data: str | dict[str, Any] | None = None,
+        intention: str | None = None,  # backward compatibility
    ) -> None:
        await handler_utils.left_mouse(self.page, x, y, direction)