diff --git a/skyvern/core/script_generations/generate_script.py b/skyvern/core/script_generations/generate_script.py index 4f47c9e4..65d8067e 100644 --- a/skyvern/core/script_generations/generate_script.py +++ b/skyvern/core/script_generations/generate_script.py @@ -229,7 +229,7 @@ def _action_to_stmt(act: dict[str, Any], task: dict[str, Any], assign_to_output: """ Turn one Action dict into: - await page.(selector=..., intention=..., data=context.parameters) + await page.(selector=..., prompt=..., data=context.parameters) Or if assign_to_output is True for extract actions: @@ -425,7 +425,7 @@ def _action_to_stmt(act: dict[str, Any], task: dict[str, Any], assign_to_output: args.extend( [ cst.Arg( - keyword=cst.Name("intention"), + keyword=cst.Name("prompt"), value=_value(intention), whitespace_after_arg=cst.ParenthesizedWhitespace(indent=True), comma=cst.Comma(), diff --git a/skyvern/core/script_generations/skyvern_page.py b/skyvern/core/script_generations/skyvern_page.py index a354aa5a..c92aece8 100644 --- a/skyvern/core/script_generations/skyvern_page.py +++ b/skyvern/core/script_generations/skyvern_page.py @@ -45,7 +45,7 @@ class Driver(StrEnum): @dataclass class ActionMetadata: - intention: str = "" + prompt: str = "" data: dict[str, Any] | str | None = None timestamp: float | None = None # filled in by recorder screenshot_path: str | None = None # if enabled @@ -176,11 +176,16 @@ class SkyvernPage: async def wrapper( skyvern_page: SkyvernPage, *args: Any, - intention: str = "", + prompt: str = "", data: str | dict[str, Any] = "", + intention: str = "", # backward compatibility **kwargs: Any, ) -> Any: - meta = ActionMetadata(intention, data) + # Backward compatibility: use intention if provided and prompt is empty + if intention and not prompt: + prompt = intention + + meta = ActionMetadata(prompt, data) call = ActionCall(action, args, kwargs, meta) action_status = ActionStatus.completed @@ -191,14 +196,14 @@ class SkyvernPage: emoji = ACTION_EMOJIS.get(action, "🔧") action_name = action.value if hasattr(action, "value") else str(action) print(f"{emoji} {action_name.replace('_', ' ').title()}", end="") - if intention: - print(f": {intention}") + if prompt: + print(f": {prompt}") else: print() try: call.result = await fn( - skyvern_page, *args, intention=intention, data=data, **kwargs + skyvern_page, *args, prompt=prompt, data=data, intention=intention, **kwargs ) # real driver call # Note: Action status would be updated to completed here if update method existed @@ -224,7 +229,7 @@ class SkyvernPage: # Auto-create action after execution await skyvern_page._create_action_after_execution( action_type=action, - intention=intention, + intention=prompt, status=action_status, data=data, kwargs=kwargs, @@ -268,6 +273,7 @@ class SkyvernPage: data_extraction_schema: dict[str, Any] | list | str | None = None, ) -> str: """Generate user-facing reasoning for an action using the secondary LLM.""" + reasoning = f"Auto-generated action for {action_type.value}" try: context = skyvern_context.current() @@ -275,7 +281,7 @@ class SkyvernPage: return f"Auto-generated action for {action_type.value}" # Build the prompt with available context - prompt = prompt_engine.load_prompt( + reasoning_prompt = prompt_engine.load_prompt( template="generate-action-reasoning", action_type=action_type.value, intention=intention, @@ -288,7 +294,7 @@ class SkyvernPage: # Call secondary LLM to generate reasoning json_response = await app.SECONDARY_LLM_API_HANDLER( - prompt=prompt, + prompt=reasoning_prompt, prompt_name="generate-action-reasoning", organization_id=context.organization_id, ) @@ -314,6 +320,7 @@ class SkyvernPage: call_result: Any | None = None, ) -> Action | None: """Create an action record in the database before execution if task_id and step_id are available.""" + try: context = skyvern_context.current() if not context or not context.task_id or not context.step_id: @@ -447,14 +454,15 @@ class SkyvernPage: async def click( self, selector: str, - intention: str | None = None, + prompt: str | None = None, ai: str | None = "fallback", data: str | dict[str, Any] | None = None, timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS, + intention: str | None = None, # backward compatibility ) -> str: """Click an element identified by ``selector``. - When ``intention`` and ``data`` are provided a new click action is + When ``prompt`` and ``data`` are provided a new click action is generated via the ``single-click-action`` prompt. The model returns a fresh "xpath=..." selector based on the current DOM and the updated data for this run. The browser then clicks the element using this newly generated xpath selector. @@ -462,6 +470,10 @@ class SkyvernPage: If the prompt generation or parsing fails for any reason we fall back to clicking the originally supplied ``selector``. """ + # Backward compatibility + if intention is not None and prompt is None: + prompt = intention + context = skyvern_context.current() if context and context.ai_mode_override: ai = context.ai_mode_override @@ -476,10 +488,10 @@ class SkyvernPage: error_to_raise = e # if the original selector doesn't work, try to click the element with the ai generated selector - if intention: + if prompt: return await self._ai.ai_click( selector=selector, - intention=intention, + intention=prompt, data=data, timeout=timeout, ) @@ -488,10 +500,10 @@ class SkyvernPage: else: return selector elif ai == "proactive": - if intention: + if prompt: return await self._ai.ai_click( selector=selector, - intention=intention, + intention=prompt, data=data, timeout=timeout, ) @@ -505,17 +517,22 @@ class SkyvernPage: selector: str | None, value: str, ai: str | None = "fallback", - intention: str | None = None, + prompt: str | None = None, data: str | dict[str, Any] | None = None, timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS, totp_identifier: str | None = None, totp_url: str | None = None, + intention: str | None = None, # backward compatibility ) -> str: + # Backward compatibility + if intention is not None and prompt is None: + prompt = intention + return await self._input_text( selector=selector, value=value, ai=ai, - intention=intention, + intention=prompt, data=data, timeout=timeout, totp_identifier=totp_identifier, @@ -528,17 +545,22 @@ class SkyvernPage: selector: str | None, value: str, ai: str | None = "fallback", - intention: str | None = None, + prompt: str | None = None, data: str | dict[str, Any] | None = None, timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS, totp_identifier: str | None = None, totp_url: str | None = None, + intention: str | None = None, # backward compatibility ) -> str: + # Backward compatibility + if intention is not None and prompt is None: + prompt = intention + return await self._input_text( selector=selector, value=value, ai=ai, - intention=intention, + intention=prompt, data=data, timeout=timeout, totp_identifier=totp_identifier, @@ -558,7 +580,7 @@ class SkyvernPage: ) -> str: """Input text into an element identified by ``selector``. - When ``intention`` and ``data`` are provided a new input text action is + When ``prompt`` and ``data`` are provided a new input text action is generated via the `script-generation-input-text-generation` prompt. The model returns a fresh text based on the current DOM and the updated data for this run. The browser then inputs the text using this newly generated text. @@ -566,6 +588,7 @@ class SkyvernPage: If the prompt generation or parsing fails for any reason we fall back to inputting the originally supplied ``text``. """ + context = skyvern_context.current() if context and context.ai_mode_override: ai = context.ai_mode_override @@ -619,10 +642,15 @@ class SkyvernPage: selector: str | None, files: str, ai: str | None = "fallback", - intention: str | None = None, + prompt: str | None = None, data: str | dict[str, Any] | None = None, timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS, + intention: str | None = None, # backward compatibility ) -> str: + # Backward compatibility + if intention is not None and prompt is None: + prompt = intention + context = skyvern_context.current() if context and context.ai_mode_override: ai = context.ai_mode_override @@ -636,11 +664,11 @@ class SkyvernPage: except Exception as e: error_to_raise = e - if intention: + if prompt: return await self._ai.ai_upload_file( selector=selector, files=files, - intention=intention, + intention=prompt, data=data, timeout=timeout, ) @@ -648,11 +676,11 @@ class SkyvernPage: raise error_to_raise else: return files - elif ai == "proactive" and intention: + elif ai == "proactive" and prompt: return await self._ai.ai_upload_file( selector=selector, files=files, - intention=intention, + intention=prompt, data=data, timeout=timeout, ) @@ -672,10 +700,15 @@ class SkyvernPage: value: str | None = None, label: str | None = None, ai: str | None = "fallback", - intention: str | None = None, + prompt: str | None = None, data: str | dict[str, Any] | None = None, timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS, + intention: str | None = None, # backward compatibility ) -> str: + # Backward compatibility + if intention is not None and prompt is None: + prompt = intention + context = skyvern_context.current() if context and context.ai_mode_override: ai = context.ai_mode_override @@ -688,11 +721,11 @@ class SkyvernPage: return value except Exception as e: error_to_raise = e - if intention: + if prompt: return await self._ai.ai_select_option( selector=selector, value=value, - intention=intention, + intention=prompt, data=data, timeout=timeout, ) @@ -700,11 +733,11 @@ class SkyvernPage: raise error_to_raise else: return value - elif ai == "proactive" and intention: + elif ai == "proactive" and prompt: return await self._ai.ai_select_option( selector=selector, value=value, - intention=intention, + intention=prompt, data=data, timeout=timeout, ) @@ -714,16 +747,24 @@ class SkyvernPage: @action_wrap(ActionType.WAIT) async def wait( - self, seconds: float, intention: str | None = None, data: str | dict[str, Any] | None = None + self, + seconds: float, + prompt: str | None = None, + data: str | dict[str, Any] | None = None, + intention: str | None = None, ) -> None: await asyncio.sleep(seconds) @action_wrap(ActionType.NULL_ACTION) - async def null_action(self, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None: + async def null_action( + self, prompt: str | None = None, data: str | dict[str, Any] | None = None, intention: str | None = None + ) -> None: return @action_wrap(ActionType.SOLVE_CAPTCHA) - async def solve_captcha(self, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None: + async def solve_captcha( + self, prompt: str | None = None, data: str | dict[str, Any] | None = None, intention: str | None = None + ) -> None: context = skyvern_context.current() if not context or not context.organization_id or not context.task_id or not context.step_id: await asyncio.sleep(30) @@ -744,13 +785,19 @@ class SkyvernPage: @action_wrap(ActionType.TERMINATE) async def terminate( - self, errors: list[str], intention: str | None = None, data: str | dict[str, Any] | None = None + self, + errors: list[str], + prompt: str | None = None, + data: str | dict[str, Any] | None = None, + intention: str | None = None, ) -> None: # TODO: update the workflow run status to terminated return @action_wrap(ActionType.COMPLETE) - async def complete(self, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None: + async def complete( + self, prompt: str | None = None, data: str | dict[str, Any] | None = None, intention: str | None = None + ) -> None: # TODO: add validation here. if it doesn't pass the validation criteria: # 1. terminate the workflow run if fallback to ai is false # 2. fallback to ai if fallback to ai is true @@ -779,7 +826,9 @@ class SkyvernPage: raise ScriptTerminationException(result[-1].exception_message) @action_wrap(ActionType.RELOAD_PAGE) - async def reload_page(self, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None: + async def reload_page( + self, prompt: str | None = None, data: str | dict[str, Any] | None = None, intention: str | None = None + ) -> None: await self.page.reload() return @@ -795,12 +844,19 @@ class SkyvernPage: return await self._ai.ai_extract(prompt, schema, error_code_mapping, intention, data) @action_wrap(ActionType.VERIFICATION_CODE) - async def verification_code(self, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None: + async def verification_code( + self, prompt: str | None = None, data: str | dict[str, Any] | None = None, intention: str | None = None + ) -> None: return @action_wrap(ActionType.SCROLL) async def scroll( - self, scroll_x: int, scroll_y: int, intention: str | None = None, data: str | dict[str, Any] | None = None + self, + scroll_x: int, + scroll_y: int, + prompt: str | None = None, + data: str | dict[str, Any] | None = None, + intention: str | None = None, ) -> None: await self.page.evaluate(f"window.scrollBy({scroll_x}, {scroll_y})") @@ -810,14 +866,20 @@ class SkyvernPage: keys: list[str], hold: bool = False, duration: float = 0, - intention: str | None = None, + prompt: str | None = None, data: str | dict[str, Any] | None = None, + intention: str | None = None, # backward compatibility ) -> None: await handler_utils.keypress(self.page, keys, hold=hold, duration=duration) @action_wrap(ActionType.MOVE) async def move( - self, x: int, y: int, intention: str | None = None, data: str | dict[str, Any] | None = None + self, + x: int, + y: int, + prompt: str | None = None, + data: str | dict[str, Any] | None = None, + intention: str | None = None, ) -> None: await self.page.mouse.move(x, y) @@ -827,8 +889,9 @@ class SkyvernPage: start_x: int, start_y: int, path: list[tuple[int, int]], - intention: str | None = None, + prompt: str | None = None, data: str | dict[str, Any] | None = None, + intention: str | None = None, # backward compatibility ) -> None: await handler_utils.drag(self.page, start_x, start_y, path) @@ -838,8 +901,9 @@ class SkyvernPage: x: int, y: int, direction: Literal["down", "up"], - intention: str | None = None, + prompt: str | None = None, data: str | dict[str, Any] | None = None, + intention: str | None = None, # backward compatibility ) -> None: await handler_utils.left_mouse(self.page, x, y, direction)