Code gen: Rename 'intention' -> 'prompt' (#3900)

Co-authored-by: Shuchang Zheng <wintonzheng0325@gmail.com>
This commit is contained in:
Stanislav Novosad
2025-11-04 15:16:46 -07:00
committed by GitHub
parent ba99e9ea2c
commit 324c3f921d
2 changed files with 108 additions and 44 deletions

View File

@@ -229,7 +229,7 @@ def _action_to_stmt(act: dict[str, Any], task: dict[str, Any], assign_to_output:
""" """
Turn one Action dict into: Turn one Action dict into:
await page.<method>(selector=..., intention=..., data=context.parameters) await page.<method>(selector=..., prompt=..., data=context.parameters)
Or if assign_to_output is True for extract actions: Or if assign_to_output is True for extract actions:
@@ -425,7 +425,7 @@ def _action_to_stmt(act: dict[str, Any], task: dict[str, Any], assign_to_output:
args.extend( args.extend(
[ [
cst.Arg( cst.Arg(
keyword=cst.Name("intention"), keyword=cst.Name("prompt"),
value=_value(intention), value=_value(intention),
whitespace_after_arg=cst.ParenthesizedWhitespace(indent=True), whitespace_after_arg=cst.ParenthesizedWhitespace(indent=True),
comma=cst.Comma(), comma=cst.Comma(),

View File

@@ -45,7 +45,7 @@ class Driver(StrEnum):
@dataclass @dataclass
class ActionMetadata: class ActionMetadata:
intention: str = "" prompt: str = ""
data: dict[str, Any] | str | None = None data: dict[str, Any] | str | None = None
timestamp: float | None = None # filled in by recorder timestamp: float | None = None # filled in by recorder
screenshot_path: str | None = None # if enabled screenshot_path: str | None = None # if enabled
@@ -176,11 +176,16 @@ class SkyvernPage:
async def wrapper( async def wrapper(
skyvern_page: SkyvernPage, skyvern_page: SkyvernPage,
*args: Any, *args: Any,
intention: str = "", prompt: str = "",
data: str | dict[str, Any] = "", data: str | dict[str, Any] = "",
intention: str = "", # backward compatibility
**kwargs: Any, **kwargs: Any,
) -> Any: ) -> Any:
meta = ActionMetadata(intention, data) # Backward compatibility: use intention if provided and prompt is empty
if intention and not prompt:
prompt = intention
meta = ActionMetadata(prompt, data)
call = ActionCall(action, args, kwargs, meta) call = ActionCall(action, args, kwargs, meta)
action_status = ActionStatus.completed action_status = ActionStatus.completed
@@ -191,14 +196,14 @@ class SkyvernPage:
emoji = ACTION_EMOJIS.get(action, "🔧") emoji = ACTION_EMOJIS.get(action, "🔧")
action_name = action.value if hasattr(action, "value") else str(action) action_name = action.value if hasattr(action, "value") else str(action)
print(f"{emoji} {action_name.replace('_', ' ').title()}", end="") print(f"{emoji} {action_name.replace('_', ' ').title()}", end="")
if intention: if prompt:
print(f": {intention}") print(f": {prompt}")
else: else:
print() print()
try: try:
call.result = await fn( call.result = await fn(
skyvern_page, *args, intention=intention, data=data, **kwargs skyvern_page, *args, prompt=prompt, data=data, intention=intention, **kwargs
) # real driver call ) # real driver call
# Note: Action status would be updated to completed here if update method existed # Note: Action status would be updated to completed here if update method existed
@@ -224,7 +229,7 @@ class SkyvernPage:
# Auto-create action after execution # Auto-create action after execution
await skyvern_page._create_action_after_execution( await skyvern_page._create_action_after_execution(
action_type=action, action_type=action,
intention=intention, intention=prompt,
status=action_status, status=action_status,
data=data, data=data,
kwargs=kwargs, kwargs=kwargs,
@@ -268,6 +273,7 @@ class SkyvernPage:
data_extraction_schema: dict[str, Any] | list | str | None = None, data_extraction_schema: dict[str, Any] | list | str | None = None,
) -> str: ) -> str:
"""Generate user-facing reasoning for an action using the secondary LLM.""" """Generate user-facing reasoning for an action using the secondary LLM."""
reasoning = f"Auto-generated action for {action_type.value}" reasoning = f"Auto-generated action for {action_type.value}"
try: try:
context = skyvern_context.current() context = skyvern_context.current()
@@ -275,7 +281,7 @@ class SkyvernPage:
return f"Auto-generated action for {action_type.value}" return f"Auto-generated action for {action_type.value}"
# Build the prompt with available context # Build the prompt with available context
prompt = prompt_engine.load_prompt( reasoning_prompt = prompt_engine.load_prompt(
template="generate-action-reasoning", template="generate-action-reasoning",
action_type=action_type.value, action_type=action_type.value,
intention=intention, intention=intention,
@@ -288,7 +294,7 @@ class SkyvernPage:
# Call secondary LLM to generate reasoning # Call secondary LLM to generate reasoning
json_response = await app.SECONDARY_LLM_API_HANDLER( json_response = await app.SECONDARY_LLM_API_HANDLER(
prompt=prompt, prompt=reasoning_prompt,
prompt_name="generate-action-reasoning", prompt_name="generate-action-reasoning",
organization_id=context.organization_id, organization_id=context.organization_id,
) )
@@ -314,6 +320,7 @@ class SkyvernPage:
call_result: Any | None = None, call_result: Any | None = None,
) -> Action | None: ) -> Action | None:
"""Create an action record in the database before execution if task_id and step_id are available.""" """Create an action record in the database before execution if task_id and step_id are available."""
try: try:
context = skyvern_context.current() context = skyvern_context.current()
if not context or not context.task_id or not context.step_id: if not context or not context.task_id or not context.step_id:
@@ -447,14 +454,15 @@ class SkyvernPage:
async def click( async def click(
self, self,
selector: str, selector: str,
intention: str | None = None, prompt: str | None = None,
ai: str | None = "fallback", ai: str | None = "fallback",
data: str | dict[str, Any] | None = None, data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS, timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
intention: str | None = None, # backward compatibility
) -> str: ) -> str:
"""Click an element identified by ``selector``. """Click an element identified by ``selector``.
When ``intention`` and ``data`` are provided a new click action is When ``prompt`` and ``data`` are provided a new click action is
generated via the ``single-click-action`` prompt. The model returns a generated via the ``single-click-action`` prompt. The model returns a
fresh "xpath=..." selector based on the current DOM and the updated data for this run. fresh "xpath=..." selector based on the current DOM and the updated data for this run.
The browser then clicks the element using this newly generated xpath selector. The browser then clicks the element using this newly generated xpath selector.
@@ -462,6 +470,10 @@ class SkyvernPage:
If the prompt generation or parsing fails for any reason we fall back to If the prompt generation or parsing fails for any reason we fall back to
clicking the originally supplied ``selector``. clicking the originally supplied ``selector``.
""" """
# Backward compatibility
if intention is not None and prompt is None:
prompt = intention
context = skyvern_context.current() context = skyvern_context.current()
if context and context.ai_mode_override: if context and context.ai_mode_override:
ai = context.ai_mode_override ai = context.ai_mode_override
@@ -476,10 +488,10 @@ class SkyvernPage:
error_to_raise = e error_to_raise = e
# if the original selector doesn't work, try to click the element with the ai generated selector # if the original selector doesn't work, try to click the element with the ai generated selector
if intention: if prompt:
return await self._ai.ai_click( return await self._ai.ai_click(
selector=selector, selector=selector,
intention=intention, intention=prompt,
data=data, data=data,
timeout=timeout, timeout=timeout,
) )
@@ -488,10 +500,10 @@ class SkyvernPage:
else: else:
return selector return selector
elif ai == "proactive": elif ai == "proactive":
if intention: if prompt:
return await self._ai.ai_click( return await self._ai.ai_click(
selector=selector, selector=selector,
intention=intention, intention=prompt,
data=data, data=data,
timeout=timeout, timeout=timeout,
) )
@@ -505,17 +517,22 @@ class SkyvernPage:
selector: str | None, selector: str | None,
value: str, value: str,
ai: str | None = "fallback", ai: str | None = "fallback",
intention: str | None = None, prompt: str | None = None,
data: str | dict[str, Any] | None = None, data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS, timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
totp_identifier: str | None = None, totp_identifier: str | None = None,
totp_url: str | None = None, totp_url: str | None = None,
intention: str | None = None, # backward compatibility
) -> str: ) -> str:
# Backward compatibility
if intention is not None and prompt is None:
prompt = intention
return await self._input_text( return await self._input_text(
selector=selector, selector=selector,
value=value, value=value,
ai=ai, ai=ai,
intention=intention, intention=prompt,
data=data, data=data,
timeout=timeout, timeout=timeout,
totp_identifier=totp_identifier, totp_identifier=totp_identifier,
@@ -528,17 +545,22 @@ class SkyvernPage:
selector: str | None, selector: str | None,
value: str, value: str,
ai: str | None = "fallback", ai: str | None = "fallback",
intention: str | None = None, prompt: str | None = None,
data: str | dict[str, Any] | None = None, data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS, timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
totp_identifier: str | None = None, totp_identifier: str | None = None,
totp_url: str | None = None, totp_url: str | None = None,
intention: str | None = None, # backward compatibility
) -> str: ) -> str:
# Backward compatibility
if intention is not None and prompt is None:
prompt = intention
return await self._input_text( return await self._input_text(
selector=selector, selector=selector,
value=value, value=value,
ai=ai, ai=ai,
intention=intention, intention=prompt,
data=data, data=data,
timeout=timeout, timeout=timeout,
totp_identifier=totp_identifier, totp_identifier=totp_identifier,
@@ -558,7 +580,7 @@ class SkyvernPage:
) -> str: ) -> str:
"""Input text into an element identified by ``selector``. """Input text into an element identified by ``selector``.
When ``intention`` and ``data`` are provided a new input text action is When ``prompt`` and ``data`` are provided a new input text action is
generated via the `script-generation-input-text-generation` prompt. The model returns a generated via the `script-generation-input-text-generation` prompt. The model returns a
fresh text based on the current DOM and the updated data for this run. fresh text based on the current DOM and the updated data for this run.
The browser then inputs the text using this newly generated text. The browser then inputs the text using this newly generated text.
@@ -566,6 +588,7 @@ class SkyvernPage:
If the prompt generation or parsing fails for any reason we fall back to If the prompt generation or parsing fails for any reason we fall back to
inputting the originally supplied ``text``. inputting the originally supplied ``text``.
""" """
context = skyvern_context.current() context = skyvern_context.current()
if context and context.ai_mode_override: if context and context.ai_mode_override:
ai = context.ai_mode_override ai = context.ai_mode_override
@@ -619,10 +642,15 @@ class SkyvernPage:
selector: str | None, selector: str | None,
files: str, files: str,
ai: str | None = "fallback", ai: str | None = "fallback",
intention: str | None = None, prompt: str | None = None,
data: str | dict[str, Any] | None = None, data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS, timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
intention: str | None = None, # backward compatibility
) -> str: ) -> str:
# Backward compatibility
if intention is not None and prompt is None:
prompt = intention
context = skyvern_context.current() context = skyvern_context.current()
if context and context.ai_mode_override: if context and context.ai_mode_override:
ai = context.ai_mode_override ai = context.ai_mode_override
@@ -636,11 +664,11 @@ class SkyvernPage:
except Exception as e: except Exception as e:
error_to_raise = e error_to_raise = e
if intention: if prompt:
return await self._ai.ai_upload_file( return await self._ai.ai_upload_file(
selector=selector, selector=selector,
files=files, files=files,
intention=intention, intention=prompt,
data=data, data=data,
timeout=timeout, timeout=timeout,
) )
@@ -648,11 +676,11 @@ class SkyvernPage:
raise error_to_raise raise error_to_raise
else: else:
return files return files
elif ai == "proactive" and intention: elif ai == "proactive" and prompt:
return await self._ai.ai_upload_file( return await self._ai.ai_upload_file(
selector=selector, selector=selector,
files=files, files=files,
intention=intention, intention=prompt,
data=data, data=data,
timeout=timeout, timeout=timeout,
) )
@@ -672,10 +700,15 @@ class SkyvernPage:
value: str | None = None, value: str | None = None,
label: str | None = None, label: str | None = None,
ai: str | None = "fallback", ai: str | None = "fallback",
intention: str | None = None, prompt: str | None = None,
data: str | dict[str, Any] | None = None, data: str | dict[str, Any] | None = None,
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS, timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
intention: str | None = None, # backward compatibility
) -> str: ) -> str:
# Backward compatibility
if intention is not None and prompt is None:
prompt = intention
context = skyvern_context.current() context = skyvern_context.current()
if context and context.ai_mode_override: if context and context.ai_mode_override:
ai = context.ai_mode_override ai = context.ai_mode_override
@@ -688,11 +721,11 @@ class SkyvernPage:
return value return value
except Exception as e: except Exception as e:
error_to_raise = e error_to_raise = e
if intention: if prompt:
return await self._ai.ai_select_option( return await self._ai.ai_select_option(
selector=selector, selector=selector,
value=value, value=value,
intention=intention, intention=prompt,
data=data, data=data,
timeout=timeout, timeout=timeout,
) )
@@ -700,11 +733,11 @@ class SkyvernPage:
raise error_to_raise raise error_to_raise
else: else:
return value return value
elif ai == "proactive" and intention: elif ai == "proactive" and prompt:
return await self._ai.ai_select_option( return await self._ai.ai_select_option(
selector=selector, selector=selector,
value=value, value=value,
intention=intention, intention=prompt,
data=data, data=data,
timeout=timeout, timeout=timeout,
) )
@@ -714,16 +747,24 @@ class SkyvernPage:
@action_wrap(ActionType.WAIT) @action_wrap(ActionType.WAIT)
async def wait( async def wait(
self, seconds: float, intention: str | None = None, data: str | dict[str, Any] | None = None self,
seconds: float,
prompt: str | None = None,
data: str | dict[str, Any] | None = None,
intention: str | None = None,
) -> None: ) -> None:
await asyncio.sleep(seconds) await asyncio.sleep(seconds)
@action_wrap(ActionType.NULL_ACTION) @action_wrap(ActionType.NULL_ACTION)
async def null_action(self, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None: async def null_action(
self, prompt: str | None = None, data: str | dict[str, Any] | None = None, intention: str | None = None
) -> None:
return return
@action_wrap(ActionType.SOLVE_CAPTCHA) @action_wrap(ActionType.SOLVE_CAPTCHA)
async def solve_captcha(self, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None: async def solve_captcha(
self, prompt: str | None = None, data: str | dict[str, Any] | None = None, intention: str | None = None
) -> None:
context = skyvern_context.current() context = skyvern_context.current()
if not context or not context.organization_id or not context.task_id or not context.step_id: if not context or not context.organization_id or not context.task_id or not context.step_id:
await asyncio.sleep(30) await asyncio.sleep(30)
@@ -744,13 +785,19 @@ class SkyvernPage:
@action_wrap(ActionType.TERMINATE) @action_wrap(ActionType.TERMINATE)
async def terminate( async def terminate(
self, errors: list[str], intention: str | None = None, data: str | dict[str, Any] | None = None self,
errors: list[str],
prompt: str | None = None,
data: str | dict[str, Any] | None = None,
intention: str | None = None,
) -> None: ) -> None:
# TODO: update the workflow run status to terminated # TODO: update the workflow run status to terminated
return return
@action_wrap(ActionType.COMPLETE) @action_wrap(ActionType.COMPLETE)
async def complete(self, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None: async def complete(
self, prompt: str | None = None, data: str | dict[str, Any] | None = None, intention: str | None = None
) -> None:
# TODO: add validation here. if it doesn't pass the validation criteria: # TODO: add validation here. if it doesn't pass the validation criteria:
# 1. terminate the workflow run if fallback to ai is false # 1. terminate the workflow run if fallback to ai is false
# 2. fallback to ai if fallback to ai is true # 2. fallback to ai if fallback to ai is true
@@ -779,7 +826,9 @@ class SkyvernPage:
raise ScriptTerminationException(result[-1].exception_message) raise ScriptTerminationException(result[-1].exception_message)
@action_wrap(ActionType.RELOAD_PAGE) @action_wrap(ActionType.RELOAD_PAGE)
async def reload_page(self, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None: async def reload_page(
self, prompt: str | None = None, data: str | dict[str, Any] | None = None, intention: str | None = None
) -> None:
await self.page.reload() await self.page.reload()
return return
@@ -795,12 +844,19 @@ class SkyvernPage:
return await self._ai.ai_extract(prompt, schema, error_code_mapping, intention, data) return await self._ai.ai_extract(prompt, schema, error_code_mapping, intention, data)
@action_wrap(ActionType.VERIFICATION_CODE) @action_wrap(ActionType.VERIFICATION_CODE)
async def verification_code(self, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None: async def verification_code(
self, prompt: str | None = None, data: str | dict[str, Any] | None = None, intention: str | None = None
) -> None:
return return
@action_wrap(ActionType.SCROLL) @action_wrap(ActionType.SCROLL)
async def scroll( async def scroll(
self, scroll_x: int, scroll_y: int, intention: str | None = None, data: str | dict[str, Any] | None = None self,
scroll_x: int,
scroll_y: int,
prompt: str | None = None,
data: str | dict[str, Any] | None = None,
intention: str | None = None,
) -> None: ) -> None:
await self.page.evaluate(f"window.scrollBy({scroll_x}, {scroll_y})") await self.page.evaluate(f"window.scrollBy({scroll_x}, {scroll_y})")
@@ -810,14 +866,20 @@ class SkyvernPage:
keys: list[str], keys: list[str],
hold: bool = False, hold: bool = False,
duration: float = 0, duration: float = 0,
intention: str | None = None, prompt: str | None = None,
data: str | dict[str, Any] | None = None, data: str | dict[str, Any] | None = None,
intention: str | None = None, # backward compatibility
) -> None: ) -> None:
await handler_utils.keypress(self.page, keys, hold=hold, duration=duration) await handler_utils.keypress(self.page, keys, hold=hold, duration=duration)
@action_wrap(ActionType.MOVE) @action_wrap(ActionType.MOVE)
async def move( async def move(
self, x: int, y: int, intention: str | None = None, data: str | dict[str, Any] | None = None self,
x: int,
y: int,
prompt: str | None = None,
data: str | dict[str, Any] | None = None,
intention: str | None = None,
) -> None: ) -> None:
await self.page.mouse.move(x, y) await self.page.mouse.move(x, y)
@@ -827,8 +889,9 @@ class SkyvernPage:
start_x: int, start_x: int,
start_y: int, start_y: int,
path: list[tuple[int, int]], path: list[tuple[int, int]],
intention: str | None = None, prompt: str | None = None,
data: str | dict[str, Any] | None = None, data: str | dict[str, Any] | None = None,
intention: str | None = None, # backward compatibility
) -> None: ) -> None:
await handler_utils.drag(self.page, start_x, start_y, path) await handler_utils.drag(self.page, start_x, start_y, path)
@@ -838,8 +901,9 @@ class SkyvernPage:
x: int, x: int,
y: int, y: int,
direction: Literal["down", "up"], direction: Literal["down", "up"],
intention: str | None = None, prompt: str | None = None,
data: str | dict[str, Any] | None = None, data: str | dict[str, Any] | None = None,
intention: str | None = None, # backward compatibility
) -> None: ) -> None:
await handler_utils.left_mouse(self.page, x, y, direction) await handler_utils.left_mouse(self.page, x, y, direction)