Support None/Invalid selector in ai_input_text (#3883)

This commit is contained in:
Stanislav Novosad
2025-11-03 11:49:40 -07:00
committed by GitHub
parent 4da7b6d4dd
commit 9feb9e855c
6 changed files with 107 additions and 60 deletions

View File

@@ -2,7 +2,7 @@ from __future__ import annotations
import json import json
from datetime import datetime, timezone from datetime import datetime, timezone
from typing import Any from typing import Any, cast
import structlog import structlog
from jinja2.sandbox import SandboxedEnvironment from jinja2.sandbox import SandboxedEnvironment
@@ -35,6 +35,9 @@ jinja_sandbox_env = SandboxedEnvironment()
LOG = structlog.get_logger() LOG = structlog.get_logger()
INPUT_GOAL = """- The intention to fill out an input: {intention}.
- The overall goal that the user wants to achieve: {prompt}."""
SELECT_OPTION_GOAL = """- The intention to select an option: {intention}. SELECT_OPTION_GOAL = """- The intention to select an option: {intention}.
- The overall goal that the user wants to achieve: {prompt}.""" - The overall goal that the user wants to achieve: {prompt}."""
@@ -183,7 +186,7 @@ class RealSkyvernPageAi(SkyvernPageAi):
async def ai_input_text( async def ai_input_text(
self, self,
selector: str, selector: str | None,
value: str, value: str,
intention: str, intention: str,
data: str | dict[str, Any] | None = None, data: str | dict[str, Any] | None = None,
@@ -193,19 +196,20 @@ class RealSkyvernPageAi(SkyvernPageAi):
) -> str: ) -> str:
"""Input text into an element using AI to determine the value.""" """Input text into an element using AI to determine the value."""
context = skyvern_context.current() context = skyvern_context.ensure_context()
value = value or "" value = value or ""
transformed_value = value transformed_value = value
element_id: str | None = None action: InputTextAction | None = None
organization_id = context.organization_id if context else None organization_id = context.organization_id
task_id = context.task_id if context else None task_id = context.task_id
step_id = context.step_id if context else None step_id = context.step_id
workflow_run_id = context.workflow_run_id if context else None workflow_run_id = context.workflow_run_id
task = await app.DATABASE.get_task(task_id, organization_id) if task_id and organization_id else None task = await app.DATABASE.get_task(task_id, organization_id) if task_id and organization_id else None
step = await app.DATABASE.get_step(step_id, organization_id) if step_id and organization_id else None step = await app.DATABASE.get_step(step_id, organization_id) if step_id and organization_id else None
if intention: if intention:
try: try:
prompt = context.prompt if context else None prompt = context.prompt
data = data or {} data = data or {}
if (totp_identifier or totp_url) and context and organization_id and task_id: if (totp_identifier or totp_url) and context and organization_id and task_id:
if totp_identifier: if totp_identifier:
@@ -232,40 +236,72 @@ class RealSkyvernPageAi(SkyvernPageAi):
refreshed_page = await self.scraped_page.generate_scraped_page_without_screenshots() refreshed_page = await self.scraped_page.generate_scraped_page_without_screenshots()
self.scraped_page = refreshed_page self.scraped_page = refreshed_page
# get the element_id by the selector
element_id = await _get_element_id_by_selector(selector, self.page) # Try to get element_id from selector if selector is provided
script_generation_input_text_prompt = prompt_engine.load_prompt( element_id = await _get_element_id_by_selector(selector, self.page) if selector else None
template="script-generation-input-text-generatiion",
intention=intention, if element_id:
goal=prompt, # The selector/element is valid, using a simpler/smaller prompt
data=data, script_generation_input_text_prompt = prompt_engine.load_prompt(
) template="script-generation-input-text-generatiion",
json_response = await app.SINGLE_INPUT_AGENT_LLM_API_HANDLER( intention=intention,
prompt=script_generation_input_text_prompt, goal=prompt,
prompt_name="script-generation-input-text-generatiion", data=data,
step=step, )
organization_id=organization_id, json_response = await app.SINGLE_INPUT_AGENT_LLM_API_HANDLER(
) prompt=script_generation_input_text_prompt,
value = json_response.get("answer", value) prompt_name="script-generation-input-text-generatiion",
step=step,
organization_id=organization_id,
)
value = json_response.get("answer", value)
if context and context.workflow_run_id:
transformed_value = await _get_actual_value_of_parameter_if_secret(
context.workflow_run_id, str(value)
)
action = InputTextAction(
element_id=element_id,
text=value,
status=ActionStatus.pending,
organization_id=organization_id,
workflow_run_id=workflow_run_id,
task_id=task_id,
step_id=context.step_id if context else None,
reasoning=intention,
intention=intention,
response=value,
)
else:
# Use a heavier single-input-action when selector is not found
element_tree = refreshed_page.build_element_tree()
payload_str = _get_context_data(data)
merged_goal = INPUT_GOAL.format(intention=intention, prompt=prompt)
single_input_prompt = prompt_engine.load_prompt(
template="single-input-action",
navigation_goal=merged_goal,
navigation_payload_str=payload_str,
current_url=self.page.url,
elements=element_tree,
local_datetime=datetime.now(context.tz_info or datetime.now().astimezone().tzinfo).isoformat(),
)
json_response = await app.SINGLE_INPUT_AGENT_LLM_API_HANDLER(
prompt=single_input_prompt,
prompt_name="single-input-action",
step=step,
organization_id=organization_id,
)
actions_json = json_response.get("actions", [])
if actions_json and task and step:
actions = parse_actions(task, step.step_id, step.order, refreshed_page, actions_json)
if actions and isinstance(actions[0], InputTextAction):
action = cast(InputTextAction, actions[0])
except Exception: except Exception:
LOG.exception(f"Failed to adapt value for input text action on selector={selector}, value={value}") LOG.exception(f"Failed to adapt value for input text action on selector={selector}, value={value}")
if context and context.workflow_run_id: if action and organization_id and task and step:
transformed_value = await _get_actual_value_of_parameter_if_secret(context.workflow_run_id, str(value))
if element_id and organization_id and task and step:
action = InputTextAction(
element_id=element_id,
text=value,
status=ActionStatus.pending,
organization_id=organization_id,
workflow_run_id=workflow_run_id,
task_id=task_id,
step_id=context.step_id if context else None,
reasoning=intention,
intention=intention,
response=value,
)
result = await handle_input_text_action(action, self.page, self.scraped_page, task, step) result = await handle_input_text_action(action, self.page, self.scraped_page, task, step)
if result and result[-1].success is False: if result and result[-1].success is False:
raise Exception(result[-1].exception_message) raise Exception(result[-1].exception_message)

View File

@@ -502,7 +502,7 @@ class SkyvernPage:
@action_wrap(ActionType.INPUT_TEXT) @action_wrap(ActionType.INPUT_TEXT)
async def fill( async def fill(
self, self,
selector: str, selector: str | None,
value: str, value: str,
ai: str | None = "fallback", ai: str | None = "fallback",
intention: str | None = None, intention: str | None = None,
@@ -525,7 +525,7 @@ class SkyvernPage:
@action_wrap(ActionType.INPUT_TEXT) @action_wrap(ActionType.INPUT_TEXT)
async def type( async def type(
self, self,
selector: str, selector: str | None,
value: str, value: str,
ai: str | None = "fallback", ai: str | None = "fallback",
intention: str | None = None, intention: str | None = None,
@@ -547,7 +547,7 @@ class SkyvernPage:
async def _input_text( async def _input_text(
self, self,
selector: str, selector: str | None,
value: str, value: str,
ai: str | None = "fallback", ai: str | None = "fallback",
intention: str | None = None, intention: str | None = None,
@@ -569,15 +569,17 @@ class SkyvernPage:
context = skyvern_context.current() context = skyvern_context.current()
if context and context.ai_mode_override: if context and context.ai_mode_override:
ai = context.ai_mode_override ai = context.ai_mode_override
# format the text with the actual value of the parameter if it's a secret when running a workflow # format the text with the actual value of the parameter if it's a secret when running a workflow
if ai == "fallback": if ai == "fallback":
error_to_raise = None error_to_raise = None
try: if selector:
locator = self.page.locator(selector) try:
await handler_utils.input_sequentially(locator, value, timeout=timeout) locator = self.page.locator(selector)
return value await handler_utils.input_sequentially(locator, value, timeout=timeout)
except Exception as e: return value
error_to_raise = e except Exception as e:
error_to_raise = e
if intention: if intention:
return await self._ai.ai_input_text( return await self._ai.ai_input_text(
@@ -603,6 +605,10 @@ class SkyvernPage:
totp_url=totp_url, totp_url=totp_url,
timeout=timeout, timeout=timeout,
) )
if not selector:
raise ValueError("Selector is required but was not provided")
locator = self.page.locator(selector) locator = self.page.locator(selector)
await handler_utils.input_sequentially(locator, value, timeout=timeout) await handler_utils.input_sequentially(locator, value, timeout=timeout)
return value return value

View File

@@ -20,7 +20,7 @@ class SkyvernPageAi(Protocol):
async def ai_input_text( async def ai_input_text(
self, self,
selector: str, selector: str | None,
value: str, value: str,
intention: str, intention: str,
data: str | dict[str, Any] | None = None, data: str | dict[str, Any] | None = None,

View File

@@ -37,7 +37,7 @@ class InputTextAction(SdkActionBase):
"""Input text action parameters.""" """Input text action parameters."""
type: Literal["ai_input_text"] = "ai_input_text" type: Literal["ai_input_text"] = "ai_input_text"
selector: str = Field(default="", description="CSS selector for the element") selector: str | None = Field(default="", description="CSS selector for the element")
value: str = Field(default="", description="Value to input") value: str = Field(default="", description="Value to input")
intention: str = Field(default="", description="The intention or goal of the input") intention: str = Field(default="", description="The intention or goal of the input")
data: str | dict[str, Any] | None = Field(None, description="Additional context data") data: str | dict[str, Any] | None = Field(None, description="Additional context data")

View File

@@ -47,7 +47,7 @@ class SdkSkyvernPageAi(SkyvernPageAi):
async def ai_input_text( async def ai_input_text(
self, self,
selector: str, selector: str | None,
value: str, value: str,
intention: str, intention: str,
data: str | dict[str, Any] | None = None, data: str | dict[str, Any] | None = None,

View File

@@ -408,7 +408,7 @@ class SkyvernBrowserPage:
``` ```
""" """
return await self._input_text( return await self._input_text(
selector=selector or "", selector=selector,
value=value or "", value=value or "",
ai=ai, ai=ai,
intention=prompt, intention=prompt,
@@ -698,7 +698,7 @@ class SkyvernBrowserPage:
async def _input_text( async def _input_text(
self, self,
selector: str, selector: str | None,
value: str, value: str,
ai: str | None = "fallback", ai: str | None = "fallback",
intention: str | None = None, intention: str | None = None,
@@ -721,12 +721,13 @@ class SkyvernBrowserPage:
# format the text with the actual value of the parameter if it's a secret when running a workflow # format the text with the actual value of the parameter if it's a secret when running a workflow
if ai == "fallback": if ai == "fallback":
error_to_raise = None error_to_raise = None
try: if selector:
locator = self._page.locator(selector) try:
await handler_utils.input_sequentially(locator, value, timeout=timeout) locator = self._page.locator(selector)
return value await handler_utils.input_sequentially(locator, value, timeout=timeout)
except Exception as e: return value
error_to_raise = e except Exception as e:
error_to_raise = e
if intention: if intention:
return await self._ai.ai_input_text( return await self._ai.ai_input_text(
@@ -752,6 +753,10 @@ class SkyvernBrowserPage:
totp_url=totp_url, totp_url=totp_url,
timeout=timeout, timeout=timeout,
) )
if not selector:
raise ValueError("Selector is required but was not provided")
locator = self._page.locator(selector) locator = self._page.locator(selector)
await handler_utils.input_sequentially(locator, value, timeout=timeout) await handler_utils.input_sequentially(locator, value, timeout=timeout)
return value return value