SkyvernPage actions: Support selector and xpath at the same time. Migrate generate script to use click(selector=f"xpath={xpath}") (#3729)
This commit is contained in:
@@ -27,6 +27,7 @@ from skyvern.schemas.workflows import FileStorageType
|
|||||||
from skyvern.webeye.actions.action_types import ActionType
|
from skyvern.webeye.actions.action_types import ActionType
|
||||||
|
|
||||||
LOG = structlog.get_logger(__name__)
|
LOG = structlog.get_logger(__name__)
|
||||||
|
GENERATE_CODE_AI_MODE = "proactive"
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------- #
|
# --------------------------------------------------------------------- #
|
||||||
@@ -227,7 +228,7 @@ def _action_to_stmt(act: dict[str, Any], task: dict[str, Any], assign_to_output:
|
|||||||
"""
|
"""
|
||||||
Turn one Action dict into:
|
Turn one Action dict into:
|
||||||
|
|
||||||
await page.<method>(xpath=..., intention=..., data=context.parameters)
|
await page.<method>(selector=..., intention=..., data=context.parameters)
|
||||||
|
|
||||||
Or if assign_to_output is True for extract actions:
|
Or if assign_to_output is True for extract actions:
|
||||||
|
|
||||||
@@ -239,8 +240,8 @@ def _action_to_stmt(act: dict[str, Any], task: dict[str, Any], assign_to_output:
|
|||||||
if method in ACTIONS_WITH_XPATH:
|
if method in ACTIONS_WITH_XPATH:
|
||||||
args.append(
|
args.append(
|
||||||
cst.Arg(
|
cst.Arg(
|
||||||
keyword=cst.Name("xpath"),
|
keyword=cst.Name("selector"),
|
||||||
value=_value(act["xpath"]),
|
value=_value(f"xpath={act['xpath']}"),
|
||||||
whitespace_after_arg=cst.ParenthesizedWhitespace(
|
whitespace_after_arg=cst.ParenthesizedWhitespace(
|
||||||
indent=True,
|
indent=True,
|
||||||
last_line=cst.SimpleWhitespace(INDENT),
|
last_line=cst.SimpleWhitespace(INDENT),
|
||||||
@@ -248,7 +249,18 @@ def _action_to_stmt(act: dict[str, Any], task: dict[str, Any], assign_to_output:
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
if method in ["type", "fill"]:
|
if method == "click":
|
||||||
|
args.append(
|
||||||
|
cst.Arg(
|
||||||
|
keyword=cst.Name("ai"),
|
||||||
|
value=_value(GENERATE_CODE_AI_MODE),
|
||||||
|
whitespace_after_arg=cst.ParenthesizedWhitespace(
|
||||||
|
indent=True,
|
||||||
|
last_line=cst.SimpleWhitespace(INDENT),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
elif method in ["type", "fill"]:
|
||||||
# Use context.parameters if field_name is available, otherwise fallback to direct value
|
# Use context.parameters if field_name is available, otherwise fallback to direct value
|
||||||
if act.get("field_name"):
|
if act.get("field_name"):
|
||||||
text_value = cst.Subscript(
|
text_value = cst.Subscript(
|
||||||
@@ -273,8 +285,8 @@ def _action_to_stmt(act: dict[str, Any], task: dict[str, Any], assign_to_output:
|
|||||||
)
|
)
|
||||||
args.append(
|
args.append(
|
||||||
cst.Arg(
|
cst.Arg(
|
||||||
keyword=cst.Name("ai_infer"),
|
keyword=cst.Name("ai"),
|
||||||
value=cst.Name("True"),
|
value=_value(GENERATE_CODE_AI_MODE),
|
||||||
whitespace_after_arg=cst.ParenthesizedWhitespace(
|
whitespace_after_arg=cst.ParenthesizedWhitespace(
|
||||||
indent=True,
|
indent=True,
|
||||||
last_line=cst.SimpleWhitespace(INDENT),
|
last_line=cst.SimpleWhitespace(INDENT),
|
||||||
@@ -330,8 +342,8 @@ def _action_to_stmt(act: dict[str, Any], task: dict[str, Any], assign_to_output:
|
|||||||
)
|
)
|
||||||
args.append(
|
args.append(
|
||||||
cst.Arg(
|
cst.Arg(
|
||||||
keyword=cst.Name("ai_infer"),
|
keyword=cst.Name("ai"),
|
||||||
value=cst.Name("True"),
|
value=_value(GENERATE_CODE_AI_MODE),
|
||||||
whitespace_after_arg=cst.ParenthesizedWhitespace(
|
whitespace_after_arg=cst.ParenthesizedWhitespace(
|
||||||
indent=True,
|
indent=True,
|
||||||
last_line=cst.SimpleWhitespace(INDENT),
|
last_line=cst.SimpleWhitespace(INDENT),
|
||||||
@@ -361,8 +373,8 @@ def _action_to_stmt(act: dict[str, Any], task: dict[str, Any], assign_to_output:
|
|||||||
)
|
)
|
||||||
args.append(
|
args.append(
|
||||||
cst.Arg(
|
cst.Arg(
|
||||||
keyword=cst.Name("ai_infer"),
|
keyword=cst.Name("ai"),
|
||||||
value=cst.Name("True"),
|
value=_value(GENERATE_CODE_AI_MODE),
|
||||||
whitespace_after_arg=cst.ParenthesizedWhitespace(
|
whitespace_after_arg=cst.ParenthesizedWhitespace(
|
||||||
indent=True,
|
indent=True,
|
||||||
last_line=cst.SimpleWhitespace(INDENT),
|
last_line=cst.SimpleWhitespace(INDENT),
|
||||||
|
|||||||
@@ -36,6 +36,7 @@ from skyvern.webeye.actions.actions import (
|
|||||||
)
|
)
|
||||||
from skyvern.webeye.actions.handler import (
|
from skyvern.webeye.actions.handler import (
|
||||||
ActionHandler,
|
ActionHandler,
|
||||||
|
handle_click_action,
|
||||||
handle_complete_action,
|
handle_complete_action,
|
||||||
handle_input_text_action,
|
handle_input_text_action,
|
||||||
handle_select_option_action,
|
handle_select_option_action,
|
||||||
@@ -72,8 +73,8 @@ class ActionCall:
|
|||||||
error: Exception | None = None # populated if failed
|
error: Exception | None = None # populated if failed
|
||||||
|
|
||||||
|
|
||||||
async def _get_element_id_by_xpath(xpath: str, page: Page) -> str | None:
|
async def _get_element_id_by_selector(selector: str, page: Page) -> str | None:
|
||||||
locator = page.locator(f"xpath={xpath}")
|
locator = page.locator(selector)
|
||||||
element_id = await locator.get_attribute("unique_id")
|
element_id = await locator.get_attribute("unique_id")
|
||||||
return element_id
|
return element_id
|
||||||
|
|
||||||
@@ -353,9 +354,12 @@ class SkyvernPage:
|
|||||||
# Create action record. TODO: store more action fields
|
# Create action record. TODO: store more action fields
|
||||||
kwargs = kwargs or {}
|
kwargs = kwargs or {}
|
||||||
# we're using "value" instead of "text" for input text actions interface
|
# we're using "value" instead of "text" for input text actions interface
|
||||||
xpath = kwargs.get("xpath")
|
xpath = None
|
||||||
if action_type == ActionType.CLICK:
|
if action_type == ActionType.CLICK:
|
||||||
xpath = call_result or xpath
|
if isinstance(call_result, str) and "xpath=" in call_result:
|
||||||
|
xpath_split_list = call_result.split("xpath=")
|
||||||
|
if len(xpath_split_list) > 1:
|
||||||
|
xpath = xpath_split_list[1]
|
||||||
text = None
|
text = None
|
||||||
select_option = None
|
select_option = None
|
||||||
response: str | None = kwargs.get("response")
|
response: str | None = kwargs.get("response")
|
||||||
@@ -470,59 +474,120 @@ class SkyvernPage:
|
|||||||
# If screenshot creation fails, don't block execution
|
# If screenshot creation fails, don't block execution
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
async def _ai_click(
|
||||||
|
self,
|
||||||
|
selector: str,
|
||||||
|
intention: str,
|
||||||
|
data: str | dict[str, Any] | None = None,
|
||||||
|
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
|
||||||
|
) -> str:
|
||||||
|
try:
|
||||||
|
# Build the element tree of the current page for the prompt
|
||||||
|
context = skyvern_context.ensure_context()
|
||||||
|
payload_str = _get_context_data(data)
|
||||||
|
refreshed_page = await self.scraped_page.generate_scraped_page_without_screenshots()
|
||||||
|
element_tree = refreshed_page.build_element_tree()
|
||||||
|
single_click_prompt = prompt_engine.load_prompt(
|
||||||
|
template="single-click-action",
|
||||||
|
navigation_goal=intention,
|
||||||
|
navigation_payload_str=payload_str,
|
||||||
|
current_url=self.page.url,
|
||||||
|
elements=element_tree,
|
||||||
|
local_datetime=datetime.now(context.tz_info or datetime.now().astimezone().tzinfo).isoformat(),
|
||||||
|
# user_context=getattr(context, "prompt", None),
|
||||||
|
)
|
||||||
|
json_response = await app.SINGLE_CLICK_AGENT_LLM_API_HANDLER(
|
||||||
|
prompt=single_click_prompt,
|
||||||
|
prompt_name="single-click-action",
|
||||||
|
organization_id=context.organization_id,
|
||||||
|
)
|
||||||
|
actions_json = json_response.get("actions", [])
|
||||||
|
if actions_json:
|
||||||
|
organization_id = context.organization_id if context else None
|
||||||
|
task_id = context.task_id if context else None
|
||||||
|
step_id = context.step_id if context else None
|
||||||
|
task = await app.DATABASE.get_task(task_id, organization_id) if task_id and organization_id else None
|
||||||
|
step = await app.DATABASE.get_step(step_id, organization_id) if step_id and organization_id else None
|
||||||
|
if organization_id and task and step:
|
||||||
|
actions = parse_actions(
|
||||||
|
task, step.step_id, step.order, self.scraped_page, json_response.get("actions", [])
|
||||||
|
)
|
||||||
|
action = actions[0]
|
||||||
|
result = await handle_click_action(action, self.page, self.scraped_page, task, step)
|
||||||
|
if result and result[-1].success is False:
|
||||||
|
raise Exception(result[-1].exception_message)
|
||||||
|
xpath = action.get_xpath()
|
||||||
|
selector = f"xpath={xpath}" if xpath else selector
|
||||||
|
return selector
|
||||||
|
except Exception:
|
||||||
|
LOG.exception(
|
||||||
|
f"Failed to do ai click. Falling back to original selector={selector}, intention={intention}, data={data}"
|
||||||
|
)
|
||||||
|
|
||||||
|
locator = self.page.locator(selector)
|
||||||
|
await locator.click(timeout=timeout)
|
||||||
|
return selector
|
||||||
|
|
||||||
######### Public Interfaces #########
|
######### Public Interfaces #########
|
||||||
@action_wrap(ActionType.CLICK)
|
@action_wrap(ActionType.CLICK)
|
||||||
async def click(self, xpath: str, intention: str | None = None, data: str | dict[str, Any] | None = None) -> str:
|
async def click(
|
||||||
"""Click an element identified by ``xpath``.
|
self,
|
||||||
|
selector: str,
|
||||||
|
intention: str | None = None,
|
||||||
|
ai: str | None = "fallback",
|
||||||
|
data: str | dict[str, Any] | None = None,
|
||||||
|
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
|
||||||
|
) -> str:
|
||||||
|
"""Click an element identified by ``selector``.
|
||||||
|
|
||||||
When ``intention`` and ``data`` are provided a new click action is
|
When ``intention`` and ``data`` are provided a new click action is
|
||||||
generated via the ``single-click-action`` prompt. The model returns a
|
generated via the ``single-click-action`` prompt. The model returns a
|
||||||
fresh xpath based on the current DOM and the updated data for this run.
|
fresh "xpath=..." selector based on the current DOM and the updated data for this run.
|
||||||
The browser then clicks the element using this newly generated xpath.
|
The browser then clicks the element using this newly generated xpath selector.
|
||||||
|
|
||||||
If the prompt generation or parsing fails for any reason we fall back to
|
If the prompt generation or parsing fails for any reason we fall back to
|
||||||
clicking the originally supplied ``xpath``.
|
clicking the originally supplied ``selector``.
|
||||||
"""
|
"""
|
||||||
new_xpath = xpath
|
if ai == "fallback":
|
||||||
|
# try to click the element with the original selector first
|
||||||
if intention:
|
error_to_raise = None
|
||||||
try:
|
try:
|
||||||
# Build the element tree of the current page for the prompt
|
locator = self.page.locator(selector)
|
||||||
context = skyvern_context.ensure_context()
|
await locator.click(timeout=timeout)
|
||||||
payload_str = _get_context_data(data)
|
return selector
|
||||||
refreshed_page = await self.scraped_page.generate_scraped_page_without_screenshots()
|
except Exception as e:
|
||||||
element_tree = refreshed_page.build_element_tree()
|
error_to_raise = e
|
||||||
single_click_prompt = prompt_engine.load_prompt(
|
|
||||||
template="single-click-action",
|
|
||||||
navigation_goal=intention,
|
|
||||||
navigation_payload_str=payload_str,
|
|
||||||
current_url=self.page.url,
|
|
||||||
elements=element_tree,
|
|
||||||
local_datetime=datetime.now(context.tz_info or datetime.now().astimezone().tzinfo).isoformat(),
|
|
||||||
# user_context=getattr(context, "prompt", None),
|
|
||||||
)
|
|
||||||
json_response = await app.SINGLE_CLICK_AGENT_LLM_API_HANDLER(
|
|
||||||
prompt=single_click_prompt,
|
|
||||||
prompt_name="single-click-action",
|
|
||||||
organization_id=context.organization_id,
|
|
||||||
)
|
|
||||||
actions = json_response.get("actions", [])
|
|
||||||
if actions:
|
|
||||||
new_xpath = actions[0].get("xpath", xpath) or xpath
|
|
||||||
except Exception:
|
|
||||||
# If anything goes wrong, fall back to the original xpath
|
|
||||||
new_xpath = xpath
|
|
||||||
|
|
||||||
locator = self.page.locator(f"xpath={new_xpath}")
|
# if the original selector doesn't work, try to click the element with the ai generated selector
|
||||||
await locator.click(timeout=5000)
|
if intention:
|
||||||
return new_xpath
|
return await self._ai_click(
|
||||||
|
selector=selector,
|
||||||
|
intention=intention,
|
||||||
|
data=data,
|
||||||
|
timeout=timeout,
|
||||||
|
)
|
||||||
|
if error_to_raise:
|
||||||
|
raise error_to_raise
|
||||||
|
else:
|
||||||
|
return selector
|
||||||
|
elif ai == "proactive":
|
||||||
|
if intention:
|
||||||
|
return await self._ai_click(
|
||||||
|
selector=selector,
|
||||||
|
intention=intention,
|
||||||
|
data=data,
|
||||||
|
timeout=timeout,
|
||||||
|
)
|
||||||
|
locator = self.page.locator(selector)
|
||||||
|
await locator.click(timeout=timeout)
|
||||||
|
return selector
|
||||||
|
|
||||||
@action_wrap(ActionType.INPUT_TEXT)
|
@action_wrap(ActionType.INPUT_TEXT)
|
||||||
async def fill(
|
async def fill(
|
||||||
self,
|
self,
|
||||||
xpath: str,
|
selector: str,
|
||||||
value: str,
|
value: str,
|
||||||
ai_infer: bool = False,
|
ai: str | None = "fallback",
|
||||||
intention: str | None = None,
|
intention: str | None = None,
|
||||||
data: str | dict[str, Any] | None = None,
|
data: str | dict[str, Any] | None = None,
|
||||||
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
|
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
|
||||||
@@ -530,9 +595,9 @@ class SkyvernPage:
|
|||||||
totp_url: str | None = None,
|
totp_url: str | None = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
return await self._input_text(
|
return await self._input_text(
|
||||||
xpath=xpath,
|
selector=selector,
|
||||||
value=value,
|
value=value,
|
||||||
ai_infer=ai_infer,
|
ai=ai,
|
||||||
intention=intention,
|
intention=intention,
|
||||||
data=data,
|
data=data,
|
||||||
timeout=timeout,
|
timeout=timeout,
|
||||||
@@ -543,9 +608,9 @@ class SkyvernPage:
|
|||||||
@action_wrap(ActionType.INPUT_TEXT)
|
@action_wrap(ActionType.INPUT_TEXT)
|
||||||
async def type(
|
async def type(
|
||||||
self,
|
self,
|
||||||
xpath: str,
|
selector: str,
|
||||||
value: str,
|
value: str,
|
||||||
ai_infer: bool = False,
|
ai: str | None = "fallback",
|
||||||
intention: str | None = None,
|
intention: str | None = None,
|
||||||
data: str | dict[str, Any] | None = None,
|
data: str | dict[str, Any] | None = None,
|
||||||
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
|
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
|
||||||
@@ -553,9 +618,9 @@ class SkyvernPage:
|
|||||||
totp_url: str | None = None,
|
totp_url: str | None = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
return await self._input_text(
|
return await self._input_text(
|
||||||
xpath=xpath,
|
selector=selector,
|
||||||
value=value,
|
value=value,
|
||||||
ai_infer=ai_infer,
|
ai=ai,
|
||||||
intention=intention,
|
intention=intention,
|
||||||
data=data,
|
data=data,
|
||||||
timeout=timeout,
|
timeout=timeout,
|
||||||
@@ -563,28 +628,16 @@ class SkyvernPage:
|
|||||||
totp_url=totp_url,
|
totp_url=totp_url,
|
||||||
)
|
)
|
||||||
|
|
||||||
async def _input_text(
|
async def _ai_input_text(
|
||||||
self,
|
self,
|
||||||
xpath: str,
|
selector: str,
|
||||||
value: str,
|
value: str,
|
||||||
ai_infer: bool = False,
|
intention: str,
|
||||||
intention: str | None = None,
|
|
||||||
data: str | dict[str, Any] | None = None,
|
data: str | dict[str, Any] | None = None,
|
||||||
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
|
|
||||||
totp_identifier: str | None = None,
|
totp_identifier: str | None = None,
|
||||||
totp_url: str | None = None,
|
totp_url: str | None = None,
|
||||||
|
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Input text into an element identified by ``xpath``.
|
|
||||||
|
|
||||||
When ``intention`` and ``data`` are provided a new input text action is
|
|
||||||
generated via the `script-generation-input-text-generatiion` prompt. The model returns a
|
|
||||||
fresh text based on the current DOM and the updated data for this run.
|
|
||||||
The browser then inputs the text using this newly generated text.
|
|
||||||
|
|
||||||
If the prompt generation or parsing fails for any reason we fall back to
|
|
||||||
inputting the originally supplied ``text``.
|
|
||||||
"""
|
|
||||||
# format the text with the actual value of the parameter if it's a secret when running a workflow
|
|
||||||
context = skyvern_context.current()
|
context = skyvern_context.current()
|
||||||
value = value or ""
|
value = value or ""
|
||||||
transformed_value = value
|
transformed_value = value
|
||||||
@@ -595,7 +648,7 @@ class SkyvernPage:
|
|||||||
workflow_run_id = context.workflow_run_id if context else None
|
workflow_run_id = context.workflow_run_id if context else None
|
||||||
task = await app.DATABASE.get_task(task_id, organization_id) if task_id and organization_id else None
|
task = await app.DATABASE.get_task(task_id, organization_id) if task_id and organization_id else None
|
||||||
step = await app.DATABASE.get_step(step_id, organization_id) if step_id and organization_id else None
|
step = await app.DATABASE.get_step(step_id, organization_id) if step_id and organization_id else None
|
||||||
if ai_infer and intention:
|
if intention:
|
||||||
try:
|
try:
|
||||||
prompt = context.prompt if context else None
|
prompt = context.prompt if context else None
|
||||||
data = data or {}
|
data = data or {}
|
||||||
@@ -624,8 +677,8 @@ class SkyvernPage:
|
|||||||
|
|
||||||
refreshed_page = await self.scraped_page.generate_scraped_page_without_screenshots()
|
refreshed_page = await self.scraped_page.generate_scraped_page_without_screenshots()
|
||||||
self.scraped_page = refreshed_page
|
self.scraped_page = refreshed_page
|
||||||
# get the element_id by the xpath
|
# get the element_id by the selector
|
||||||
element_id = await _get_element_id_by_xpath(xpath, self.page)
|
element_id = await _get_element_id_by_selector(selector, self.page)
|
||||||
script_generation_input_text_prompt = prompt_engine.load_prompt(
|
script_generation_input_text_prompt = prompt_engine.load_prompt(
|
||||||
template="script-generation-input-text-generatiion",
|
template="script-generation-input-text-generatiion",
|
||||||
intention=intention,
|
intention=intention,
|
||||||
@@ -639,10 +692,10 @@ class SkyvernPage:
|
|||||||
)
|
)
|
||||||
value = json_response.get("answer", value)
|
value = json_response.get("answer", value)
|
||||||
except Exception:
|
except Exception:
|
||||||
LOG.exception(f"Failed to adapt value for input text action on xpath={xpath}, value={value}")
|
LOG.exception(f"Failed to adapt value for input text action on selector={selector}, value={value}")
|
||||||
|
|
||||||
if context and context.workflow_run_id:
|
if context and context.workflow_run_id:
|
||||||
transformed_value = await _get_actual_value_of_parameter_if_secret(context.workflow_run_id, value)
|
transformed_value = await _get_actual_value_of_parameter_if_secret(context.workflow_run_id, str(value))
|
||||||
|
|
||||||
if element_id and organization_id and task and step:
|
if element_id and organization_id and task and step:
|
||||||
action = InputTextAction(
|
action = InputTextAction(
|
||||||
@@ -661,20 +714,78 @@ class SkyvernPage:
|
|||||||
if result and result[-1].success is False:
|
if result and result[-1].success is False:
|
||||||
raise Exception(result[-1].exception_message)
|
raise Exception(result[-1].exception_message)
|
||||||
else:
|
else:
|
||||||
locator = self.page.locator(f"xpath={xpath}")
|
locator = self.page.locator(selector)
|
||||||
await handler_utils.input_sequentially(locator, transformed_value, timeout=timeout)
|
await handler_utils.input_sequentially(locator, transformed_value, timeout=timeout)
|
||||||
return value
|
return value
|
||||||
|
|
||||||
@action_wrap(ActionType.UPLOAD_FILE)
|
async def _input_text(
|
||||||
async def upload_file(
|
|
||||||
self,
|
self,
|
||||||
xpath: str,
|
selector: str,
|
||||||
files: str,
|
value: str,
|
||||||
ai_infer: bool = False,
|
ai: str | None = "fallback",
|
||||||
intention: str | None = None,
|
intention: str | None = None,
|
||||||
data: str | dict[str, Any] | None = None,
|
data: str | dict[str, Any] | None = None,
|
||||||
|
totp_identifier: str | None = None,
|
||||||
|
totp_url: str | None = None,
|
||||||
|
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
|
||||||
) -> str:
|
) -> str:
|
||||||
if ai_infer and intention:
|
"""Input text into an element identified by ``selector``.
|
||||||
|
|
||||||
|
When ``intention`` and ``data`` are provided a new input text action is
|
||||||
|
generated via the `script-generation-input-text-generation` prompt. The model returns a
|
||||||
|
fresh text based on the current DOM and the updated data for this run.
|
||||||
|
The browser then inputs the text using this newly generated text.
|
||||||
|
|
||||||
|
If the prompt generation or parsing fails for any reason we fall back to
|
||||||
|
inputting the originally supplied ``text``.
|
||||||
|
"""
|
||||||
|
# format the text with the actual value of the parameter if it's a secret when running a workflow
|
||||||
|
if ai == "fallback":
|
||||||
|
error_to_raise = None
|
||||||
|
try:
|
||||||
|
locator = self.page.locator(selector)
|
||||||
|
await handler_utils.input_sequentially(locator, value, timeout=timeout)
|
||||||
|
except Exception as e:
|
||||||
|
error_to_raise = e
|
||||||
|
|
||||||
|
if intention:
|
||||||
|
return await self._ai_input_text(
|
||||||
|
selector=selector,
|
||||||
|
value=value,
|
||||||
|
intention=intention,
|
||||||
|
data=data,
|
||||||
|
totp_identifier=totp_identifier,
|
||||||
|
totp_url=totp_url,
|
||||||
|
timeout=timeout,
|
||||||
|
)
|
||||||
|
if error_to_raise:
|
||||||
|
raise error_to_raise
|
||||||
|
else:
|
||||||
|
return value
|
||||||
|
elif ai == "proactive" and intention:
|
||||||
|
return await self._ai_input_text(
|
||||||
|
selector=selector,
|
||||||
|
value=value,
|
||||||
|
intention=intention,
|
||||||
|
data=data,
|
||||||
|
totp_identifier=totp_identifier,
|
||||||
|
totp_url=totp_url,
|
||||||
|
timeout=timeout,
|
||||||
|
)
|
||||||
|
locator = self.page.locator(selector)
|
||||||
|
await handler_utils.input_sequentially(locator, value, timeout=timeout)
|
||||||
|
return value
|
||||||
|
|
||||||
|
async def _ai_upload_file(
|
||||||
|
self,
|
||||||
|
selector: str,
|
||||||
|
files: str,
|
||||||
|
file_path: str,
|
||||||
|
intention: str,
|
||||||
|
data: str | dict[str, Any] | None = None,
|
||||||
|
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
|
||||||
|
) -> str:
|
||||||
|
if intention:
|
||||||
try:
|
try:
|
||||||
context = skyvern_context.current()
|
context = skyvern_context.current()
|
||||||
prompt = context.prompt if context else None
|
prompt = context.prompt if context else None
|
||||||
@@ -692,28 +803,71 @@ class SkyvernPage:
|
|||||||
)
|
)
|
||||||
files = json_response.get("answer", files)
|
files = json_response.get("answer", files)
|
||||||
except Exception:
|
except Exception:
|
||||||
LOG.exception(f"Failed to adapt value for input text action on xpath={xpath}, file={files}")
|
LOG.exception(f"Failed to adapt value for input text action on selector={selector}, file={files}")
|
||||||
file_path = await download_file(files)
|
if not files:
|
||||||
locator = self.page.locator(f"xpath={xpath}")
|
raise ValueError("file url must be provided")
|
||||||
await locator.set_input_files(file_path)
|
locator = self.page.locator(selector)
|
||||||
|
await locator.set_input_files(file_path, timeout=timeout)
|
||||||
return files
|
return files
|
||||||
|
|
||||||
@action_wrap(ActionType.SELECT_OPTION)
|
@action_wrap(ActionType.UPLOAD_FILE)
|
||||||
async def select_option(
|
async def upload_file(
|
||||||
self,
|
self,
|
||||||
xpath: str,
|
selector: str,
|
||||||
value: str,
|
files: str,
|
||||||
ai_infer: bool = False,
|
ai: str | None = "fallback",
|
||||||
intention: str | None = None,
|
intention: str | None = None,
|
||||||
data: str | dict[str, Any] | None = None,
|
data: str | dict[str, Any] | None = None,
|
||||||
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
|
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
|
||||||
|
) -> str:
|
||||||
|
file_path = await download_file(files)
|
||||||
|
if ai == "fallback":
|
||||||
|
error_to_raise = None
|
||||||
|
try:
|
||||||
|
locator = self.page.locator(selector)
|
||||||
|
await locator.set_input_files(file_path)
|
||||||
|
except Exception as e:
|
||||||
|
error_to_raise = e
|
||||||
|
if intention:
|
||||||
|
return await self._ai_upload_file(
|
||||||
|
selector=selector,
|
||||||
|
files=files,
|
||||||
|
file_path=file_path,
|
||||||
|
intention=intention,
|
||||||
|
data=data,
|
||||||
|
timeout=timeout,
|
||||||
|
)
|
||||||
|
if error_to_raise:
|
||||||
|
raise error_to_raise
|
||||||
|
else:
|
||||||
|
return files
|
||||||
|
elif ai == "proactive" and intention:
|
||||||
|
return await self._ai_upload_file(
|
||||||
|
selector=selector,
|
||||||
|
files=files,
|
||||||
|
file_path=file_path,
|
||||||
|
intention=intention,
|
||||||
|
data=data,
|
||||||
|
timeout=timeout,
|
||||||
|
)
|
||||||
|
locator = self.page.locator(selector)
|
||||||
|
await locator.set_input_files(file_path, timeout=timeout)
|
||||||
|
return files
|
||||||
|
|
||||||
|
async def _ai_select_option(
|
||||||
|
self,
|
||||||
|
selector: str,
|
||||||
|
value: str,
|
||||||
|
intention: str,
|
||||||
|
data: str | dict[str, Any] | None = None,
|
||||||
|
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
|
||||||
) -> str:
|
) -> str:
|
||||||
option_value = value or ""
|
option_value = value or ""
|
||||||
context = skyvern_context.current()
|
context = skyvern_context.current()
|
||||||
if context and context.task_id and context.step_id and context.organization_id:
|
if context and context.task_id and context.step_id and context.organization_id:
|
||||||
task = await app.DATABASE.get_task(context.task_id, organization_id=context.organization_id)
|
task = await app.DATABASE.get_task(context.task_id, organization_id=context.organization_id)
|
||||||
step = await app.DATABASE.get_step(context.step_id, organization_id=context.organization_id)
|
step = await app.DATABASE.get_step(context.step_id, organization_id=context.organization_id)
|
||||||
if ai_infer and intention and task and step:
|
if intention and task and step:
|
||||||
try:
|
try:
|
||||||
prompt = context.prompt if context else None
|
prompt = context.prompt if context else None
|
||||||
# data = _get_context_data(data)
|
# data = _get_context_data(data)
|
||||||
@@ -735,7 +889,9 @@ class SkyvernPage:
|
|||||||
prompt_name="single-select-action",
|
prompt_name="single-select-action",
|
||||||
organization_id=context.organization_id if context else None,
|
organization_id=context.organization_id if context else None,
|
||||||
)
|
)
|
||||||
actions = parse_actions(task, step.step_id, step.order, self.scraped_page, json_response["actions"])
|
actions = parse_actions(
|
||||||
|
task, step.step_id, step.order, self.scraped_page, json_response.get("actions", [])
|
||||||
|
)
|
||||||
if actions:
|
if actions:
|
||||||
action = actions[0]
|
action = actions[0]
|
||||||
if not action.option:
|
if not action.option:
|
||||||
@@ -750,15 +906,60 @@ class SkyvernPage:
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
LOG.exception(
|
LOG.exception(
|
||||||
f"Failed to parse actions for select option action on xpath={xpath}, value={value}"
|
f"Failed to parse actions for select option action on selector={selector}, value={value}"
|
||||||
)
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
LOG.exception(f"Failed to adapt value for select option action on xpath={xpath}, value={value}")
|
LOG.exception(
|
||||||
|
f"Failed to adapt value for select option action on selector={selector}, value={value}"
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
locator = self.page.locator(f"xpath={xpath}")
|
locator = self.page.locator(selector)
|
||||||
await locator.select_option(option_value, timeout=timeout)
|
await locator.select_option(option_value, timeout=timeout)
|
||||||
return option_value
|
return option_value
|
||||||
|
|
||||||
|
@action_wrap(ActionType.SELECT_OPTION)
|
||||||
|
async def select_option(
|
||||||
|
self,
|
||||||
|
selector: str,
|
||||||
|
value: str | None = None,
|
||||||
|
label: str | None = None,
|
||||||
|
ai: str | None = "fallback",
|
||||||
|
intention: str | None = None,
|
||||||
|
data: str | dict[str, Any] | None = None,
|
||||||
|
timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS,
|
||||||
|
) -> str:
|
||||||
|
value = value or ""
|
||||||
|
if ai == "fallback":
|
||||||
|
error_to_raise = None
|
||||||
|
try:
|
||||||
|
locator = self.page.locator(selector)
|
||||||
|
await locator.select_option(value, timeout=timeout)
|
||||||
|
except Exception as e:
|
||||||
|
error_to_raise = e
|
||||||
|
if intention:
|
||||||
|
return await self._ai_select_option(
|
||||||
|
selector=selector,
|
||||||
|
value=value,
|
||||||
|
intention=intention,
|
||||||
|
data=data,
|
||||||
|
timeout=timeout,
|
||||||
|
)
|
||||||
|
if error_to_raise:
|
||||||
|
raise error_to_raise
|
||||||
|
else:
|
||||||
|
return value
|
||||||
|
elif ai == "proactive" and intention:
|
||||||
|
return await self._ai_select_option(
|
||||||
|
selector=selector,
|
||||||
|
value=value,
|
||||||
|
intention=intention,
|
||||||
|
data=data,
|
||||||
|
timeout=timeout,
|
||||||
|
)
|
||||||
|
locator = self.page.locator(selector)
|
||||||
|
await locator.select_option(value, timeout=timeout)
|
||||||
|
return value
|
||||||
|
|
||||||
@action_wrap(ActionType.WAIT)
|
@action_wrap(ActionType.WAIT)
|
||||||
async def wait(
|
async def wait(
|
||||||
self, seconds: float, intention: str | None = None, data: str | dict[str, Any] | None = None
|
self, seconds: float, intention: str | None = None, data: str | dict[str, Any] | None = None
|
||||||
@@ -873,9 +1074,7 @@ class SkyvernPage:
|
|||||||
return result
|
return result
|
||||||
|
|
||||||
@action_wrap(ActionType.VERIFICATION_CODE)
|
@action_wrap(ActionType.VERIFICATION_CODE)
|
||||||
async def verification_code(
|
async def verification_code(self, intention: str | None = None, data: str | dict[str, Any] | None = None) -> None:
|
||||||
self, xpath: str, intention: str | None = None, data: str | dict[str, Any] | None = None
|
|
||||||
) -> None:
|
|
||||||
return
|
return
|
||||||
|
|
||||||
@action_wrap(ActionType.SCROLL)
|
@action_wrap(ActionType.SCROLL)
|
||||||
|
|||||||
@@ -37,6 +37,7 @@ from skyvern.forge.sdk.workflow.models.block import (
|
|||||||
ForLoopBlock,
|
ForLoopBlock,
|
||||||
HttpRequestBlock,
|
HttpRequestBlock,
|
||||||
LoginBlock,
|
LoginBlock,
|
||||||
|
NavigationBlock,
|
||||||
PDFParserBlock,
|
PDFParserBlock,
|
||||||
SendEmailBlock,
|
SendEmailBlock,
|
||||||
TaskBlock,
|
TaskBlock,
|
||||||
@@ -1176,7 +1177,7 @@ async def run_task(
|
|||||||
if cache_key and cached_fn:
|
if cache_key and cached_fn:
|
||||||
# Auto-create workflow block run and task if workflow_run_id is available
|
# Auto-create workflow block run and task if workflow_run_id is available
|
||||||
workflow_run_block_id, task_id, step_id = await _create_workflow_block_run_and_task(
|
workflow_run_block_id, task_id, step_id = await _create_workflow_block_run_and_task(
|
||||||
block_type=BlockType.TASK,
|
block_type=BlockType.NAVIGATION,
|
||||||
prompt=prompt,
|
prompt=prompt,
|
||||||
url=url,
|
url=url,
|
||||||
label=cache_key,
|
label=cache_key,
|
||||||
@@ -1201,7 +1202,7 @@ async def run_task(
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
LOG.exception("Failed to run task block. Falling back to AI run.")
|
LOG.exception("Failed to run task block. Falling back to AI run.")
|
||||||
await _fallback_to_ai_run(
|
await _fallback_to_ai_run(
|
||||||
block_type=BlockType.TASK,
|
block_type=BlockType.NAVIGATION,
|
||||||
cache_key=cache_key,
|
cache_key=cache_key,
|
||||||
prompt=prompt,
|
prompt=prompt,
|
||||||
url=url,
|
url=url,
|
||||||
@@ -1216,7 +1217,7 @@ async def run_task(
|
|||||||
context.prompt = None
|
context.prompt = None
|
||||||
else:
|
else:
|
||||||
block_validation_output = await _validate_and_get_output_parameter(label)
|
block_validation_output = await _validate_and_get_output_parameter(label)
|
||||||
task_block = TaskBlock(
|
task_block = NavigationBlock(
|
||||||
label=block_validation_output.label,
|
label=block_validation_output.label,
|
||||||
output_parameter=block_validation_output.output_parameter,
|
output_parameter=block_validation_output.output_parameter,
|
||||||
url=url,
|
url=url,
|
||||||
|
|||||||
Reference in New Issue
Block a user