add phone number format checking (#2125)

This commit is contained in:
Shuchang Zheng
2025-04-10 23:20:22 -07:00
committed by GitHub
parent 7e81386b63
commit 715fe9809e
3 changed files with 116 additions and 2 deletions

View File

@@ -0,0 +1,41 @@
You need to help check whether the current phone number format matches the required format, according to the user goal, user details, and HTML elements.
There are several checkpoints to verify the format of the phone number:
- Whether the phone number should include hyphens.
- Whether the phone number should include the country code.
- Whether the phone number should have a space between the country code and the area code.
MAKE SURE YOU OUTPUT VALID JSON. No text before or after JSON, no trailing commas, no comments (//), no unnecessary quotes, etc.
Reply in JSON format with the following keys:
{
"phone_number_format": str, // Think step by step. The format of the phone number required on the page according to HTML elements.
"thought": str, // Think step by step. Describe your thought about how you come up with the phone_number_format. Use information you see on the site to explain.
"is_current_format_correct": bool, // True if the current phone number format is matching the required format.
"recommended_phone_number": str, // If is_current_format_correct is True, return null. Otherwise, return the recommended phone number with the correct format.
}
Current phone number:
```
{{ current_phone_number }}
```
User goal:
```
{{ navigation_goal }}
```
User details:
```
{{ navigation_payload_str }}
```
HTML elements:
```
{{ elements }}
```
Current datetime, ISO format:
```
{{ local_datetime }}
```

View File

@@ -1,6 +1,7 @@
from typing import Any
import structlog
from pydantic import BaseModel
from skyvern.constants import DEFAULT_MAX_TOKENS
from skyvern.forge.sdk.prompting import PromptEngine
@@ -10,6 +11,13 @@ from skyvern.webeye.scraper.scraper import ScrapedPage
LOG = structlog.get_logger()
class CheckPhoneNumberFormatResponse(BaseModel):
    """Structured reply expected from the ``check-phone-number-format`` prompt.

    The field names mirror the JSON keys that the prompt template asks the
    LLM to return.
    """

    # Phone number format required on the page, as inferred from the HTML elements.
    phone_number_format: str
    # The LLM's explanation of how it arrived at ``phone_number_format``.
    thought: str
    # True when the current phone number already matches the required format.
    is_current_format_correct: bool
    # Phone number rewritten in the required format; null when the current
    # format is already correct. Defaults to None so that a reply which omits
    # the key altogether still validates instead of raising.
    recommended_phone_number: str | None = None
def load_prompt_with_elements(
scraped_page: ScrapedPage,
prompt_engine: PromptEngine,

View File

@@ -68,7 +68,7 @@ from skyvern.forge.sdk.db.enums import OrganizationAuthTokenType
from skyvern.forge.sdk.models import Step
from skyvern.forge.sdk.schemas.tasks import Task
from skyvern.forge.sdk.services.bitwarden import BitwardenConstants
from skyvern.utils.prompt_engine import load_prompt_with_elements
from skyvern.utils.prompt_engine import CheckPhoneNumberFormatResponse, load_prompt_with_elements
from skyvern.webeye.actions import actions
from skyvern.webeye.actions.actions import (
Action,
@@ -211,6 +211,52 @@ def clean_and_remove_element_tree_factory(
return helper_func
async def check_phone_number_format(
    phone_number: str,
    action: actions.InputTextAction,
    skyvern_element: SkyvernElement,
    scraped_page: ScrapedPage,
    task: Task,
    step: Step,
) -> str:
    """Ask the secondary LLM whether ``phone_number`` fits the page's expected format.

    The page is re-scraped without screenshots, its element tree is rendered
    into the ``check-phone-number-format`` prompt together with the task goal
    and payload, and the reply is validated as a
    ``CheckPhoneNumberFormatResponse``.

    Args:
        phone_number: The phone number that is about to be typed.
        action: The input action being handled; only used as log context.
        skyvern_element: The target element; only its id is used, for logging.
        scraped_page: The page to re-scrape for the HTML given to the LLM.
        task: Supplies the navigation goal and payload for the prompt.
        step: Passed through to the LLM handler.

    Returns:
        The LLM's recommended phone number when it reports the current format
        as incorrect and provides a non-empty recommendation; otherwise the
        original ``phone_number`` unchanged.

    Note:
        Nothing is caught here: errors from scraping, the LLM call, or the
        response validation propagate to the caller.
    """
    LOG.info(
        "Input is a tel input, trigger phone number format checking",
        action=action,
        element_id=skyvern_element.get_id(),
    )

    # Build the HTML snapshot the LLM will inspect.
    refreshed_page = await scraped_page.generate_scraped_page_without_screenshots()
    element_tree_html = refreshed_page.build_element_tree(html_need_skyvern_attrs=False)

    payload_json = json.dumps(task.navigation_payload)
    now_iso = datetime.now(skyvern_context.ensure_context().tz_info).isoformat()
    format_check_prompt = prompt_engine.load_prompt(
        template="check-phone-number-format",
        current_phone_number=phone_number,
        navigation_goal=task.navigation_goal,
        navigation_payload_str=payload_json,
        elements=element_tree_html,
        local_datetime=now_iso,
    )

    raw_reply = await app.SECONDARY_LLM_API_HANDLER(
        prompt=format_check_prompt,
        step=step,
        prompt_name="check-phone-number-format",
    )
    verdict = CheckPhoneNumberFormatResponse.model_validate(raw_reply)

    # Keep the original number unless the LLM both rejects the current format
    # and actually supplies a replacement.
    should_replace = not verdict.is_current_format_correct and bool(verdict.recommended_phone_number)
    if not should_replace:
        return phone_number

    LOG.info(
        "The current phone number format is incorrect, using the recommended phone number",
        action=action,
        element_id=skyvern_element.get_id(),
        recommended_phone_number=verdict.recommended_phone_number,
    )
    return verdict.recommended_phone_number
class AutoCompletionResult(BaseModel):
auto_completion_attempt: bool = False
incremental_elements: list[dict] = []
@@ -557,7 +603,7 @@ async def handle_input_text_action(
# before filling text, we need to validate if the element can be filled if it's not one of COMMON_INPUT_TAGS
tag_name = scraped_page.id_to_element_dict[action.element_id]["tagName"].lower()
text = await get_actual_value_of_parameter_if_secret(task, action.text)
text: str | None = await get_actual_value_of_parameter_if_secret(task, action.text)
if text is None:
return [ActionFailure(FailedToFetchSecret())]
@@ -706,6 +752,25 @@ async def handle_input_text_action(
# force to move focus back to the element
await skyvern_element.get_locator().focus(timeout=timeout)
# check the phone number format
if await skyvern_element.get_attr("type") == "tel":
try:
text = await check_phone_number_format(
phone_number=text,
action=action,
skyvern_element=skyvern_element,
scraped_page=scraped_page,
task=task,
step=step,
)
except Exception:
LOG.warning(
"Failed to check the phone number format, using the original text",
action=action,
exc_info=True,
)
# `Locator.clear()` on a spin button could cause the cursor moving away, and never be back
if not await skyvern_element.is_spinbtn_input():
try: