From 715fe9809e6a2db385fe0283e93ddca38a1b5234 Mon Sep 17 00:00:00 2001 From: Shuchang Zheng Date: Thu, 10 Apr 2025 23:20:22 -0700 Subject: [PATCH] add phone number format checking (#2125) --- .../skyvern/check-phone-number-format.j2 | 41 +++++++++++ skyvern/utils/prompt_engine.py | 8 +++ skyvern/webeye/actions/handler.py | 69 ++++++++++++++++++- 3 files changed, 116 insertions(+), 2 deletions(-) create mode 100644 skyvern/forge/prompts/skyvern/check-phone-number-format.j2 diff --git a/skyvern/forge/prompts/skyvern/check-phone-number-format.j2 b/skyvern/forge/prompts/skyvern/check-phone-number-format.j2 new file mode 100644 index 00000000..c4f3c0b9 --- /dev/null +++ b/skyvern/forge/prompts/skyvern/check-phone-number-format.j2 @@ -0,0 +1,41 @@ +You need to help check whether the current phone number format matches the required format according to the user goal, user details and HTML elements. + +There are several checkpoints to verify the format of the phone number: +- Whether the phone number should include hyphens. +- Whether the phone number should include a country code. +- Whether the phone number should include a space between the country code and the area code. + +MAKE SURE YOU OUTPUT VALID JSON. No text before or after JSON, no trailing commas, no comments (//), no unnecessary quotes, etc. + +Reply in JSON format with the following keys: +{ +"phone_number_format": str, // Think step by step. The format of the phone number required on the page according to HTML elements. +"thought": str, // Think step by step. Describe your reasoning for how you came up with the phone_number_format. Use information you see on the site to explain. +"is_current_format_correct": bool, // True if the current phone number format matches the required format. +"recommended_phone_number": str, // If is_current_format_correct is True, return null. Otherwise, return the recommended phone number with the correct format. 
class CheckPhoneNumberFormatResponse(BaseModel):
    """Structured reply expected from the ``check-phone-number-format`` prompt."""

    # Phone number format the page requires, as described by the LLM.
    phone_number_format: str
    # The LLM's explanation of how it arrived at ``phone_number_format``.
    thought: str
    # True when the current phone number already matches the required format.
    is_current_format_correct: bool
    # Reformatted phone number; the prompt asks for null when the current
    # format is already correct.
    recommended_phone_number: str | None


async def check_phone_number_format(
    phone_number: str,
    action: actions.InputTextAction,
    skyvern_element: SkyvernElement,
    scraped_page: ScrapedPage,
    task: Task,
    step: Step,
) -> str:
    """Ask the secondary LLM whether ``phone_number`` fits the format the page expects.

    The page is re-scraped without screenshots, its element tree is rendered
    into the ``check-phone-number-format`` prompt together with the task's
    navigation goal and payload, and the LLM reply is validated as a
    ``CheckPhoneNumberFormatResponse``.

    Returns:
        ``phone_number`` unchanged when the LLM reports the format as correct
        or supplies no recommendation; otherwise the recommended phone number.

    Exceptions from the re-scrape, the LLM call, or the response validation
    are not caught here and propagate to the caller.
    """
    LOG.info(
        "Input is a tel input, trigger phone number format checking",
        action=action,
        element_id=skyvern_element.get_id(),
    )

    # Work from a fresh snapshot of the page; only the element tree is needed.
    fresh_page = await scraped_page.generate_scraped_page_without_screenshots()
    element_tree = fresh_page.build_element_tree(html_need_skyvern_attrs=False)

    payload_str = json.dumps(task.navigation_payload)
    now_iso = datetime.now(skyvern_context.ensure_context().tz_info).isoformat()
    prompt = prompt_engine.load_prompt(
        template="check-phone-number-format",
        current_phone_number=phone_number,
        navigation_goal=task.navigation_goal,
        navigation_payload_str=payload_str,
        elements=element_tree,
        local_datetime=now_iso,
    )

    raw_reply = await app.SECONDARY_LLM_API_HANDLER(
        prompt=prompt,
        step=step,
        prompt_name="check-phone-number-format",
    )
    verdict = CheckPhoneNumberFormatResponse.model_validate(raw_reply)

    # Keep the caller's value when nothing needs to change.
    if verdict.is_current_format_correct:
        return phone_number

    recommendation = verdict.recommended_phone_number
    if not recommendation:
        # Format judged wrong, but there is no usable replacement.
        return phone_number

    LOG.info(
        "The current phone number format is incorrect, using the recommended phone number",
        action=action,
        element_id=skyvern_element.get_id(),
        recommended_phone_number=recommendation,
    )
    return recommendation
@@ async def handle_input_text_action( # force to move focus back to the element await skyvern_element.get_locator().focus(timeout=timeout) + + # check the phone number format + if await skyvern_element.get_attr("type") == "tel": + try: + text = await check_phone_number_format( + phone_number=text, + action=action, + skyvern_element=skyvern_element, + scraped_page=scraped_page, + task=task, + step=step, + ) + except Exception: + LOG.warning( + "Failed to check the phone number format, using the original text", + action=action, + exc_info=True, + ) + # `Locator.clear()` on a spin button could cause the cursor moving away, and never be back if not await skyvern_element.is_spinbtn_input(): try: