Verification code V2 - support verification codes entered across multiple separate single-character input fields (#683)
Co-authored-by: Shuchang Zheng <wintonzheng0325@gmail.com>
This commit is contained in:
@@ -11,7 +11,6 @@ PAGE_CONTENT_TIMEOUT = 300 # 5 mins
|
||||
|
||||
# reserved fields for navigation payload
|
||||
SPECIAL_FIELD_VERIFICATION_CODE = "verification_code"
|
||||
VERIFICATION_CODE_PLACEHOLDER = "REAL_TOTP_CODE"
|
||||
|
||||
VERIFICATION_CODE_POLLING_TIMEOUT_MINS = 10
|
||||
|
||||
|
||||
@@ -11,12 +11,7 @@ from playwright._impl._errors import TargetClosedError
|
||||
from playwright.async_api import Page
|
||||
|
||||
from skyvern import analytics
|
||||
from skyvern.constants import (
|
||||
SCRAPE_TYPE_ORDER,
|
||||
SPECIAL_FIELD_VERIFICATION_CODE,
|
||||
VERIFICATION_CODE_PLACEHOLDER,
|
||||
ScrapeType,
|
||||
)
|
||||
from skyvern.constants import SCRAPE_TYPE_ORDER, SPECIAL_FIELD_VERIFICATION_CODE, ScrapeType
|
||||
from skyvern.exceptions import (
|
||||
BrowserStateMissingPage,
|
||||
EmptyScrapePage,
|
||||
@@ -53,7 +48,7 @@ from skyvern.webeye.actions.actions import (
|
||||
WebAction,
|
||||
parse_actions,
|
||||
)
|
||||
from skyvern.webeye.actions.handler import ActionHandler
|
||||
from skyvern.webeye.actions.handler import ActionHandler, poll_verification_code
|
||||
from skyvern.webeye.actions.models import AgentStepOutput, DetailedAgentStepOutput
|
||||
from skyvern.webeye.actions.responses import ActionResult
|
||||
from skyvern.webeye.browser_factory import BrowserState
|
||||
@@ -548,6 +543,13 @@ class ForgeAgent:
|
||||
step=step,
|
||||
screenshots=scraped_page.screenshots,
|
||||
)
|
||||
json_response = await self.handle_potential_verification_code(
|
||||
task,
|
||||
step,
|
||||
scraped_page,
|
||||
browser_state,
|
||||
json_response,
|
||||
)
|
||||
detailed_agent_step_output.llm_response = json_response
|
||||
|
||||
actions = parse_actions(task, json_response["actions"])
|
||||
@@ -951,16 +953,6 @@ class ForgeAgent:
|
||||
num_elements=len(scraped_page.elements),
|
||||
url=task.url,
|
||||
)
|
||||
|
||||
actions_and_results_str = await self._get_action_results(task)
|
||||
|
||||
# Generate the extract action prompt
|
||||
navigation_goal = task.navigation_goal
|
||||
starting_url = task.url
|
||||
current_url = (
|
||||
await browser_state.page.evaluate("() => document.location.href") if browser_state.page else starting_url
|
||||
)
|
||||
|
||||
# TODO: we only use HTML element for now, introduce a way to switch in the future
|
||||
element_tree_format = ElementTreeFormat.HTML
|
||||
LOG.info(
|
||||
@@ -971,18 +963,12 @@ class ForgeAgent:
|
||||
)
|
||||
|
||||
element_tree_in_prompt: str = scraped_page.build_element_tree(element_tree_format)
|
||||
final_navigation_payload = self._build_navigation_payload(task)
|
||||
extract_action_prompt = prompt_engine.load_prompt(
|
||||
"extract-action",
|
||||
navigation_goal=navigation_goal,
|
||||
navigation_payload_str=json.dumps(final_navigation_payload),
|
||||
starting_url=starting_url,
|
||||
current_url=current_url,
|
||||
elements=element_tree_in_prompt,
|
||||
data_extraction_goal=task.data_extraction_goal,
|
||||
action_history=actions_and_results_str,
|
||||
error_code_mapping_str=(json.dumps(task.error_code_mapping) if task.error_code_mapping else None),
|
||||
utc_datetime=datetime.utcnow().strftime("%Y-%m-%d %H:%M"),
|
||||
extract_action_prompt = await self._build_extract_action_prompt(
|
||||
task,
|
||||
browser_state,
|
||||
element_tree_in_prompt,
|
||||
verification_code_check=bool(task.totp_verification_url),
|
||||
expire_verification_code=True,
|
||||
)
|
||||
|
||||
await app.ARTIFACT_MANAGER.create_artifact(
|
||||
@@ -1013,26 +999,62 @@ class ForgeAgent:
|
||||
|
||||
return scraped_page, extract_action_prompt
|
||||
|
||||
async def _build_extract_action_prompt(
    self,
    task: Task,
    browser_state: BrowserState,
    element_tree_in_prompt: str,
    verification_code_check: bool = False,
    expire_verification_code: bool = False,
) -> str:
    """Render the "extract-action" prompt for ``task``.

    Args:
        task: Task supplying the navigation goal, starting URL, navigation
            payload, data extraction goal and error code mapping.
        browser_state: When it has a page, the page's
            ``document.location.href`` is used as the current URL; otherwise
            the task's starting URL is used.
        element_tree_in_prompt: Already-built element tree, passed to the
            template as ``elements``.
        verification_code_check: Passed through to the template under the
            same name.
        expire_verification_code: Passed through to
            ``_build_navigation_payload``.

    Returns:
        The string produced by ``prompt_engine.load_prompt``.
    """
    # Imported locally so this method does not depend on the module's import
    # block already exposing ``timezone``.
    from datetime import timezone

    actions_and_results_str = await self._get_action_results(task)

    # Generate the extract action prompt
    starting_url = task.url
    if browser_state.page:
        current_url = await browser_state.page.evaluate("() => document.location.href")
    else:
        current_url = starting_url

    final_navigation_payload = self._build_navigation_payload(
        task,
        expire_verification_code=expire_verification_code,
    )
    error_code_mapping_str = json.dumps(task.error_code_mapping) if task.error_code_mapping else None

    return prompt_engine.load_prompt(
        "extract-action",
        navigation_goal=task.navigation_goal,
        navigation_payload_str=json.dumps(final_navigation_payload),
        starting_url=starting_url,
        current_url=current_url,
        elements=element_tree_in_prompt,
        data_extraction_goal=task.data_extraction_goal,
        action_history=actions_and_results_str,
        error_code_mapping_str=error_code_mapping_str,
        # datetime.utcnow() is deprecated since Python 3.12; an aware UTC
        # datetime formats to the exact same "%Y-%m-%d %H:%M" string.
        utc_datetime=datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M"),
        verification_code_check=verification_code_check,
    )
|
||||
|
||||
def _build_navigation_payload(
    self,
    task: Task,
    expire_verification_code: bool = False,
) -> dict[str, Any] | list | str | None:
    """Return the task's navigation payload, with any pending verification code added.

    The code is looked up in the current Skyvern context's ``totp_codes``
    under ``task.task_id``. It is only added when the task has a
    ``totp_verification_url``, a code is present, and the payload does not
    already contain ``SPECIAL_FIELD_VERIFICATION_CODE`` (a key check for dict
    payloads, a substring check for str payloads). Payloads of any other
    type are returned as-is.

    Args:
        task: Task whose ``navigation_payload`` is the starting point.
        expire_verification_code: When True and a code was available, remove
            it from the context after building the payload.

    Returns:
        The payload to serialize into the prompt. ``task.navigation_payload``
        itself is never modified.
    """
    final_navigation_payload = task.navigation_payload
    current_context = skyvern_context.ensure_context()
    verification_code = current_context.totp_codes.get(task.task_id)
    if not (task.totp_verification_url and verification_code):
        return final_navigation_payload

    if isinstance(final_navigation_payload, dict):
        if SPECIAL_FIELD_VERIFICATION_CODE not in final_navigation_payload:
            # Build a new dict instead of writing into task.navigation_payload:
            # an in-place write would keep the code in the task's payload after
            # it has been expired, and the "not in" guard above would then
            # prevent any later, newer code from being inserted.
            final_navigation_payload = {
                **final_navigation_payload,
                SPECIAL_FIELD_VERIFICATION_CODE: verification_code,
            }
    elif isinstance(final_navigation_payload, str):
        if SPECIAL_FIELD_VERIFICATION_CODE not in final_navigation_payload:
            final_navigation_payload = (
                final_navigation_payload + "\n" + str({SPECIAL_FIELD_VERIFICATION_CODE: verification_code})
            )

    if expire_verification_code:
        # Default avoids a KeyError should the entry already be gone.
        current_context.totp_codes.pop(task.task_id, None)
    return final_navigation_payload
|
||||
|
||||
async def _get_action_results(self, task: Task) -> str:
|
||||
@@ -1552,6 +1574,40 @@ class ForgeAgent:
|
||||
)
|
||||
return None, None, next_step
|
||||
|
||||
async def handle_potential_verification_code(
    self,
    task: Task,
    step: Step,
    scraped_page: ScrapedPage,
    browser_state: BrowserState,
    json_response: dict[str, Any],
) -> dict[str, Any]:
    """Re-query the LLM with a polled verification code when the response asks for one.

    If ``json_response`` has a truthy ``"need_verification_code"`` and the task
    has both a ``totp_verification_url`` and an ``organization_id``, the code
    is polled, stored in the current Skyvern context's ``totp_codes`` under
    the task id, and the extract-action prompt is rebuilt and sent to the LLM
    again. In every other case ``json_response`` is returned unchanged.

    Args:
        task: Task being executed.
        step: Current step; used for logging and for the LLM call.
        scraped_page: Provides the HTML element tree and screenshots.
        browser_state: Forwarded to the prompt builder.
        json_response: The LLM response to inspect.

    Returns:
        The new LLM response when a code was requested, else ``json_response``.
    """
    # TODO: handle verifications and resend the request if needed
    code_requested = json_response.get("need_verification_code")
    if not (code_requested and task.totp_verification_url and task.organization_id):
        return json_response

    LOG.info("Need verification code", step_id=step.step_id)
    polled_code = await poll_verification_code(
        task.task_id,
        task.organization_id,
        url=task.totp_verification_url,
    )
    # Stash the code on the context so the prompt builder below can pick it up.
    context = skyvern_context.ensure_context()
    context.totp_codes[task.task_id] = polled_code

    html_element_tree: str = scraped_page.build_element_tree(ElementTreeFormat.HTML)
    # The code stays in the context (not expired) and the template is not
    # asked to emit the need_verification_code field again.
    retry_prompt = await self._build_extract_action_prompt(
        task,
        browser_state,
        html_element_tree,
        verification_code_check=False,
        expire_verification_code=False,
    )
    return await app.LLM_API_HANDLER(
        prompt=retry_prompt,
        step=step,
        screenshots=scraped_page.screenshots,
    )
|
||||
|
||||
@staticmethod
|
||||
async def get_task_errors(task: Task) -> list[UserDefinedError]:
|
||||
steps = await app.DATABASE.get_task_steps(task_id=task.task_id, organization_id=task.organization_id)
|
||||
|
||||
@@ -9,6 +9,7 @@ If you see a popup in the page screenshot, prioritize actions on the popup.
|
||||
|
||||
Reply in JSON format with the following keys:
|
||||
{
|
||||
{% if verification_code_check %} "need_verification_code": bool, // Whether a verification code is needed to proceed.{% endif %}
|
||||
"user_goal_achieved": str, // A string that describes if user goal has been completed with reasoning.
|
||||
"action_plan": str, // A string that describes the plan of actions you're going to take. Be specific and to the point. Use this as a quick summary of the actions you're going to take, and what order you're going to take them in, and how that moves you towards your overall goal. Output "COMPLETE" action in the "actions" if user goal has been achieved.
|
||||
"actions": array // An array of actions. Here's the format of each action:
|
||||
@@ -36,10 +37,8 @@ Reply in JSON format with the following keys:
|
||||
}],
|
||||
}
|
||||
{% if action_history %}
|
||||
|
||||
Consider the action history from the last step and the screenshot together, if actions from the last step don't yield positive impact, try other actions or other action combinations.
|
||||
{% endif %}
|
||||
|
||||
Clickable elements from `{{ current_url }}`:
|
||||
```
|
||||
{{ elements }}
|
||||
@@ -52,12 +51,10 @@ User goal:
|
||||
{{ navigation_goal }}
|
||||
```
|
||||
{% if error_code_mapping_str %}
|
||||
|
||||
Use the error codes and their descriptions to surface user-defined errors. Do not return any error that's not defined by the user. User defined errors:
|
||||
{{ error_code_mapping_str }}
|
||||
{% endif %}
|
||||
{% if data_extraction_goal %}
|
||||
|
||||
User Data Extraction Goal:
|
||||
```
|
||||
{{ data_extraction_goal }}
|
||||
@@ -69,11 +66,9 @@ User details:
|
||||
{{ navigation_payload_str }}
|
||||
```
|
||||
{% if action_history %}
|
||||
|
||||
Action results from previous steps: (note: even if the action history suggests goal is achieved, check the screenshot and the DOM elements to make sure the goal is achieved)
|
||||
{{ action_history }}
|
||||
{% endif %}
|
||||
|
||||
Current datetime in UTC, YYYY-MM-DD HH:MM format:
|
||||
```
|
||||
{{ utc_datetime }}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from contextvars import ContextVar
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -10,6 +10,7 @@ class SkyvernContext:
|
||||
workflow_id: str | None = None
|
||||
workflow_run_id: str | None = None
|
||||
max_steps_override: int | None = None
|
||||
totp_codes: dict[str, str | None] = field(default_factory=dict)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"SkyvernContext(request_id={self.request_id}, organization_id={self.organization_id}, task_id={self.task_id}, workflow_id={self.workflow_id}, workflow_run_id={self.workflow_run_id}, max_steps_override={self.max_steps_override})"
|
||||
|
||||
@@ -10,7 +10,7 @@ import structlog
|
||||
from deprecation import deprecated
|
||||
from playwright.async_api import FileChooser, Locator, Page, TimeoutError
|
||||
|
||||
from skyvern.constants import REPO_ROOT_DIR, VERIFICATION_CODE_PLACEHOLDER, VERIFICATION_CODE_POLLING_TIMEOUT_MINS
|
||||
from skyvern.constants import REPO_ROOT_DIR, VERIFICATION_CODE_POLLING_TIMEOUT_MINS
|
||||
from skyvern.exceptions import (
|
||||
EmptySelect,
|
||||
ErrFoundSelectableElement,
|
||||
@@ -711,13 +711,6 @@ async def get_actual_value_of_parameter_if_secret(task: Task, parameter: str) ->
|
||||
|
||||
This is only used for InputTextAction, UploadFileAction, and ClickAction (if it has a file_url).
|
||||
"""
|
||||
if task.totp_verification_url and task.organization_id and VERIFICATION_CODE_PLACEHOLDER == parameter:
|
||||
# if parameter is the secret code in the navigation payload,
|
||||
# fetch the real verification code from totp_verification_url
|
||||
# do polling every 10 seconds to fetch the verification code
|
||||
verification_code = await poll_verification_code(task.task_id, task.organization_id, task.totp_verification_url)
|
||||
return verification_code
|
||||
|
||||
if task.workflow_run_id is None:
|
||||
return parameter
|
||||
|
||||
|
||||
Reference in New Issue
Block a user