verification code V2 - support verification code of multiple separate single character input fields (#683)

Co-authored-by: Shuchang Zheng <wintonzheng0325@gmail.com>
This commit is contained in:
Kerem Yilmaz
2024-08-08 02:17:15 +03:00
committed by GitHub
parent 78adb8b276
commit c872b1e4a8
5 changed files with 94 additions and 50 deletions

View File

@@ -11,7 +11,6 @@ PAGE_CONTENT_TIMEOUT = 300 # 5 mins
# reserved fields for navigation payload
# Key under which a verification code is added to the navigation payload.
SPECIAL_FIELD_VERIFICATION_CODE = "verification_code"
# Placeholder string that stands in for the real TOTP code in the payload.
VERIFICATION_CODE_PLACEHOLDER = "REAL_TOTP_CODE"
# NOTE(review): presumably the maximum time, in minutes, spent polling for a
# verification code -- confirm against poll_verification_code.
VERIFICATION_CODE_POLLING_TIMEOUT_MINS = 10

View File

@@ -11,12 +11,7 @@ from playwright._impl._errors import TargetClosedError
from playwright.async_api import Page
from skyvern import analytics
from skyvern.constants import (
SCRAPE_TYPE_ORDER,
SPECIAL_FIELD_VERIFICATION_CODE,
VERIFICATION_CODE_PLACEHOLDER,
ScrapeType,
)
from skyvern.constants import SCRAPE_TYPE_ORDER, SPECIAL_FIELD_VERIFICATION_CODE, ScrapeType
from skyvern.exceptions import (
BrowserStateMissingPage,
EmptyScrapePage,
@@ -53,7 +48,7 @@ from skyvern.webeye.actions.actions import (
WebAction,
parse_actions,
)
from skyvern.webeye.actions.handler import ActionHandler
from skyvern.webeye.actions.handler import ActionHandler, poll_verification_code
from skyvern.webeye.actions.models import AgentStepOutput, DetailedAgentStepOutput
from skyvern.webeye.actions.responses import ActionResult
from skyvern.webeye.browser_factory import BrowserState
@@ -548,6 +543,13 @@ class ForgeAgent:
step=step,
screenshots=scraped_page.screenshots,
)
json_response = await self.handle_potential_verification_code(
task,
step,
scraped_page,
browser_state,
json_response,
)
detailed_agent_step_output.llm_response = json_response
actions = parse_actions(task, json_response["actions"])
@@ -951,16 +953,6 @@ class ForgeAgent:
num_elements=len(scraped_page.elements),
url=task.url,
)
actions_and_results_str = await self._get_action_results(task)
# Generate the extract action prompt
navigation_goal = task.navigation_goal
starting_url = task.url
current_url = (
await browser_state.page.evaluate("() => document.location.href") if browser_state.page else starting_url
)
# TODO: we only use HTML element for now, introduce a way to switch in the future
element_tree_format = ElementTreeFormat.HTML
LOG.info(
@@ -971,18 +963,12 @@ class ForgeAgent:
)
element_tree_in_prompt: str = scraped_page.build_element_tree(element_tree_format)
final_navigation_payload = self._build_navigation_payload(task)
extract_action_prompt = prompt_engine.load_prompt(
"extract-action",
navigation_goal=navigation_goal,
navigation_payload_str=json.dumps(final_navigation_payload),
starting_url=starting_url,
current_url=current_url,
elements=element_tree_in_prompt,
data_extraction_goal=task.data_extraction_goal,
action_history=actions_and_results_str,
error_code_mapping_str=(json.dumps(task.error_code_mapping) if task.error_code_mapping else None),
utc_datetime=datetime.utcnow().strftime("%Y-%m-%d %H:%M"),
extract_action_prompt = await self._build_extract_action_prompt(
task,
browser_state,
element_tree_in_prompt,
verification_code_check=bool(task.totp_verification_url),
expire_verification_code=True,
)
await app.ARTIFACT_MANAGER.create_artifact(
@@ -1013,26 +999,62 @@ class ForgeAgent:
return scraped_page, extract_action_prompt
async def _build_extract_action_prompt(
    self,
    task: Task,
    browser_state: BrowserState,
    element_tree_in_prompt: str,
    verification_code_check: bool = False,
    expire_verification_code: bool = False,
) -> str:
    """Render the "extract-action" prompt for the task's current page.

    Args:
        task: Task whose goals, URL, navigation payload and error-code
            mapping are fed into the prompt.
        browser_state: Browser state. When it has a page, the page's live
            ``document.location.href`` is used as the current URL;
            otherwise the task's starting URL is used.
        element_tree_in_prompt: Already-rendered element tree of the page.
        verification_code_check: Passed through to the template, which
            uses it to request a ``need_verification_code`` key in the
            model's reply.
        expire_verification_code: Passed through to
            ``_build_navigation_payload``.

    Returns:
        The rendered prompt text.
    """
    # Function-scope import: the enclosing module is only known to expose
    # ``datetime`` itself, and ``timezone`` is needed for an aware "now".
    from datetime import timezone

    action_history = await self._get_action_results(task)

    starting_url = task.url
    if browser_state.page:
        current_url = await browser_state.page.evaluate("() => document.location.href")
    else:
        current_url = starting_url

    navigation_payload = self._build_navigation_payload(
        task,
        expire_verification_code=expire_verification_code,
    )
    error_code_mapping_str = json.dumps(task.error_code_mapping) if task.error_code_mapping else None

    return prompt_engine.load_prompt(
        "extract-action",
        navigation_goal=task.navigation_goal,
        navigation_payload_str=json.dumps(navigation_payload),
        starting_url=starting_url,
        current_url=current_url,
        elements=element_tree_in_prompt,
        data_extraction_goal=task.data_extraction_goal,
        action_history=action_history,
        error_code_mapping_str=error_code_mapping_str,
        # datetime.utcnow() is deprecated (Python 3.12+) and returns a naive
        # value; an aware UTC "now" formats to exactly the same string.
        utc_datetime=datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M"),
        verification_code_check=verification_code_check,
    )
def _build_navigation_payload(
    self,
    task: Task,
    expire_verification_code: bool = False,
) -> dict[str, Any] | list | str | None:
    """Return the task's navigation payload, adding a verification code if one is cached.

    The code is looked up in the current Skyvern context's ``totp_codes``
    under the task's id. It is only added when the task has a
    ``totp_verification_url``, a code is cached, and the payload does not
    already contain ``SPECIAL_FIELD_VERIFICATION_CODE``:

    * dict payload: the code is added under that key, on a shallow copy;
    * str payload: a line holding ``str({key: code})`` is appended;
    * any other payload type is returned unchanged.

    Args:
        task: Task providing the payload, id and TOTP verification URL.
        expire_verification_code: When true, and a code was available for
            this call, remove it from the context afterwards.

    Returns:
        The payload to serialise into the prompt. ``task.navigation_payload``
        itself is never modified.
    """
    payload = task.navigation_payload
    current_context = skyvern_context.ensure_context()
    verification_code = current_context.totp_codes.get(task.task_id)

    if not (task.totp_verification_url and verification_code):
        return payload

    if isinstance(payload, dict):
        if SPECIAL_FIELD_VERIFICATION_CODE not in payload:
            # Copy instead of writing into task.navigation_payload: an
            # in-place write would keep the one-time code on the task after
            # it has been expired from the context, and the "not in" guard
            # would then stop any newer code from replacing it.
            payload = {**payload, SPECIAL_FIELD_VERIFICATION_CODE: verification_code}
    elif isinstance(payload, str) and SPECIAL_FIELD_VERIFICATION_CODE not in payload:
        payload = payload + "\n" + str({SPECIAL_FIELD_VERIFICATION_CODE: verification_code})

    if expire_verification_code:
        # A default keeps the pop from raising if the entry is already gone.
        current_context.totp_codes.pop(task.task_id, None)

    return payload
async def _get_action_results(self, task: Task) -> str:
@@ -1552,6 +1574,40 @@ class ForgeAgent:
)
return None, None, next_step
async def handle_potential_verification_code(
    self,
    task: Task,
    step: Step,
    scraped_page: ScrapedPage,
    browser_state: BrowserState,
    json_response: dict[str, Any],
) -> dict[str, Any]:
    """Re-query the LLM with a verification code when the response asks for one.

    If ``json_response`` has a truthy ``need_verification_code`` and the task
    has both a ``totp_verification_url`` and an ``organization_id``, a code is
    polled, stored in the current Skyvern context under the task's id, and a
    new extract-action prompt is sent to the LLM. In every other case the
    given response is returned untouched.

    Args:
        task: Task being executed.
        step: Step the LLM call is attributed to.
        scraped_page: Scraped page supplying the element tree and screenshots.
        browser_state: Browser state used while building the prompt.
        json_response: The LLM response to inspect.

    Returns:
        The new LLM response when a code was requested and could be polled
        for, otherwise ``json_response``.
    """
    # TODO: handle verifications and resend the request if needed
    code_requested = json_response.get("need_verification_code")
    if not (code_requested and task.totp_verification_url and task.organization_id):
        return json_response

    LOG.info("Need verification code", step_id=step.step_id)
    polled_code = await poll_verification_code(
        task.task_id,
        task.organization_id,
        url=task.totp_verification_url,
    )
    # NOTE(review): the polled value is stored as-is; confirm whether
    # poll_verification_code can return None (e.g. on timeout).
    skyvern_context.ensure_context().totp_codes[task.task_id] = polled_code

    # Rebuild the prompt without the verification check and without expiring
    # the code that was just stored.
    html_element_tree: str = scraped_page.build_element_tree(ElementTreeFormat.HTML)
    retry_prompt = await self._build_extract_action_prompt(
        task,
        browser_state,
        html_element_tree,
        verification_code_check=False,
        expire_verification_code=False,
    )
    return await app.LLM_API_HANDLER(
        prompt=retry_prompt,
        step=step,
        screenshots=scraped_page.screenshots,
    )
@staticmethod
async def get_task_errors(task: Task) -> list[UserDefinedError]:
steps = await app.DATABASE.get_task_steps(task_id=task.task_id, organization_id=task.organization_id)

View File

@@ -9,6 +9,7 @@ If you see a popup in the page screenshot, prioritize actions on the popup.
Reply in JSON format with the following keys:
{
{% if verification_code_check %} "need_verification_code": bool, // Whether a verification code is needed to proceed.{% endif %}
"user_goal_achieved": str, // A string that describes if user goal has been completed with reasoning.
"action_plan": str, // A string that describes the plan of actions you're going to take. Be specific and to the point. Use this as a quick summary of the actions you're going to take, and what order you're going to take them in, and how that moves you towards your overall goal. Output "COMPLETE" action in the "actions" if user goal has been achieved.
"actions": array // An array of actions. Here's the format of each action:
@@ -36,10 +37,8 @@ Reply in JSON format with the following keys:
}],
}
{% if action_history %}
Consider the action history from the last step and the screenshot together, if actions from the last step don't yield positive impact, try other actions or other action combinations.
{% endif %}
Clickable elements from `{{ current_url }}`:
```
{{ elements }}
@@ -52,12 +51,10 @@ User goal:
{{ navigation_goal }}
```
{% if error_code_mapping_str %}
Use the error codes and their descriptions to surface user-defined errors. Do not return any error that's not defined by the user. User defined errors:
{{ error_code_mapping_str }}
{% endif %}
{% if data_extraction_goal %}
User Data Extraction Goal:
```
{{ data_extraction_goal }}
@@ -69,11 +66,9 @@ User details:
{{ navigation_payload_str }}
```
{% if action_history %}
Action results from previous steps: (note: even if the action history suggests goal is achieved, check the screenshot and the DOM elements to make sure the goal is achieved)
{{ action_history }}
{% endif %}
Current datetime in UTC, YYYY-MM-DD HH:MM format:
```
{{ utc_datetime }}

View File

@@ -1,5 +1,5 @@
from contextvars import ContextVar
from dataclasses import dataclass
from dataclasses import dataclass, field
@dataclass
@@ -10,6 +10,7 @@ class SkyvernContext:
workflow_id: str | None = None
workflow_run_id: str | None = None
max_steps_override: int | None = None
totp_codes: dict[str, str | None] = field(default_factory=dict)
def __repr__(self) -> str:
    """Return a one-line summary of the request, organization, task, workflow and max-steps fields.

    ``totp_codes`` is not included in the output.
    """
    return f"SkyvernContext(request_id={self.request_id}, organization_id={self.organization_id}, task_id={self.task_id}, workflow_id={self.workflow_id}, workflow_run_id={self.workflow_run_id}, max_steps_override={self.max_steps_override})"

View File

@@ -10,7 +10,7 @@ import structlog
from deprecation import deprecated
from playwright.async_api import FileChooser, Locator, Page, TimeoutError
from skyvern.constants import REPO_ROOT_DIR, VERIFICATION_CODE_PLACEHOLDER, VERIFICATION_CODE_POLLING_TIMEOUT_MINS
from skyvern.constants import REPO_ROOT_DIR, VERIFICATION_CODE_POLLING_TIMEOUT_MINS
from skyvern.exceptions import (
EmptySelect,
ErrFoundSelectableElement,
@@ -711,13 +711,6 @@ async def get_actual_value_of_parameter_if_secret(task: Task, parameter: str) ->
This is only used for InputTextAction, UploadFileAction, and ClickAction (if it has a file_url).
"""
if task.totp_verification_url and task.organization_id and VERIFICATION_CODE_PLACEHOLDER == parameter:
# if parameter is the secret code in the navigation payload,
# fetch the real verification from totp_verification_url
# do polling every 10 seconds to fetch the verification code
verification_code = await poll_verification_code(task.task_id, task.organization_id, task.totp_verification_url)
return verification_code
if task.workflow_run_id is None:
return parameter