add totp_code_required info to the InputTextAction json (#3415)

This commit is contained in:
Shuchang Zheng
2025-09-11 19:05:25 -07:00
committed by GitHub
parent 0e2aecc75d
commit f1aa653b82
8 changed files with 227 additions and 120 deletions

View File

@@ -164,6 +164,7 @@ class ClickAction(WebAction):
class InputTextAction(WebAction):
    # Web action that enters `text` into the element this action targets.
    action_type: ActionType = ActionType.INPUT_TEXT

    # The text to enter into the element.
    text: str

    # Marks the action as one that involves a TOTP / verification code.
    # Defaults to False so callers that never set it are unaffected.
    totp_code_required: bool = False

    def __repr__(self) -> str:
        """Return a compact one-line description of this action.

        Only element_id, text, the input/select context and tool_call_id are
        included; totp_code_required is not part of the representation.
        """
        return f"InputTextAction(element_id={self.element_id}, text={self.text}, context={self.input_or_select_context}, tool_call_id={self.tool_call_id})"

View File

@@ -48,7 +48,6 @@ from skyvern.exceptions import (
NoIncrementalElementFoundForAutoCompletion,
NoIncrementalElementFoundForCustomSelection,
NoSuitableAutoCompleteOption,
NoTOTPVerificationCodeFound,
OptionIndexOutOfBound,
WrongElementToUploadFile,
)
@@ -64,10 +63,7 @@ from skyvern.forge.sdk.api.files import (
from skyvern.forge.sdk.api.llm.api_handler_factory import LLMAPIHandlerFactory, LLMCallerManager
from skyvern.forge.sdk.api.llm.exceptions import LLMProviderError
from skyvern.forge.sdk.core import skyvern_context
from skyvern.forge.sdk.core.aiohttp_helper import aiohttp_post
from skyvern.forge.sdk.core.security import generate_skyvern_signature
from skyvern.forge.sdk.core.skyvern_context import ensure_context
from skyvern.forge.sdk.db.enums import OrganizationAuthTokenType
from skyvern.forge.sdk.models import Step
from skyvern.forge.sdk.schemas.tasks import Task
from skyvern.forge.sdk.services.bitwarden import BitwardenConstants
@@ -3664,106 +3660,6 @@ async def get_input_value(tag_name: str, locator: Locator) -> str | None:
return await locator.inner_text()
async def poll_verification_code(
    task_id: str,
    organization_id: str,
    workflow_id: str | None = None,
    workflow_run_id: str | None = None,
    workflow_permanent_id: str | None = None,
    totp_verification_url: str | None = None,
    totp_identifier: str | None = None,
) -> str | None:
    """Poll for a TOTP / verification code until one arrives or time runs out.

    The code is fetched from ``totp_verification_url`` when that is set;
    otherwise it is looked up in the database by ``totp_identifier``. When
    neither is set nothing is fetched and the loop simply runs until the
    deadline.

    Args:
        task_id: Task the code is requested for.
        organization_id: Organization whose API token and stored codes are used.
        workflow_id: Accepted for interface compatibility; not read here.
        workflow_run_id: Forwarded to both lookups and to the timeout error.
        workflow_permanent_id: Passed as ``workflow_id`` to the database lookup
            and to the timeout error.
        totp_verification_url: Endpoint to ask for the code (takes precedence).
        totp_identifier: Identifier used for the database lookup.

    Returns:
        The verification code, or ``None`` when the organization has no valid
        API token.

    Raises:
        NoTOTPVerificationCodeFound: No code was obtained before the polling
            window (``settings.VERIFICATION_CODE_POLLING_TIMEOUT_MINS``) ended.
    """
    # The deadline is fixed up front, so the token lookup and the initial wait
    # below both count against the polling window.
    polling_window = timedelta(minutes=settings.VERIFICATION_CODE_POLLING_TIMEOUT_MINS)
    deadline = datetime.utcnow() + polling_window

    org_token = await app.DATABASE.get_valid_org_auth_token(organization_id, OrganizationAuthTokenType.api)
    if not org_token:
        LOG.error("Failed to get organization token when trying to get verification code")
        return None

    # Give the verification code some time to arrive before the first poll.
    await asyncio.sleep(settings.VERIFICATION_CODE_INITIAL_WAIT_TIME_SECS)

    while datetime.utcnow() <= deadline:
        if totp_verification_url:
            code = await _get_verification_code_from_url(
                task_id,
                totp_verification_url,
                org_token.token,
                workflow_run_id=workflow_run_id,
            )
        elif totp_identifier:
            code = await _get_verification_code_from_db(
                task_id,
                organization_id,
                totp_identifier,
                workflow_id=workflow_permanent_id,
                workflow_run_id=workflow_run_id,
            )
        else:
            code = None

        if code:
            LOG.info("Got verification code", verification_code=code)
            return code

        # Nothing yet: pause before the next attempt.
        await asyncio.sleep(10)

    LOG.warning("Polling verification code timed out")
    raise NoTOTPVerificationCodeFound(
        task_id=task_id,
        workflow_run_id=workflow_run_id,
        workflow_id=workflow_permanent_id,
        totp_verification_url=totp_verification_url,
        totp_identifier=totp_identifier,
    )
async def _get_verification_code_from_url(
    task_id: str,
    url: str,
    api_key: str,
    workflow_run_id: str | None = None,
    workflow_permanent_id: str | None = None,
) -> str | None:
    """Ask an external endpoint for the verification code via a signed POST.

    Args:
        task_id: Always included in the request body.
        url: Endpoint that is expected to answer with a JSON object.
        api_key: Key used to sign the request payload.
        workflow_run_id: Added to the request body when truthy.
        workflow_permanent_id: Added to the request body when truthy.

    Returns:
        The ``verification_code`` entry of the JSON response, or ``None`` when
        the entry is absent or the response is not a JSON object.
    """
    # Function-scope import: only this helper needs the module.
    import time

    request_data = {"task_id": task_id}
    if workflow_run_id:
        request_data["workflow_run_id"] = workflow_run_id
    if workflow_permanent_id:
        request_data["workflow_permanent_id"] = workflow_permanent_id

    # NOTE(review): the signature covers json.dumps(request_data) while the
    # request itself is sent as `data=request_data`; confirm aiohttp_post
    # serializes the body identically, otherwise receivers cannot verify it.
    payload = json.dumps(request_data)
    signature = generate_skyvern_signature(
        payload=payload,
        api_key=api_key,
    )

    # time.time() is the real POSIX timestamp. The previous
    # datetime.utcnow().timestamp() treated the naive UTC value as local time,
    # skewing the header by the host's UTC offset on non-UTC machines.
    timestamp = str(int(time.time()))
    headers = {
        "x-skyvern-timestamp": timestamp,
        "x-skyvern-signature": signature,
        "Content-Type": "application/json",
    }

    json_resp = await aiohttp_post(url=url, data=request_data, headers=headers, raise_exception=False)
    # The POST is best-effort (raise_exception=False), so the response may not
    # be a JSON object; treat that as "no code yet" rather than crashing the
    # caller with an AttributeError.
    if not isinstance(json_resp, dict):
        return None
    return json_resp.get("verification_code")
async def _get_verification_code_from_db(
    task_id: str,
    organization_id: str,
    totp_identifier: str,
    workflow_id: str | None = None,
    workflow_run_id: str | None = None,
) -> str | None:
    """Return the first stored TOTP code that is usable for this task.

    Codes are fetched for ``organization_id`` / ``totp_identifier`` and scanned
    in the order the database returns them. A stored code is skipped when it is
    bound to a different workflow run, workflow or task than the one requested,
    or when its ``expired_at`` is in the past.

    Returns:
        The ``code`` of the first usable entry, or ``None`` if there is none.
    """
    stored_codes = await app.DATABASE.get_totp_codes(
        organization_id=organization_id,
        totp_identifier=totp_identifier,
    )

    def _is_usable(entry) -> bool:
        # A binding only disqualifies an entry when both sides are set and differ.
        if entry.workflow_run_id and workflow_run_id and entry.workflow_run_id != workflow_run_id:
            return False
        if entry.workflow_id and workflow_id and entry.workflow_id != workflow_id:
            return False
        if entry.task_id and entry.task_id != task_id:
            return False
        # NOTE(review): compares against a naive UTC "now"; presumably expired_at
        # is stored as naive UTC as well -- confirm.
        if entry.expired_at and entry.expired_at < datetime.utcnow():
            return False
        return True

    return next((entry.code for entry in stored_codes if _is_usable(entry)), None)
class AbstractActionForContextParse(BaseModel):
reasoning: str | None
element_id: str

View File

@@ -7,9 +7,11 @@ from openai.types.responses.response import Response as OpenAIResponse
from pydantic import ValidationError
from skyvern.constants import SCROLL_AMOUNT_MULTIPLIER
from skyvern.core.totp import poll_verification_code
from skyvern.exceptions import NoTOTPVerificationCodeFound, UnsupportedActionType
from skyvern.forge import app
from skyvern.forge.prompts import prompt_engine
from skyvern.forge.sdk.core import skyvern_context
from skyvern.forge.sdk.models import Step
from skyvern.forge.sdk.schemas.tasks import Task
from skyvern.utils.image_resizer import Resolution, scale_coordinates
@@ -36,13 +38,17 @@ from skyvern.webeye.actions.actions import (
VerificationCodeAction,
WaitAction,
)
from skyvern.webeye.actions.handler import poll_verification_code
from skyvern.webeye.scraper.scraper import ScrapedPage
LOG = structlog.get_logger()
def parse_action(action: Dict[str, Any], scraped_page: ScrapedPage, data_extraction_goal: str | None = None) -> Action:
def parse_action(
action: Dict[str, Any],
scraped_page: ScrapedPage,
data_extraction_goal: str | None = None,
totp_code_required: bool = False,
) -> Action:
if "id" in action:
element_id = action["id"]
elif "element_id" in action:
@@ -95,7 +101,12 @@ def parse_action(action: Dict[str, Any], scraped_page: ScrapedPage, data_extract
if context_dict and len(context_dict) > 0:
context_dict["intention"] = intention
input_or_select_context = InputOrSelectContext.model_validate(context_dict)
return InputTextAction(**base_action_dict, text=action["text"], input_or_select_context=input_or_select_context)
return InputTextAction(
**base_action_dict,
text=action["text"],
input_or_select_context=input_or_select_context,
totp_code_required=totp_code_required,
)
if action_type == ActionType.UPLOAD_FILE:
# TODO: see if the element is a file input element. if it's not, convert this action into a click action
@@ -162,10 +173,16 @@ def parse_actions(
task: Task, step_id: str, step_order: int, scraped_page: ScrapedPage, json_response: list[Dict[str, Any]]
) -> list[Action]:
actions: list[Action] = []
context = skyvern_context.ensure_context()
totp_code = context.totp_codes.get(task.task_id)
totp_code_required = bool(totp_code)
for idx, action in enumerate(json_response):
try:
action_instance = parse_action(
action=action, scraped_page=scraped_page, data_extraction_goal=task.data_extraction_goal
action=action,
scraped_page=scraped_page,
data_extraction_goal=task.data_extraction_goal,
totp_code_required=totp_code_required,
)
action_instance.organization_id = task.organization_id
action_instance.workflow_run_id = task.workflow_run_id
@@ -793,8 +810,8 @@ async def generate_cua_fallback_actions(
)
try:
verification_code = await poll_verification_code(
task.task_id,
task.organization_id,
organization_id=task.organization_id,
task_id=task.task_id,
workflow_run_id=task.workflow_run_id,
totp_verification_url=task.totp_verification_url,
totp_identifier=task.totp_identifier,