diff --git a/alembic/versions/2025_10_14_0819-774e10939484_add_magic_link_type_otp.py b/alembic/versions/2025_10_14_0819-774e10939484_add_magic_link_type_otp.py new file mode 100644 index 00000000..b8294546 --- /dev/null +++ b/alembic/versions/2025_10_14_0819-774e10939484_add_magic_link_type_otp.py @@ -0,0 +1,33 @@ +"""add magic link type otp + +Revision ID: 774e10939484 +Revises: d648e2df239e +Create Date: 2025-10-14 08:19:23.888067+00:00 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "774e10939484" +down_revision: Union[str, None] = "d648e2df239e" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.add_column("totp_codes", sa.Column("otp_type", sa.String(), server_default=sa.text("'totp'"), nullable=True)) + op.create_index("ix_totp_codes_otp_type", "totp_codes", ["organization_id", "otp_type"], unique=False) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_index("ix_totp_codes_otp_type", table_name="totp_codes") + op.drop_column("totp_codes", "otp_type") + # ### end Alembic commands ### diff --git a/skyvern-frontend/src/api/types.ts b/skyvern-frontend/src/api/types.ts index 6398aa2a..eb4aa4f0 100644 --- a/skyvern-frontend/src/api/types.ts +++ b/skyvern-frontend/src/api/types.ts @@ -243,6 +243,7 @@ export const ActionTypes = { VerificationCode: "verification_code", Drag: "drag", LeftMouse: "left_mouse", + GotoUrl: "goto_url", } as const; export type ActionType = (typeof ActionTypes)[keyof typeof ActionTypes]; @@ -267,6 +268,7 @@ export const ReadableActionTypes: { verification_code: "Verification Code", drag: "Drag", left_mouse: "Left Mouse", + goto_url: "Goto URL", }; export type Option = { diff --git a/skyvern/core/script_generations/skyvern_page.py b/skyvern/core/script_generations/skyvern_page.py index 47b9d636..957bc159 100644 --- a/skyvern/core/script_generations/skyvern_page.py +++ b/skyvern/core/script_generations/skyvern_page.py @@ -14,13 +14,14 @@ from playwright.async_api import Page from skyvern.config import settings from skyvern.constants import SPECIAL_FIELD_VERIFICATION_CODE -from skyvern.core.totp import poll_verification_code from skyvern.exceptions import WorkflowRunNotFound from skyvern.forge import app from skyvern.forge.prompts import prompt_engine from skyvern.forge.sdk.api.files import download_file from skyvern.forge.sdk.artifact.models import ArtifactType from skyvern.forge.sdk.core import skyvern_context +from skyvern.forge.sdk.schemas.totp_codes import OTPType +from skyvern.services.otp_service import poll_otp_value from skyvern.utils.prompt_engine import load_prompt_with_elements from skyvern.webeye.actions import handler_utils from skyvern.webeye.actions.action_types import ActionType @@ -597,14 +598,15 @@ class SkyvernPage: totp_identifier = _render_template_with_label(totp_identifier, label=self.current_label) if totp_url: totp_url = _render_template_with_label(totp_url, 
label=self.current_label) - verification_code = await poll_verification_code( + otp_value = await poll_otp_value( organization_id=organization_id, task_id=task_id, workflow_run_id=workflow_run_id, totp_identifier=totp_identifier, totp_verification_url=totp_url, ) - if verification_code: + if otp_value and otp_value.get_otp_type() == OTPType.TOTP: + verification_code = otp_value.value if isinstance(data, dict) and SPECIAL_FIELD_VERIFICATION_CODE not in data: data[SPECIAL_FIELD_VERIFICATION_CODE] = verification_code elif isinstance(data, str) and SPECIAL_FIELD_VERIFICATION_CODE not in data: diff --git a/skyvern/core/totp.py b/skyvern/core/totp.py index aea88f43..49cf3515 100644 --- a/skyvern/core/totp.py +++ b/skyvern/core/totp.py @@ -10,6 +10,7 @@ from skyvern.forge import app from skyvern.forge.sdk.core.aiohttp_helper import aiohttp_post from skyvern.forge.sdk.core.security import generate_skyvern_signature from skyvern.forge.sdk.db.enums import OrganizationAuthTokenType +from skyvern.forge.sdk.schemas.totp_codes import OTPType LOG = structlog.get_logger() @@ -117,7 +118,9 @@ async def _get_verification_code_from_db( workflow_id: str | None = None, workflow_run_id: str | None = None, ) -> str | None: - totp_codes = await app.DATABASE.get_totp_codes(organization_id=organization_id, totp_identifier=totp_identifier) + totp_codes = await app.DATABASE.get_otp_codes( + organization_id=organization_id, totp_identifier=totp_identifier, otp_type=OTPType.TOTP + ) for totp_code in totp_codes: if totp_code.workflow_run_id and workflow_run_id and totp_code.workflow_run_id != workflow_run_id: continue diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py index 31fc3b6a..ce5402d7 100644 --- a/skyvern/forge/agent.py +++ b/skyvern/forge/agent.py @@ -27,7 +27,6 @@ from skyvern.constants import ( SPECIAL_FIELD_VERIFICATION_CODE, ScrapeType, ) -from skyvern.core.totp import poll_verification_code from skyvern.errors.errors import ( GetTOTPVerificationCodeError, 
ReachMaxRetriesError, @@ -82,6 +81,7 @@ from skyvern.forge.sdk.models import Step, StepStatus from skyvern.forge.sdk.schemas.files import FileInfo from skyvern.forge.sdk.schemas.organizations import Organization from skyvern.forge.sdk.schemas.tasks import Task, TaskRequest, TaskResponse, TaskStatus +from skyvern.forge.sdk.schemas.totp_codes import OTPType from skyvern.forge.sdk.trace import TraceManager from skyvern.forge.sdk.trace.experiment_utils import collect_experiment_metadata_safely from skyvern.forge.sdk.workflow.context_manager import WorkflowRunContext @@ -91,6 +91,7 @@ from skyvern.schemas.runs import CUA_ENGINES, RunEngine from skyvern.schemas.steps import AgentStepOutput from skyvern.services import run_service, service_utils from skyvern.services.action_service import get_action_history +from skyvern.services.otp_service import poll_otp_value from skyvern.utils.image_resizer import Resolution from skyvern.utils.prompt_engine import MaxStepsReasonResponse, load_prompt_with_elements from skyvern.webeye.actions.action_types import ActionType @@ -101,6 +102,7 @@ from skyvern.webeye.actions.actions import ( CompleteVerifyResult, DecisiveAction, ExtractAction, + GotoUrlAction, ReloadPageAction, TerminateAction, WebAction, @@ -1030,15 +1032,17 @@ class ForgeAgent: screenshots=scraped_page.screenshots, ) try: - json_response = await self.handle_potential_verification_code( - task, - step, - scraped_page, - browser_state, - json_response, + otp_json_response, otp_actions = await self.handle_potential_OTP_actions( + task, step, scraped_page, browser_state, json_response ) - detailed_agent_step_output.llm_response = json_response - actions = parse_actions(task, step.step_id, step.order, scraped_page, json_response["actions"]) + if otp_actions: + detailed_agent_step_output.llm_response = otp_json_response + actions = otp_actions + else: + actions = parse_actions( + task, step.step_id, step.order, scraped_page, json_response["actions"] + ) + if context: 
context.pop_totp_code(task.task_id) except NoTOTPVerificationCodeFound: @@ -3215,6 +3219,83 @@ class ForgeAgent: ) return None, None, next_step + async def handle_potential_OTP_actions( + self, + task: Task, + step: Step, + scraped_page: ScrapedPage, + browser_state: BrowserState, + json_response: dict[str, Any], + ) -> tuple[dict[str, Any], list[Action]]: + if not task.organization_id: + return json_response, [] + + if not task.totp_verification_url and not task.totp_identifier: + return json_response, [] + + should_verify_by_magic_link = json_response.get("should_verify_by_magic_link") + place_to_enter_verification_code = json_response.get("place_to_enter_verification_code") + should_enter_verification_code = json_response.get("should_enter_verification_code") + + if ( + not should_verify_by_magic_link + and not place_to_enter_verification_code + and not should_enter_verification_code + ): + return json_response, [] + + if place_to_enter_verification_code and should_enter_verification_code: + json_response = await self.handle_potential_verification_code( + task, step, scraped_page, browser_state, json_response + ) + actions = parse_actions(task, step.step_id, step.order, scraped_page, json_response["actions"]) + return json_response, actions + + if should_verify_by_magic_link: + actions = await self.handle_potential_magic_link(task, step, scraped_page, browser_state, json_response) + return json_response, actions + + return json_response, [] + + async def handle_potential_magic_link( + self, + task: Task, + step: Step, + scraped_page: ScrapedPage, + browser_state: BrowserState, + json_response: dict[str, Any], + ) -> list[Action]: + should_verify_by_magic_link = json_response.get("should_verify_by_magic_link") + if not should_verify_by_magic_link: + return [] + + LOG.info("Handling magic link verification", task_id=task.task_id) + otp_value = await poll_otp_value( + organization_id=task.organization_id, + task_id=task.task_id, + 
workflow_run_id=task.workflow_run_id, + totp_verification_url=task.totp_verification_url, + totp_identifier=task.totp_identifier, + ) + if not otp_value or otp_value.get_otp_type() != OTPType.MAGIC_LINK: + return [] + + # TODO: not sure whether all magic links can directly login + navigate to the homepage + return [ + GotoUrlAction( + action_type=ActionType.GOTO_URL, + reasoning="Navigating to the magic link URL to verify the login", + intention="Navigating to the magic link URL to verify the login", + url=otp_value.value, + organization_id=task.organization_id, + workflow_run_id=task.workflow_run_id, + task_id=task.task_id, + step_id=step.step_id, + step_order=step.order, + action_order=0, + ), + ] + async def handle_potential_verification_code( self, task: Task, @@ -3238,7 +3319,7 @@ class ForgeAgent: if workflow_run: workflow_id = workflow_run.workflow_id workflow_permanent_id = workflow_run.workflow_permanent_id - verification_code = await poll_verification_code( + otp_value = await poll_otp_value( organization_id=task.organization_id, task_id=task.task_id, workflow_id=workflow_id, @@ -3247,8 +3328,11 @@ class ForgeAgent: totp_verification_url=task.totp_verification_url, totp_identifier=task.totp_identifier, ) + if not otp_value or otp_value.get_otp_type() != OTPType.TOTP: + return json_response + current_context = skyvern_context.ensure_context() - current_context.totp_codes[task.task_id] = verification_code + current_context.totp_codes[task.task_id] = otp_value.value extract_action_prompt, use_caching = await self._build_extract_action_prompt( task, diff --git a/skyvern/forge/prompts/skyvern/extract-action.j2 b/skyvern/forge/prompts/skyvern/extract-action.j2 index 0f859852..8bf9a874 100644 --- a/skyvern/forge/prompts/skyvern/extract-action.j2 +++ b/skyvern/forge/prompts/skyvern/extract-action.j2 @@ -39,7 +39,8 @@ Reply in JSON format with the following keys: }],{% if verification_code_check %} "verification_code_reasoning": str, // Let's think step by step. 
Describe what you see and think if there is somewhere on the current page where you must enter the verification code now for login or any verification step. Explain why you believe a verification code needs to be entered somewhere or not. Do not imagine any place to enter the code if the code has not been sent yet. "place_to_enter_verification_code": bool, // Whether there is a place on the current page to enter the verification code now. - "should_enter_verification_code": bool // Whether the user should proceed to enter the verification code {% endif %} + "should_enter_verification_code": bool, // Whether the user should proceed to enter the verification code. + "should_verify_by_magic_link": bool // Whether the page instructs the user to check their email for a magic link to verify the login.{% endif %} } Consider the action history from the last step and the screenshot together, if actions from the last step don't yield positive impact, try other actions or other action combinations. diff --git a/skyvern/forge/prompts/skyvern/parse-otp-login.j2 b/skyvern/forge/prompts/skyvern/parse-otp-login.j2 new file mode 100644 index 00000000..033681a1 --- /dev/null +++ b/skyvern/forge/prompts/skyvern/parse-otp-login.j2 @@ -0,0 +1,20 @@ +You receive either an email or a text message containing an OTP (such as a TOTP code or a Magic Link) to verify the login. Your job is to parse the content and identify the OTP type and value. There should be only one OTP type and one OTP value in the content. The value must come from the content. + +You should follow the rules below to identify the OTP type and value: +- If it's a Magic Link login, the value is usually a link which must be a valid HTTP or HTTPS URL. +- If it's a TOTP login, the most common value is a code which is a series of digits, although sometimes it may contain letters. + +MAKE SURE YOU OUTPUT VALID JSON. No text before or after JSON, no trailing commas, no comments (//), no unnecessary quotes, etc. 
+ +Reply in the following JSON format: +{ + "reasoning": str, // How you figured out what the OTP type and value are, or why they are missing. Be precise in explaining the data source and the context that make you believe the OTP type and value are correct + "otp_type": str, // the type of OTP. It can be "magic_link" or "totp" + "otp_value_found": bool, // true if the OTP value is found. false if the OTP value is not found + "otp_value": str // the OTP value. If you cannot identify any OTP value, do not make one up; return null instead +} + +Received Content containing OTP: +``` +{{ content }} +``` diff --git a/skyvern/forge/sdk/db/client.py b/skyvern/forge/sdk/db/client.py index f026bb2f..14901c05 100644 --- a/skyvern/forge/sdk/db/client.py +++ b/skyvern/forge/sdk/db/client.py @@ -91,7 +91,7 @@ from skyvern.forge.sdk.schemas.runs import Run from skyvern.forge.sdk.schemas.task_generations import TaskGeneration from skyvern.forge.sdk.schemas.task_v2 import TaskV2, TaskV2Status, Thought, ThoughtType from skyvern.forge.sdk.schemas.tasks import OrderBy, SortDirection, Task, TaskStatus -from skyvern.forge.sdk.schemas.totp_codes import TOTPCode +from skyvern.forge.sdk.schemas.totp_codes import OTPType, TOTPCode from skyvern.forge.sdk.schemas.workflow_runs import WorkflowRunBlock from skyvern.forge.sdk.workflow.models.parameter import ( AWSSecretParameter, @@ -2610,11 +2610,12 @@ class AgentDB: return None return TaskGeneration.model_validate(task_generation) - async def get_totp_codes( + async def get_otp_codes( self, organization_id: str, totp_identifier: str, valid_lifespan_minutes: int = settings.TOTP_LIFESPAN_MINUTES, + otp_type: OTPType | None = None, ) -> list[TOTPCode]: """ 1. 
filter by: @@ -2634,17 +2635,20 @@ class AgentDB: .filter_by(organization_id=organization_id) .filter_by(totp_identifier=totp_identifier) .filter(TOTPCodeModel.created_at > datetime.utcnow() - timedelta(minutes=valid_lifespan_minutes)) - .order_by(asc(all_null), TOTPCodeModel.created_at.desc()) ) + if otp_type: + query = query.filter(TOTPCodeModel.otp_type == otp_type) + query = query.order_by(asc(all_null), TOTPCodeModel.created_at.desc()) totp_code = (await session.scalars(query)).all() return [TOTPCode.model_validate(totp_code) for totp_code in totp_code] - async def create_totp_code( + async def create_otp_code( self, organization_id: str, totp_identifier: str, content: str, code: str, + otp_type: OTPType, task_id: str | None = None, workflow_id: str | None = None, workflow_run_id: str | None = None, @@ -2662,6 +2666,7 @@ class AgentDB: workflow_run_id=workflow_run_id, source=source, expired_at=expired_at, + otp_type=otp_type, ) session.add(new_totp_code) await session.commit() diff --git a/skyvern/forge/sdk/db/id.py b/skyvern/forge/sdk/db/id.py index 48a92f75..e291b2e4 100644 --- a/skyvern/forge/sdk/db/id.py +++ b/skyvern/forge/sdk/db/id.py @@ -54,7 +54,7 @@ STEP_PREFIX = "stp" TASK_GENERATION_PREFIX = "tg" TASK_PREFIX = "tsk" TASK_RUN_PREFIX = "tr" -TOTP_CODE_PREFIX = "totp" +OTP_CODE_PREFIX = "otp" USER_PREFIX = "u" WORKFLOW_PARAMETER_PREFIX = "wp" WORKFLOW_PERMANENT_ID_PREFIX = "wpid" @@ -169,9 +169,9 @@ def generate_ai_suggestion_id() -> str: return f"{AI_SUGGESTION_PREFIX}_{int_id}" -def generate_totp_code_id() -> str: +def generate_otp_code_id() -> str: int_id = generate_id() - return f"{TOTP_CODE_PREFIX}_{int_id}" + return f"{OTP_CODE_PREFIX}_{int_id}" def generate_action_id() -> str: diff --git a/skyvern/forge/sdk/db/models.py b/skyvern/forge/sdk/db/models.py index 4d82a3a2..4d212fdb 100644 --- a/skyvern/forge/sdk/db/models.py +++ b/skyvern/forge/sdk/db/models.py @@ -35,6 +35,7 @@ from skyvern.forge.sdk.db.id import ( generate_org_id, 
generate_organization_auth_token_id, generate_organization_bitwarden_collection_id, + generate_otp_code_id, generate_output_parameter_id, generate_persistent_browser_session_id, generate_script_block_id, @@ -47,7 +48,6 @@ from skyvern.forge.sdk.db.id import ( generate_task_run_id, generate_task_v2_id, generate_thought_id, - generate_totp_code_id, generate_workflow_id, generate_workflow_parameter_id, generate_workflow_permanent_id, @@ -579,9 +579,12 @@ class AISuggestionModel(Base): class TOTPCodeModel(Base): __tablename__ = "totp_codes" - __table_args__ = (Index("ix_totp_codes_org_created_at", "organization_id", "created_at"),) + __table_args__ = ( + Index("ix_totp_codes_org_created_at", "organization_id", "created_at"), + Index("ix_totp_codes_otp_type", "organization_id", "otp_type"), + ) - totp_code_id = Column(String, primary_key=True, default=generate_totp_code_id) + totp_code_id = Column(String, primary_key=True, default=generate_otp_code_id) totp_identifier = Column(String, nullable=False, index=True) organization_id = Column(String, ForeignKey("organizations.organization_id")) task_id = Column(String, ForeignKey("tasks.task_id")) @@ -593,6 +596,7 @@ class TOTPCodeModel(Base): created_at = Column(DateTime, default=datetime.datetime.utcnow, nullable=False, index=True) modified_at = Column(DateTime, default=datetime.datetime.utcnow, onupdate=datetime.datetime.utcnow, nullable=False) expired_at = Column(DateTime, index=True) + otp_type = Column(String, server_default=sqlalchemy.text("'totp'")) class ActionModel(Base): diff --git a/skyvern/forge/sdk/db/utils.py b/skyvern/forge/sdk/db/utils.py index df852472..0ca4b5d8 100644 --- a/skyvern/forge/sdk/db/utils.py +++ b/skyvern/forge/sdk/db/utils.py @@ -67,6 +67,7 @@ from skyvern.webeye.actions.actions import ( DownloadFileAction, DragAction, ExtractAction, + GotoUrlAction, InputTextAction, KeypressAction, LeftMouseAction, @@ -105,6 +106,7 @@ ACTION_TYPE_TO_CLASS = { ActionType.DRAG: DragAction, 
ActionType.VERIFICATION_CODE: VerificationCodeAction, ActionType.LEFT_MOUSE: LeftMouseAction, + ActionType.GOTO_URL: GotoUrlAction, } diff --git a/skyvern/forge/sdk/routes/credentials.py b/skyvern/forge/sdk/routes/credentials.py index b82f8db1..4bb241c5 100644 --- a/skyvern/forge/sdk/routes/credentials.py +++ b/skyvern/forge/sdk/routes/credentials.py @@ -29,10 +29,11 @@ from skyvern.forge.sdk.schemas.organizations import ( CreateOnePasswordTokenResponse, Organization, ) -from skyvern.forge.sdk.schemas.totp_codes import TOTPCode, TOTPCodeCreate +from skyvern.forge.sdk.schemas.totp_codes import OTPType, TOTPCode, TOTPCodeCreate from skyvern.forge.sdk.services import org_auth_service from skyvern.forge.sdk.services.bitwarden import BitwardenService from skyvern.forge.sdk.services.credential.credential_vault_service import CredentialVaultService +from skyvern.services.otp_service import OTPValue, parse_otp_login LOG = structlog.get_logger() @@ -59,6 +60,58 @@ async def parse_totp_code(content: str, organization_id: str) -> str | None: return code_resp.get("code", None) +@legacy_base_router.post("/otp") +@legacy_base_router.post("/otp/", include_in_schema=False) +@base_router.post( + "/credentials/otp", + response_model=TOTPCode, + summary="Send OTP content", + description="Forward a OTP (TOTP, Magic Link) email or sms message containing otp login data to Skyvern. 
This endpoint stores the otp login data in database so that Skyvern can use it while running tasks/workflows.", + tags=["Credentials"], + openapi_extra={ + "x-fern-sdk-method-name": "send_otp_content", + }, +) +@base_router.post( + "/credentials/otp/", + response_model=TOTPCode, + include_in_schema=False, +) +async def send_otp_content( + data: TOTPCodeCreate, + curr_org: Organization = Depends(org_auth_service.get_current_org), +) -> TOTPCode: + content = data.content.strip() + otp_value: OTPValue | None = OTPValue(value=content, type=OTPType.TOTP) + # We assume the user is sending the code directly when the length of code is less than or equal to 10 + if len(content) > 10: + otp_value = await parse_otp_login(content, curr_org.organization_id) + + if not otp_value: + LOG.error( + "Failed to parse otp login", + totp_identifier=data.totp_identifier, + task_id=data.task_id, + workflow_id=data.workflow_id, + workflow_run_id=data.workflow_run_id, + content=data.content, + ) + raise HTTPException(status_code=400, detail="Failed to parse otp login") + + return await app.DATABASE.create_otp_code( + organization_id=curr_org.organization_id, + totp_identifier=data.totp_identifier, + content=data.content, + code=otp_value.value, + task_id=data.task_id, + workflow_id=data.workflow_id, + workflow_run_id=data.workflow_run_id, + source=data.source, + expired_at=data.expired_at, + otp_type=otp_value.get_otp_type(), + ) + + @legacy_base_router.post("/totp") @legacy_base_router.post("/totp/", include_in_schema=False) @base_router.post( @@ -104,7 +157,7 @@ async def send_totp_code( content=data.content, ) raise HTTPException(status_code=400, detail="Failed to parse totp code") - return await app.DATABASE.create_totp_code( + return await app.DATABASE.create_otp_code( organization_id=curr_org.organization_id, totp_identifier=data.totp_identifier, content=data.content, @@ -114,6 +167,7 @@ async def send_totp_code( workflow_run_id=data.workflow_run_id, source=data.source, 
expired_at=data.expired_at, + otp_type=OTPType.TOTP, ) diff --git a/skyvern/forge/sdk/schemas/totp_codes.py b/skyvern/forge/sdk/schemas/totp_codes.py index 1c7bee77..238e4f45 100644 --- a/skyvern/forge/sdk/schemas/totp_codes.py +++ b/skyvern/forge/sdk/schemas/totp_codes.py @@ -1,4 +1,5 @@ from datetime import datetime +from enum import StrEnum from pydantic import BaseModel, ConfigDict, Field, field_validator @@ -65,9 +66,15 @@ class TOTPCodeCreate(TOTPCodeBase): return sanitize_postgres_text(value) +class OTPType(StrEnum): + TOTP = "totp" + MAGIC_LINK = "magic_link" + + class TOTPCode(TOTPCodeCreate): totp_code_id: str = Field(..., description="The skyvern ID of the TOTP code.") code: str = Field(..., description="The TOTP code extracted from the content.") organization_id: str = Field(..., description="The ID of the organization that the TOTP code is for.") created_at: datetime = Field(..., description="The timestamp when the TOTP code was created.") modified_at: datetime = Field(..., description="The timestamp when the TOTP code was modified.") + otp_type: OTPType | None = Field(None, description="The type of the OTP code.") diff --git a/skyvern/services/otp_service.py b/skyvern/services/otp_service.py new file mode 100644 index 00000000..7f76e9e8 --- /dev/null +++ b/skyvern/services/otp_service.py @@ -0,0 +1,173 @@ +import asyncio +import json +from datetime import datetime, timedelta + +import structlog +from pydantic import BaseModel, Field + +from skyvern.config import settings +from skyvern.exceptions import FailedToGetTOTPVerificationCode, NoTOTPVerificationCodeFound +from skyvern.forge import app +from skyvern.forge.prompts import prompt_engine +from skyvern.forge.sdk.core.aiohttp_helper import aiohttp_post +from skyvern.forge.sdk.core.security import generate_skyvern_signature +from skyvern.forge.sdk.db.enums import OrganizationAuthTokenType +from skyvern.forge.sdk.schemas.totp_codes import OTPType + +LOG = structlog.get_logger() + + +class 
OTPValue(BaseModel): + value: str = Field(..., description="The value of the OTP code.") + type: OTPType | None = Field(None, description="The type of the OTP code.") + + def get_otp_type(self) -> OTPType: + if self.type: + return self.type + value = self.value.strip().lower() + if value.startswith("https://") or value.startswith("http://"): + return OTPType.MAGIC_LINK + return OTPType.TOTP + + +class OTPResultParsedByLLM(BaseModel): + reasoning: str = Field(..., description="The reasoning of the OTP code.") + otp_type: OTPType | None = Field(None, description="The type of the OTP code.") + otp_value_found: bool = Field(..., description="Whether the OTP value is found.") + otp_value: str | None = Field(None, description="The OTP value.") + + +async def parse_otp_login(content: str, organization_id: str) -> OTPValue | None: + prompt = prompt_engine.load_prompt("parse-otp-login", content=content) + resp = await app.SECONDARY_LLM_API_HANDLER( + prompt=prompt, prompt_name="parse-otp-login", organization_id=organization_id + ) + LOG.info("OTP Login Parser Response", resp=resp) + otp_result = OTPResultParsedByLLM.model_validate(resp) + if otp_result.otp_value_found and otp_result.otp_value: + return OTPValue(value=otp_result.otp_value, type=otp_result.otp_type) + return None + + +async def poll_otp_value( + organization_id: str, + task_id: str | None = None, + workflow_id: str | None = None, + workflow_run_id: str | None = None, + workflow_permanent_id: str | None = None, + totp_verification_url: str | None = None, + totp_identifier: str | None = None, +) -> OTPValue | None: + timeout = timedelta(minutes=settings.VERIFICATION_CODE_POLLING_TIMEOUT_MINS) + start_datetime = datetime.utcnow() + timeout_datetime = start_datetime + timeout + org_token = await app.DATABASE.get_valid_org_auth_token(organization_id, OrganizationAuthTokenType.api.value) + if not org_token: + LOG.error("Failed to get organization token when trying to get otp value") + return None + LOG.info( + 
"Polling otp value", + task_id=task_id, + workflow_run_id=workflow_run_id, + workflow_permanent_id=workflow_permanent_id, + totp_verification_url=totp_verification_url, + totp_identifier=totp_identifier, + ) + while True: + await asyncio.sleep(10) + # check timeout + if datetime.utcnow() > timeout_datetime: + LOG.warning("Polling otp value timed out") + raise NoTOTPVerificationCodeFound( + task_id=task_id, + workflow_run_id=workflow_run_id, + workflow_id=workflow_permanent_id, + totp_verification_url=totp_verification_url, + totp_identifier=totp_identifier, + ) + otp_value: OTPValue | None = None + if totp_verification_url: + otp_value = await _get_otp_value_from_url( + totp_verification_url, + org_token.token, + task_id=task_id, + workflow_run_id=workflow_run_id, + ) + elif totp_identifier: + otp_value = await _get_otp_value_from_db( + organization_id, + totp_identifier, + task_id=task_id, + workflow_id=workflow_permanent_id, + workflow_run_id=workflow_run_id, + ) + if otp_value: + LOG.info("Got otp value", otp_value=otp_value) + return otp_value + + +async def _get_otp_value_from_url( + url: str, + api_key: str, + task_id: str | None = None, + workflow_run_id: str | None = None, + workflow_permanent_id: str | None = None, +) -> OTPValue | None: + request_data = {} + if task_id: + request_data["task_id"] = task_id + if workflow_run_id: + request_data["workflow_run_id"] = workflow_run_id + if workflow_permanent_id: + request_data["workflow_permanent_id"] = workflow_permanent_id + payload = json.dumps(request_data) + signature = generate_skyvern_signature( + payload=payload, + api_key=api_key, + ) + timestamp = str(int(datetime.utcnow().timestamp())) + headers = { + "x-skyvern-timestamp": timestamp, + "x-skyvern-signature": signature, + "Content-Type": "application/json", + } + try: + json_resp = await aiohttp_post(url=url, data=request_data, headers=headers, raise_exception=False) + except Exception as e: + LOG.error("Failed to get otp value from url", 
exc_info=True) + raise FailedToGetTOTPVerificationCode( + task_id=task_id, + workflow_run_id=workflow_run_id, + workflow_id=workflow_permanent_id, + totp_verification_url=url, + reason=str(e), + ) + code = json_resp.get("verification_code", None) + if code: + return OTPValue(value=code, type=OTPType.TOTP) + + magic_link = json_resp.get("magic_link", None) + if magic_link: + return OTPValue(value=magic_link, type=OTPType.MAGIC_LINK) + return None + + +async def _get_otp_value_from_db( + organization_id: str, + totp_identifier: str, + task_id: str | None = None, + workflow_id: str | None = None, + workflow_run_id: str | None = None, +) -> OTPValue | None: + totp_codes = await app.DATABASE.get_otp_codes(organization_id=organization_id, totp_identifier=totp_identifier) + for totp_code in totp_codes: + if totp_code.workflow_run_id and workflow_run_id and totp_code.workflow_run_id != workflow_run_id: + continue + if totp_code.workflow_id and workflow_id and totp_code.workflow_id != workflow_id: + continue + if totp_code.task_id and totp_code.task_id != task_id: + continue + if totp_code.expired_at and totp_code.expired_at < datetime.utcnow(): + continue + return OTPValue(value=totp_code.code, type=totp_code.otp_type) + return None diff --git a/skyvern/services/totp_service.py b/skyvern/services/totp_service.py deleted file mode 100644 index e69de29b..00000000 diff --git a/skyvern/webeye/actions/action_types.py b/skyvern/webeye/actions/action_types.py index 4ab32121..cd894ec8 100644 --- a/skyvern/webeye/actions/action_types.py +++ b/skyvern/webeye/actions/action_types.py @@ -21,6 +21,7 @@ class ActionType(StrEnum): EXTRACT = "extract" VERIFICATION_CODE = "verification_code" + GOTO_URL = "goto_url" SCROLL = "scroll" KEYPRESS = "keypress" MOVE = "move" diff --git a/skyvern/webeye/actions/actions.py b/skyvern/webeye/actions/actions.py index d03b3766..f4403709 100644 --- a/skyvern/webeye/actions/actions.py +++ b/skyvern/webeye/actions/actions.py @@ -260,6 +260,11 @@ class 
KeypressAction(Action): duration: int = 0 +class GotoUrlAction(Action): + action_type: ActionType = ActionType.GOTO_URL + url: str + + class MoveAction(Action): action_type: ActionType = ActionType.MOVE x: int diff --git a/skyvern/webeye/actions/handler.py b/skyvern/webeye/actions/handler.py index 6f17fb9b..889b74e8 100644 --- a/skyvern/webeye/actions/handler.py +++ b/skyvern/webeye/actions/handler.py @@ -2153,6 +2153,18 @@ async def handle_left_mouse_action( return [ActionSuccess()] +@TraceManager.traced_async(ignore_inputs=["scraped_page", "page"]) +async def handle_goto_url_action( + action: actions.GotoUrlAction, + page: Page, + scraped_page: ScrapedPage, + task: Task, + step: Step, +) -> list[ActionResult]: + await page.goto(action.url, timeout=settings.BROWSER_LOADING_TIMEOUT_MS) + return [ActionSuccess()] + + ActionHandler.register_action_type(ActionType.SOLVE_CAPTCHA, handle_solve_captcha_action) ActionHandler.register_action_type(ActionType.CLICK, handle_click_action) ActionHandler.register_action_type(ActionType.INPUT_TEXT, handle_input_text_action) @@ -2170,6 +2182,7 @@ ActionHandler.register_action_type(ActionType.MOVE, handle_move_action) ActionHandler.register_action_type(ActionType.DRAG, handle_drag_action) ActionHandler.register_action_type(ActionType.VERIFICATION_CODE, handle_verification_code_action) ActionHandler.register_action_type(ActionType.LEFT_MOUSE, handle_left_mouse_action) +ActionHandler.register_action_type(ActionType.GOTO_URL, handle_goto_url_action) async def get_actual_value_of_parameter_if_secret(task: Task, parameter: str) -> Any: diff --git a/skyvern/webeye/actions/parse_actions.py b/skyvern/webeye/actions/parse_actions.py index b05f87a5..935e238a 100644 --- a/skyvern/webeye/actions/parse_actions.py +++ b/skyvern/webeye/actions/parse_actions.py @@ -7,13 +7,14 @@ from openai.types.responses.response import Response as OpenAIResponse from pydantic import ValidationError from skyvern.constants import SCROLL_AMOUNT_MULTIPLIER -from 
skyvern.core.totp import poll_verification_code from skyvern.exceptions import FailedToGetTOTPVerificationCode, NoTOTPVerificationCodeFound, UnsupportedActionType from skyvern.forge import app from skyvern.forge.prompts import prompt_engine from skyvern.forge.sdk.core import skyvern_context from skyvern.forge.sdk.models import Step from skyvern.forge.sdk.schemas.tasks import Task +from skyvern.forge.sdk.schemas.totp_codes import OTPType +from skyvern.services.otp_service import poll_otp_value from skyvern.utils.image_resizer import Resolution, scale_coordinates from skyvern.webeye.actions.action_types import ActionType from skyvern.webeye.actions.actions import ( @@ -809,13 +810,16 @@ async def generate_cua_fallback_actions( totp_identifier=task.totp_identifier, ) try: - verification_code = await poll_verification_code( + otp_value = await poll_otp_value( organization_id=task.organization_id, task_id=task.task_id, workflow_run_id=task.workflow_run_id, totp_verification_url=task.totp_verification_url, totp_identifier=task.totp_identifier, ) + if not otp_value or otp_value.get_otp_type() != OTPType.TOTP: + raise NoTOTPVerificationCodeFound() + verification_code = otp_value.value reasoning = reasoning or f"Received verification code: {verification_code}" action = VerificationCodeAction( verification_code=verification_code,