general text captcha solution (#4517)

This commit is contained in:
LawyZheng
2026-01-23 00:23:57 +08:00
committed by GitHub
parent b5ff547a3a
commit 2c60d8e180
12 changed files with 62 additions and 8 deletions

View File

@@ -12,6 +12,16 @@ LOG = structlog.get_logger()
T = TypeVar("T", bound="Action")
class CaptchaType(StrEnum):
TEXT_CAPTCHA = "text_captcha"
RECAPTCHA = "recaptcha"
HCAPTCHA = "hcaptcha"
MTCAPTCHA = "mtcaptcha"
FUNCAPTCHA = "funcaptcha"
CLOUDFLARE = "cloudflare"
OTHER = "other"
class ActionStatus(StrEnum):
pending = "pending"
skipped = "skipped"
@@ -82,9 +92,10 @@ class InputOrSelectContext(BaseModel):
is_location_input: bool | None = None # address input usually requires auto completion
is_date_related: bool | None = None # date picker mini agent requires some special logic
date_format: str | None = None
is_text_captcha: bool | None = None
def __repr__(self) -> str:
    """Return a compact, single-line description of this context.

    The output lists ``field``, ``is_required``, ``is_search_bar``,
    ``is_location_input``, ``is_date_related``, ``date_format``,
    ``is_text_captcha`` and ``intention``, in that order.
    """
    # Only the complete representation is kept. The shorter variant that
    # preceded it omitted is_date_related / date_format / is_text_captcha and,
    # being the first `return`, made the complete one unreachable.
    return (
        f"InputOrSelectContext(field={self.field}, is_required={self.is_required}, "
        f"is_search_bar={self.is_search_bar}, is_location_input={self.is_location_input}, "
        f"is_date_related={self.is_date_related}, date_format={self.date_format}, "
        f"is_text_captcha={self.is_text_captcha}, intention={self.intention})"
    )
class ClickContext(BaseModel):
@@ -266,6 +277,7 @@ class NullAction(Action):
class SolveCaptchaAction(Action):
    # Action subtype whose action_type defaults to ActionType.SOLVE_CAPTCHA.
    action_type: ActionType = ActionType.SOLVE_CAPTCHA
    # Optional captcha category. Stays None when the parsed action carries no
    # "captcha_type" entry (the parser passes None in that case).
    captcha_type: CaptchaType | None = None
class SelectOptionAction(WebAction):

View File

@@ -19,6 +19,7 @@ from skyvern.utils.image_resizer import Resolution, scale_coordinates
from skyvern.webeye.actions.action_types import ActionType
from skyvern.webeye.actions.actions import (
Action,
CaptchaType,
CheckboxAction,
ClickAction,
ClickContext,
@@ -181,7 +182,10 @@ def parse_action(
return NullAction(**base_action_dict)
if action_type == ActionType.SOLVE_CAPTCHA:
return SolveCaptchaAction(**base_action_dict)
captcha_type: str | None = action.get("captcha_type")
return SolveCaptchaAction(
**base_action_dict, captcha_type=CaptchaType[captcha_type.upper()] if captcha_type else None
)
if action_type == ActionType.CLOSE_PAGE:
return ClosePageAction(**base_action_dict)