general text captcha solution (#4517)
This commit is contained in:
@@ -12,6 +12,16 @@ LOG = structlog.get_logger()
|
||||
T = TypeVar("T", bound="Action")
|
||||
|
||||
|
||||
class CaptchaType(StrEnum):
    """String-valued enumeration of the captcha kinds an action can be tagged with.

    Used as the type of ``SolveCaptchaAction.captcha_type``.
    """

    # NOTE(review): the action parser shown alongside this change resolves
    # members by NAME via ``CaptchaType[captcha_type.upper()]``, so each member
    # name must remain the upper-cased form of its value; a string that matches
    # no member name raises KeyError in that lookup.
    TEXT_CAPTCHA = "text_captcha"
    RECAPTCHA = "recaptcha"
    HCAPTCHA = "hcaptcha"
    MTCAPTCHA = "mtcaptcha"
    FUNCAPTCHA = "funcaptcha"
    CLOUDFLARE = "cloudflare"
    # Presumably the catch-all for captcha kinds not listed above -- confirm with callers.
    OTHER = "other"
|
||||
|
||||
|
||||
class ActionStatus(StrEnum):
|
||||
pending = "pending"
|
||||
skipped = "skipped"
|
||||
@@ -82,9 +92,10 @@ class InputOrSelectContext(BaseModel):
|
||||
is_location_input: bool | None = None # address input usually requires auto completion
|
||||
is_date_related: bool | None = None # date picker mini agent requires some special logic
|
||||
date_format: str | None = None
|
||||
is_text_captcha: bool | None = None
|
||||
|
||||
def __repr__(self) -> str:
    """Return a one-line debug representation of this context.

    The string lists ``field``, ``is_required``, ``is_search_bar``,
    ``is_location_input``, ``is_date_related``, ``date_format``,
    ``is_text_captcha`` and ``intention`` in that order, each rendered with
    its default ``str`` formatting.

    Returns:
        The formatted ``InputOrSelectContext(...)`` string.
    """
    # Only the complete representation is kept: a preceding, shorter return
    # statement omitted the date and text-captcha fields and made this one
    # unreachable.
    return f"InputOrSelectContext(field={self.field}, is_required={self.is_required}, is_search_bar={self.is_search_bar}, is_location_input={self.is_location_input}, is_date_related={self.is_date_related}, date_format={self.date_format}, is_text_captcha={self.is_text_captcha}, intention={self.intention})"
|
||||
|
||||
|
||||
class ClickContext(BaseModel):
|
||||
@@ -266,6 +277,7 @@ class NullAction(Action):
|
||||
|
||||
class SolveCaptchaAction(Action):
    """Action whose ``action_type`` defaults to ``ActionType.SOLVE_CAPTCHA``."""

    action_type: ActionType = ActionType.SOLVE_CAPTCHA
    # Kind of captcha this action targets; defaults to None when no kind is supplied.
    captcha_type: CaptchaType | None = None
|
||||
|
||||
|
||||
class SelectOptionAction(WebAction):
|
||||
|
||||
@@ -19,6 +19,7 @@ from skyvern.utils.image_resizer import Resolution, scale_coordinates
|
||||
from skyvern.webeye.actions.action_types import ActionType
|
||||
from skyvern.webeye.actions.actions import (
|
||||
Action,
|
||||
CaptchaType,
|
||||
CheckboxAction,
|
||||
ClickAction,
|
||||
ClickContext,
|
||||
@@ -181,7 +182,10 @@ def parse_action(
|
||||
return NullAction(**base_action_dict)
|
||||
|
||||
if action_type == ActionType.SOLVE_CAPTCHA:
|
||||
return SolveCaptchaAction(**base_action_dict)
|
||||
captcha_type: str | None = action.get("captcha_type")
|
||||
return SolveCaptchaAction(
|
||||
**base_action_dict, captcha_type=CaptchaType[captcha_type.upper()] if captcha_type else None
|
||||
)
|
||||
|
||||
if action_type == ActionType.CLOSE_PAGE:
|
||||
return ClosePageAction(**base_action_dict)
|
||||
|
||||
@@ -83,4 +83,5 @@ class BrowserState(Protocol):
|
||||
scroll: bool = True,
|
||||
support_empty_page: bool = False,
|
||||
wait_seconds: float = 0,
|
||||
must_included_tags: list[str] | None = None,
|
||||
) -> ScrapedPage: ...
|
||||
|
||||
@@ -394,6 +394,7 @@ class RealBrowserState(BrowserState):
|
||||
scroll: bool = True,
|
||||
support_empty_page: bool = False,
|
||||
wait_seconds: float = 0,
|
||||
must_included_tags: list[str] | None = None,
|
||||
) -> ScrapedPage:
|
||||
return await scraper.scrape_website(
|
||||
browser_state=self,
|
||||
@@ -408,6 +409,7 @@ class RealBrowserState(BrowserState):
|
||||
scroll=scroll,
|
||||
support_empty_page=support_empty_page,
|
||||
wait_seconds=wait_seconds,
|
||||
must_included_tags=must_included_tags,
|
||||
)
|
||||
|
||||
async def close(self, close_browser_on_completion: bool = True) -> None:
|
||||
|
||||
@@ -1683,6 +1683,7 @@ async function buildElementObject(
|
||||
async function buildTreeFromBody(
|
||||
frame = "main.frame",
|
||||
frame_index = undefined,
|
||||
must_included_tags = [],
|
||||
) {
|
||||
if (
|
||||
window.GlobalSkyvernFrameIndex === undefined &&
|
||||
@@ -1697,6 +1698,7 @@ async function buildTreeFromBody(
|
||||
false,
|
||||
undefined,
|
||||
maxElementNumber,
|
||||
must_included_tags,
|
||||
);
|
||||
DomUtils.elementListCache = elementsAndResultArray[0];
|
||||
return elementsAndResultArray;
|
||||
@@ -1708,12 +1710,19 @@ async function buildElementTree(
|
||||
full_tree = false,
|
||||
hoverStylesMap = undefined,
|
||||
maxElementNumber = 0,
|
||||
must_included_tags = [],
|
||||
) {
|
||||
// Generate hover styles map at the start
|
||||
if (hoverStylesMap === undefined) {
|
||||
hoverStylesMap = await getHoverStylesMap();
|
||||
}
|
||||
|
||||
if (must_included_tags.length > 0) {
|
||||
_jsConsoleLog(
|
||||
"full tree will be enabled as the must_included_tags is not empty",
|
||||
);
|
||||
full_tree = true;
|
||||
}
|
||||
if (window.GlobalEnableAllTextualElements === undefined) {
|
||||
window.GlobalEnableAllTextualElements = false;
|
||||
}
|
||||
@@ -1835,6 +1844,10 @@ async function buildElementTree(
|
||||
if (elementObj.text.length > 0) {
|
||||
elementObj.purgeable = false;
|
||||
}
|
||||
if (must_included_tags.includes(tagName)) {
|
||||
elementObj.purgeable = false;
|
||||
elementObj.interactable = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (elementObj) {
|
||||
|
||||
@@ -306,6 +306,7 @@ class ScrapedPage(BaseModel, ElementTreeBuilder):
|
||||
scroll: bool = True,
|
||||
take_screenshots: bool = True,
|
||||
max_retries: int = 0,
|
||||
must_included_tags: list[str] | None = None,
|
||||
) -> Self:
|
||||
return await self._browser_state.scrape_website(
|
||||
url=self.url,
|
||||
@@ -315,6 +316,7 @@ class ScrapedPage(BaseModel, ElementTreeBuilder):
|
||||
take_screenshots=take_screenshots,
|
||||
draw_boxes=draw_boxes,
|
||||
scroll=scroll,
|
||||
must_included_tags=must_included_tags,
|
||||
)
|
||||
|
||||
async def generate_scraped_page_without_screenshots(self, max_retries: int = 0) -> Self:
|
||||
|
||||
@@ -150,6 +150,7 @@ async def scrape_website(
|
||||
scroll: bool = True,
|
||||
support_empty_page: bool = False,
|
||||
wait_seconds: float = 0,
|
||||
must_included_tags: list[str] | None = None,
|
||||
) -> ScrapedPage:
|
||||
"""
|
||||
************************************************************************************************
|
||||
@@ -186,6 +187,7 @@ async def scrape_website(
|
||||
scroll=scroll,
|
||||
support_empty_page=support_empty_page,
|
||||
wait_seconds=wait_seconds,
|
||||
must_included_tags=must_included_tags,
|
||||
)
|
||||
except ScrapingFailedBlankPage:
|
||||
raise
|
||||
@@ -216,6 +218,7 @@ async def scrape_website(
|
||||
draw_boxes=draw_boxes,
|
||||
max_screenshot_number=max_screenshot_number,
|
||||
scroll=scroll,
|
||||
must_included_tags=must_included_tags,
|
||||
)
|
||||
|
||||
|
||||
@@ -269,6 +272,7 @@ async def scrape_web_unsafe(
|
||||
scroll: bool = True,
|
||||
support_empty_page: bool = False,
|
||||
wait_seconds: float = 0,
|
||||
must_included_tags: list[str] | None = None,
|
||||
) -> ScrapedPage:
|
||||
"""
|
||||
Asynchronous function that performs web scraping without any built-in error handling. This function is intended
|
||||
@@ -301,11 +305,11 @@ async def scrape_web_unsafe(
|
||||
LOG.info(f"Waiting for {wait_seconds} seconds before scraping the website.", wait_seconds=wait_seconds)
|
||||
await asyncio.sleep(wait_seconds)
|
||||
|
||||
elements, element_tree = await get_interactable_element_tree(page, scrape_exclude)
|
||||
elements, element_tree = await get_interactable_element_tree(page, scrape_exclude, must_included_tags)
|
||||
if not elements and not support_empty_page:
|
||||
LOG.warning("No elements found on the page, wait and retry")
|
||||
await empty_page_retry_wait()
|
||||
elements, element_tree = await get_interactable_element_tree(page, scrape_exclude)
|
||||
elements, element_tree = await get_interactable_element_tree(page, scrape_exclude, must_included_tags)
|
||||
|
||||
element_tree = await cleanup_element_tree(page, url, copy.deepcopy(element_tree))
|
||||
element_tree_trimmed = trim_element_tree(copy.deepcopy(element_tree))
|
||||
@@ -415,6 +419,7 @@ async def add_frame_interactable_elements(
|
||||
frame_index: int,
|
||||
elements: list[dict],
|
||||
element_tree: list[dict],
|
||||
must_included_tags: list[str] | None = None,
|
||||
) -> tuple[list[dict], list[dict]]:
|
||||
"""
|
||||
Add the interactable element of the frame to the elements and element_tree.
|
||||
@@ -444,7 +449,7 @@ async def add_frame_interactable_elements(
|
||||
await skyvern_frame.safe_wait_for_animation_end()
|
||||
|
||||
frame_elements, frame_element_tree = await skyvern_frame.build_tree_from_body(
|
||||
frame_name=skyvern_id, frame_index=frame_index
|
||||
frame_name=skyvern_id, frame_index=frame_index, must_included_tags=must_included_tags
|
||||
)
|
||||
|
||||
for element in elements:
|
||||
@@ -460,6 +465,7 @@ async def add_frame_interactable_elements(
|
||||
async def get_interactable_element_tree(
|
||||
page: Page,
|
||||
scrape_exclude: ScrapeExcludeFunc | None = None,
|
||||
must_included_tags: list[str] | None = None,
|
||||
) -> tuple[list[dict], list[dict]]:
|
||||
"""
|
||||
Get the element tree of the page, including all the elements that are interactable.
|
||||
@@ -468,7 +474,9 @@ async def get_interactable_element_tree(
|
||||
"""
|
||||
# main page index is 0
|
||||
skyvern_page = await SkyvernFrame.create_instance(page)
|
||||
elements, element_tree = await skyvern_page.build_tree_from_body(frame_name="main.frame", frame_index=0)
|
||||
elements, element_tree = await skyvern_page.build_tree_from_body(
|
||||
frame_name="main.frame", frame_index=0, must_included_tags=must_included_tags
|
||||
)
|
||||
|
||||
context = skyvern_context.ensure_context()
|
||||
frames = await get_all_children_frames(page)
|
||||
@@ -487,6 +495,7 @@ async def get_interactable_element_tree(
|
||||
frame_index,
|
||||
elements,
|
||||
element_tree,
|
||||
must_included_tags,
|
||||
)
|
||||
|
||||
return elements, element_tree
|
||||
|
||||
@@ -489,11 +489,16 @@ class SkyvernFrame:
|
||||
self,
|
||||
frame_name: str | None,
|
||||
frame_index: int,
|
||||
must_included_tags: list[str] | None = None,
|
||||
timeout_ms: float = SettingsManager.get_settings().BROWSER_SCRAPING_BUILDING_ELEMENT_TREE_TIMEOUT_MS,
|
||||
) -> tuple[list[dict], list[dict]]:
|
||||
js_script = "async ([frame_name, frame_index]) => await buildTreeFromBody(frame_name, frame_index)"
|
||||
must_included_tags = must_included_tags or []
|
||||
js_script = "async ([frame_name, frame_index, must_included_tags]) => await buildTreeFromBody(frame_name, frame_index, must_included_tags)"
|
||||
return await self.evaluate(
|
||||
frame=self.frame, expression=js_script, timeout_ms=timeout_ms, arg=[frame_name, frame_index]
|
||||
frame=self.frame,
|
||||
expression=js_script,
|
||||
timeout_ms=timeout_ms,
|
||||
arg=[frame_name, frame_index, must_included_tags],
|
||||
)
|
||||
|
||||
@TraceManager.traced_async()
|
||||
|
||||
Reference in New Issue
Block a user