re-enable the download action (#4299)

This commit is contained in:
LawyZheng
2025-12-15 14:30:32 +08:00
committed by GitHub
parent 6178a20824
commit ce717146f3
9 changed files with 208 additions and 37 deletions

View File

@@ -1794,6 +1794,16 @@ async function buildElementTree(
elementObj = await buildElementObject(frame, element, interactable);
} else if (tagName === "div" && isDOMNodeRepresentDiv(element)) {
elementObj = await buildElementObject(frame, element, interactable);
} else if (
tagName === "embed" &&
element.getAttribute("type")?.toLowerCase() === "application/pdf"
) {
elementObj = await buildElementObject(
frame,
element,
interactable,
true,
);
} else if (
getElementText(element).length > 0 &&
getElementText(element).length <= 5000

View File

@@ -178,6 +178,32 @@ class ScrapedPage(BaseModel, ElementTreeBuilder):
self._clean_up_func = clean_up_func
self._scrape_exclude = scrape_exclude
def check_pdf_viewer_embed(self) -> str | None:
"""
Check if the page contains a PDF viewer embed.
If found, return the src attribute of the embed.
"""
if len(self.elements) != 1:
return None
element = self.elements[0]
if element.get("tagName", "") != "embed":
return None
attributes: dict = element.get("attributes", {})
if not attributes:
return None
type_attr: str | None = attributes.get("type")
if not type_attr:
return None
if type_attr.lower() != "application/pdf":
return None
LOG.info("Found a PDF viewer page", element=element)
return attributes.get("src", "")
def support_economy_elements_tree(self) -> bool:
    """
    Report whether this page supports the economy elements tree.

    Always returns ``True`` here.
    NOTE(review): what the "economy" tree is, and how callers use this
    flag, is not visible in this section — confirm against the callers.
    """
    return True