re-enable the download action (#4299)
This commit is contained in:
@@ -1794,6 +1794,16 @@ async function buildElementTree(
|
||||
elementObj = await buildElementObject(frame, element, interactable);
|
||||
} else if (tagName === "div" && isDOMNodeRepresentDiv(element)) {
|
||||
elementObj = await buildElementObject(frame, element, interactable);
|
||||
} else if (
|
||||
tagName === "embed" &&
|
||||
element.getAttribute("type")?.toLowerCase() === "application/pdf"
|
||||
) {
|
||||
elementObj = await buildElementObject(
|
||||
frame,
|
||||
element,
|
||||
interactable,
|
||||
true,
|
||||
);
|
||||
} else if (
|
||||
getElementText(element).length > 0 &&
|
||||
getElementText(element).length <= 5000
|
||||
|
||||
@@ -178,6 +178,32 @@ class ScrapedPage(BaseModel, ElementTreeBuilder):
|
||||
self._clean_up_func = clean_up_func
|
||||
self._scrape_exclude = scrape_exclude
|
||||
|
||||
def check_pdf_viewer_embed(self) -> str | None:
|
||||
"""
|
||||
Check if the page contains a PDF viewer embed.
|
||||
If found, return the src attribute of the embed.
|
||||
"""
|
||||
if len(self.elements) != 1:
|
||||
return None
|
||||
|
||||
element = self.elements[0]
|
||||
if element.get("tagName", "") != "embed":
|
||||
return None
|
||||
|
||||
attributes: dict = element.get("attributes", {})
|
||||
if not attributes:
|
||||
return None
|
||||
|
||||
type_attr: str | None = attributes.get("type")
|
||||
if not type_attr:
|
||||
return None
|
||||
|
||||
if type_attr.lower() != "application/pdf":
|
||||
return None
|
||||
|
||||
LOG.info("Found a PDF viewer page", element=element)
|
||||
return attributes.get("src", "")
|
||||
|
||||
# Unconditionally returns True. Presumably signals to callers that this page
# type can produce the "economy" elements tree — TODO confirm against callers.
def support_economy_elements_tree(self) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
Reference in New Issue
Block a user