feat: null checks for document and iframe/frame elements
This commit is contained in:
@@ -537,6 +537,11 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|||||||
|
|
||||||
const evaluateXPath = (document, xpath, isShadow = false) => {
|
const evaluateXPath = (document, xpath, isShadow = false) => {
|
||||||
try {
|
try {
|
||||||
|
if (!document || !xpath) {
|
||||||
|
console.warn('Invalid document or xpath provided to evaluateXPath');
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
const result = document.evaluate(
|
const result = document.evaluate(
|
||||||
xpath,
|
xpath,
|
||||||
document,
|
document,
|
||||||
@@ -632,6 +637,7 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|||||||
return null;
|
return null;
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error("Critical XPath failure:", xpath, err);
|
console.error("Critical XPath failure:", xpath, err);
|
||||||
|
// Return null instead of throwing to prevent crashes
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@@ -694,16 +700,25 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|||||||
for (let i = 0; i < parts.length; i++) {
|
for (let i = 0; i < parts.length; i++) {
|
||||||
if (!currentElement) return null;
|
if (!currentElement) return null;
|
||||||
|
|
||||||
// Handle iframe and frame traversal
|
// Handle iframe and frame traversal with enhanced safety
|
||||||
if (
|
if (
|
||||||
currentElement.tagName === "IFRAME" ||
|
currentElement.tagName === "IFRAME" ||
|
||||||
currentElement.tagName === "FRAME"
|
currentElement.tagName === "FRAME"
|
||||||
) {
|
) {
|
||||||
try {
|
try {
|
||||||
|
// Check if frame is accessible
|
||||||
|
if (!currentElement.contentDocument && !currentElement.contentWindow) {
|
||||||
|
console.warn('Frame is not accessible (cross-origin or unloaded)');
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
const frameDoc =
|
const frameDoc =
|
||||||
currentElement.contentDocument ||
|
currentElement.contentDocument ||
|
||||||
currentElement.contentWindow.document;
|
currentElement.contentWindow?.document;
|
||||||
if (!frameDoc) return null;
|
if (!frameDoc) {
|
||||||
|
console.warn('Frame document is not available');
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
if (isXPathSelector(parts[i])) {
|
if (isXPathSelector(parts[i])) {
|
||||||
currentElement = evaluateXPath(frameDoc, parts[i]);
|
currentElement = evaluateXPath(frameDoc, parts[i]);
|
||||||
|
|||||||
Reference in New Issue
Block a user