feat: null checks for doc and iframe, frame

This commit is contained in:
Rohit
2025-08-05 01:25:05 +05:30
parent 780b18b9f4
commit 6dac0827b0

View File

@@ -537,6 +537,11 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
const evaluateXPath = (document, xpath, isShadow = false) => { const evaluateXPath = (document, xpath, isShadow = false) => {
try { try {
if (!document || !xpath) {
console.warn('Invalid document or xpath provided to evaluateXPath');
return null;
}
const result = document.evaluate( const result = document.evaluate(
xpath, xpath,
document, document,
@@ -632,6 +637,7 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
return null; return null;
} catch (err) { } catch (err) {
console.error("Critical XPath failure:", xpath, err); console.error("Critical XPath failure:", xpath, err);
// Return null instead of throwing to prevent crashes
return null; return null;
} }
}; };
@@ -694,16 +700,25 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
for (let i = 0; i < parts.length; i++) { for (let i = 0; i < parts.length; i++) {
if (!currentElement) return null; if (!currentElement) return null;
// Handle iframe and frame traversal // Handle iframe and frame traversal with enhanced safety
if ( if (
currentElement.tagName === "IFRAME" || currentElement.tagName === "IFRAME" ||
currentElement.tagName === "FRAME" currentElement.tagName === "FRAME"
) { ) {
try { try {
// Check if frame is accessible
if (!currentElement.contentDocument && !currentElement.contentWindow) {
console.warn('Frame is not accessible (cross-origin or unloaded)');
return null;
}
const frameDoc = const frameDoc =
currentElement.contentDocument || currentElement.contentDocument ||
currentElement.contentWindow.document; currentElement.contentWindow?.document;
if (!frameDoc) return null; if (!frameDoc) {
console.warn('Frame document is not available');
return null;
}
if (isXPathSelector(parts[i])) { if (isXPathSelector(parts[i])) {
currentElement = evaluateXPath(frameDoc, parts[i]); currentElement = evaluateXPath(frameDoc, parts[i]);