diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index d6489e6b..d9765f3a 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -189,111 +189,145 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, * @returns {Array.>} */ window.scrapeSchema = function(lists) { + // Utility functions remain the same function omap(object, f, kf = (x) => x) { - return Object.fromEntries( - Object.entries(object) - .map(([k, v]) => [kf(k), f(v)]), - ); + return Object.fromEntries( + Object.entries(object) + .map(([k, v]) => [kf(k), f(v)]), + ); } function ofilter(object, f) { - return Object.fromEntries( - Object.entries(object) - .filter(([k, v]) => f(k, v)), - ); - } - - function findAllElements(config) { - if (!config.shadow || !config.selector.includes('>>')) { - return Array.from(document.querySelectorAll(config.selector)); - } - - // For shadow DOM, we'll get all possible combinations - const parts = config.selector.split('>>').map(s => s.trim()); - let currentElements = [document]; - - for (let i = 0; i < parts.length; i++) { - const part = parts[i]; - const nextElements = []; - - for (const element of currentElements) { - let targets; - if (i === 0) { - // First selector is queried from document - targets = Array.from(element.querySelectorAll(part)) - .filter(el => { - // Only include elements that either: - // 1. Have an open shadow root - // 2. 
Don't need shadow root (last part of selector) - if (i === parts.length - 1) return true; - const shadowRoot = el.shadowRoot; - return shadowRoot && shadowRoot.mode === 'open'; - }); - } else { - // For subsequent selectors, only use elements with open shadow roots - const shadowRoot = element.shadowRoot; - if (!shadowRoot || shadowRoot.mode !== 'open') continue; - - targets = Array.from(shadowRoot.querySelectorAll(part)); - } - nextElements.push(...targets); - } - - if (nextElements.length === 0) return []; - currentElements = nextElements; - } - - return currentElements; - } - - function getElementValue(element, attribute) { - if (!element) return null; - - switch (attribute) { - case 'href': { - const relativeHref = element.getAttribute('href'); - return relativeHref ? new URL(relativeHref, window.location.origin).href : null; - } - case 'src': { - const relativeSrc = element.getAttribute('src'); - return relativeSrc ? new URL(relativeSrc, window.location.origin).href : null; - } - case 'innerText': - return element.innerText?.trim(); - case 'textContent': - return element.textContent?.trim(); - default: - return element.getAttribute(attribute) || element.innerText?.trim(); - } + return Object.fromEntries( + Object.entries(object) + .filter(([k, v]) => f(k, v)), + ); } - // Get the seed key based on the maximum number of elements found + function findAllElements(config) { + // Regular DOM query if no special delimiters + if (!config.selector.includes('>>') && !config.selector.includes(':>>')) { + return Array.from(document.querySelectorAll(config.selector)); + } + + // Split by both types of delimiters + const parts = config.selector.split(/(?:>>|:>>)/).map(s => s.trim()); + const delimiters = config.selector.match(/(?:>>|:>>)/g) || []; + let currentElements = [document]; + + for (let i = 0; i < parts.length; i++) { + const part = parts[i]; + const nextElements = []; + const isLast = i === parts.length - 1; + const delimiter = delimiters[i] || ''; + const 
isIframeTraversal = delimiter === ':>>'; + + for (const element of currentElements) { + try { + let targets; + + if (i === 0) { + // First selector is queried from main document + targets = Array.from(element.querySelectorAll(part)) + .filter(el => { + if (isLast) return true; + // For iframe traversal, only include iframes + if (isIframeTraversal) return el.tagName === 'IFRAME'; + // For shadow DOM traversal, only include elements with shadow root + return el.shadowRoot && el.shadowRoot.mode === 'open'; + }); + } else { + if (isIframeTraversal) { + // Handle iframe traversal + const iframeDocument = element.contentDocument || element.contentWindow?.document; + if (!iframeDocument) continue; + + targets = Array.from(iframeDocument.querySelectorAll(part)); + if (!isLast) { + targets = targets.filter(el => el.tagName === 'IFRAME'); + } + } else { + // Handle shadow DOM traversal + const shadowRoot = element.shadowRoot; + if (!shadowRoot || shadowRoot.mode !== 'open') continue; + + targets = Array.from(shadowRoot.querySelectorAll(part)); + if (!isLast) { + targets = targets.filter(el => el.shadowRoot && el.shadowRoot.mode === 'open'); + } + } + } + + nextElements.push(...targets); + } catch (error) { + console.warn('Cannot access content:', error); + continue; + } + } + + if (nextElements.length === 0) return []; + currentElements = nextElements; + } + + return currentElements; + } + + // Modified to handle iframe context for URL resolution + function getElementValue(element, attribute) { + if (!element) return null; + + // Get the base URL for resolving relative URLs + const baseURL = element.ownerDocument?.location?.href || window.location.origin; + + switch (attribute) { + case 'href': { + const relativeHref = element.getAttribute('href'); + return relativeHref ? new URL(relativeHref, baseURL).href : null; + } + case 'src': { + const relativeSrc = element.getAttribute('src'); + return relativeSrc ? 
new URL(relativeSrc, baseURL).href : null; + } + case 'innerText': + return element.innerText?.trim(); + case 'textContent': + return element.textContent?.trim(); + default: + return element.getAttribute(attribute) || element.innerText?.trim(); + } + } + + // Rest of the functions remain largely the same function getSeedKey(listObj) { - const maxLength = Math.max(...Object.values( - omap(listObj, (x) => findAllElements(x).length) - )); - return Object.keys( - ofilter(listObj, (_, v) => findAllElements(v).length === maxLength) - )[0]; + const maxLength = Math.max(...Object.values( + omap(listObj, (x) => findAllElements(x).length) + )); + return Object.keys( + ofilter(listObj, (_, v) => findAllElements(v).length === maxLength) + )[0]; } // Find minimal bounding elements function getMBEs(elements) { return elements.map((element) => { - let candidate = element; - const isUniqueChild = (e) => elements - .filter((elem) => e.parentNode?.contains(elem)) - .length === 1; - - while (candidate && isUniqueChild(candidate)) { - candidate = candidate.parentNode; - } - - return candidate; + let candidate = element; + const isUniqueChild = (e) => elements + .filter((elem) => { + // Handle both iframe and shadow DOM boundaries + const sameContext = elem.getRootNode() === e.getRootNode() && + elem.ownerDocument === e.ownerDocument; + return sameContext && e.parentNode?.contains(elem); + }) + .length === 1; + + while (candidate && isUniqueChild(candidate)) { + candidate = candidate.parentNode; + } + + return candidate; }); } - // First try the MBE approach const seedName = getSeedKey(lists); const seedElements = findAllElements(lists[seedName]); const MBEs = getMBEs(seedElements); @@ -347,164 +381,210 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, * @returns {Array.>} Array of arrays of scraped items, one sub-array per list */ window.scrapeList = async function ({ listSelector, fields, limit = 10 }) { - // Shadow DOM query functions remain 
unchanged - const queryShadowDOM = (rootElement, selector) => { - if (!selector.includes('>>')) { + // Enhanced query function to handle both iframe and shadow DOM + const queryElement = (rootElement, selector) => { + if (!selector.includes('>>') && !selector.includes(':>>')) { return rootElement.querySelector(selector); - } + } - const parts = selector.split('>>').map(part => part.trim()); - let currentElement = rootElement; + const parts = selector.split(/(?:>>|:>>)/).map(part => part.trim()); + let currentElement = rootElement; - for (let i = 0; i < parts.length; i++) { - if (!currentElement) return null; + for (let i = 0; i < parts.length; i++) { + if (!currentElement) return null; - if (!currentElement.querySelector && !currentElement.shadowRoot) { - currentElement = document.querySelector(parts[i]); - continue; - } + // Handle iframe traversal + if (currentElement.tagName === 'IFRAME') { + try { + const iframeDoc = currentElement.contentDocument || currentElement.contentWindow.document; + currentElement = iframeDoc.querySelector(parts[i]); + continue; + } catch (e) { + console.warn('Cannot access iframe content:', e); + return null; + } + } - let nextElement = currentElement.querySelector(parts[i]); + // Try regular DOM first + let nextElement = currentElement.querySelector(parts[i]); - if (!nextElement && currentElement.shadowRoot) { - nextElement = currentElement.shadowRoot.querySelector(parts[i]); - } + // Try shadow DOM if not found + if (!nextElement && currentElement.shadowRoot) { + nextElement = currentElement.shadowRoot.querySelector(parts[i]); + } - if (!nextElement) { - const allChildren = Array.from(currentElement.children || []); - for (const child of allChildren) { - if (child.shadowRoot) { - nextElement = child.shadowRoot.querySelector(parts[i]); - if (nextElement) break; - } - } - } + // Check children's shadow roots if still not found + if (!nextElement) { + const children = Array.from(currentElement.children || []); + for (const child of 
children) { + if (child.shadowRoot) { + nextElement = child.shadowRoot.querySelector(parts[i]); + if (nextElement) break; + } + } + } - currentElement = nextElement; - } + currentElement = nextElement; + } - return currentElement; + return currentElement; }; - const queryShadowDOMAll = (rootElement, selector) => { - if (!selector.includes('>>')) { + // Enhanced query all function for both contexts + const queryElementAll = (rootElement, selector) => { + if (!selector.includes('>>') && !selector.includes(':>>')) { return rootElement.querySelectorAll(selector); - } + } - const parts = selector.split('>>').map(part => part.trim()); - let currentElements = [rootElement]; - - for (const part of parts) { - const nextElements = []; - - for (const element of currentElements) { - if (element.querySelectorAll) { - nextElements.push(...element.querySelectorAll(part)); - } - - if (element.shadowRoot) { - nextElements.push(...element.shadowRoot.querySelectorAll(part)); - } - - const children = Array.from(element.children || []); - for (const child of children) { - if (child.shadowRoot) { - nextElements.push(...child.shadowRoot.querySelectorAll(part)); - } - } - } - - currentElements = nextElements; - } - - return currentElements; + const parts = selector.split(/(?:>>|:>>)/).map(part => part.trim()); + let currentElements = [rootElement]; + + for (const part of parts) { + const nextElements = []; + + for (const element of currentElements) { + // Handle iframe traversal + if (element.tagName === 'IFRAME') { + try { + const iframeDoc = element.contentDocument || element.contentWindow.document; + nextElements.push(...iframeDoc.querySelectorAll(part)); + } catch (e) { + console.warn('Cannot access iframe content:', e); + continue; + } + } else { + // Regular DOM elements + if (element.querySelectorAll) { + nextElements.push(...element.querySelectorAll(part)); + } + + // Shadow DOM elements + if (element.shadowRoot) { + nextElements.push(...element.shadowRoot.querySelectorAll(part)); 
+ } + + // Check children's shadow roots + const children = Array.from(element.children || []); + for (const child of children) { + if (child.shadowRoot) { + nextElements.push(...child.shadowRoot.querySelectorAll(part)); + } + } + } + } + + currentElements = nextElements; + } + + return currentElements; }; - // Enhanced table processing helper functions with shadow DOM support + // Enhanced value extraction with context awareness function extractValue(element, attribute) { - if (!element) return null; - - // Check for shadow root first - if (element.shadowRoot) { - const shadowContent = element.shadowRoot.textContent; - if (shadowContent && shadowContent.trim()) { - return shadowContent.trim(); - } - } - - if (attribute === 'innerText') { - return element.innerText.trim(); - } else if (attribute === 'innerHTML') { - return element.innerHTML.trim(); - } else if (attribute === 'src' || attribute === 'href') { - const attrValue = element.getAttribute(attribute); - return attrValue ? new URL(attrValue, window.location.origin).href : null; - } - return element.getAttribute(attribute); + if (!element) return null; + + // Get context-aware base URL + const baseURL = element.ownerDocument?.location?.href || window.location.origin; + + // Check shadow root first + if (element.shadowRoot) { + const shadowContent = element.shadowRoot.textContent; + if (shadowContent?.trim()) { + return shadowContent.trim(); + } + } + + if (attribute === 'innerText') { + return element.innerText.trim(); + } else if (attribute === 'innerHTML') { + return element.innerHTML.trim(); + } else if (attribute === 'src' || attribute === 'href') { + const attrValue = element.getAttribute(attribute); + return attrValue ? 
new URL(attrValue, baseURL).href : null; + } + return element.getAttribute(attribute); } + // Enhanced table ancestor finding with context support function findTableAncestor(element) { - let currentElement = element; - const MAX_DEPTH = 5; - let depth = 0; - - while (currentElement && depth < MAX_DEPTH) { - // Check if current element is in shadow DOM - if (currentElement.getRootNode() instanceof ShadowRoot) { - currentElement = currentElement.getRootNode().host; - continue; - } - - if (currentElement.tagName === 'TD') { - return { type: 'TD', element: currentElement }; - } else if (currentElement.tagName === 'TR') { - return { type: 'TR', element: currentElement }; - } - currentElement = currentElement.parentElement; - depth++; - } - return null; + let currentElement = element; + const MAX_DEPTH = 5; + let depth = 0; + + while (currentElement && depth < MAX_DEPTH) { + // Handle shadow DOM + if (currentElement.getRootNode() instanceof ShadowRoot) { + currentElement = currentElement.getRootNode().host; + continue; + } + + if (currentElement.tagName === 'TD') { + return { type: 'TD', element: currentElement }; + } else if (currentElement.tagName === 'TR') { + return { type: 'TR', element: currentElement }; + } + + // Handle iframe crossing + if (currentElement.tagName === 'IFRAME') { + try { + currentElement = currentElement.contentDocument.body; + } catch (e) { + return null; + } + } else { + currentElement = currentElement.parentElement; + } + depth++; + } + return null; } + // Helper function to get cell index function getCellIndex(td) { - let index = 0; - let sibling = td; - - // Handle shadow DOM case - if (td.getRootNode() instanceof ShadowRoot) { - const shadowRoot = td.getRootNode(); - const allCells = Array.from(shadowRoot.querySelectorAll('td')); - return allCells.indexOf(td); - } - - while (sibling = sibling.previousElementSibling) { - index++; - } - return index; + if (td.getRootNode() instanceof ShadowRoot) { + const shadowRoot = td.getRootNode(); + 
const allCells = Array.from(shadowRoot.querySelectorAll('td')); + return allCells.indexOf(td); + } + + let index = 0; + let sibling = td; + while (sibling = sibling.previousElementSibling) { + index++; + } + return index; } + // Helper function to check for TH elements function hasThElement(row, tableFields) { - for (const [label, { selector }] of Object.entries(tableFields)) { - const element = queryShadowDOM(row, selector); - if (element) { - let current = element; - while (current && current !== row) { - // Check if we're in shadow DOM - if (current.getRootNode() instanceof ShadowRoot) { - current = current.getRootNode().host; - continue; - } - - if (current.tagName === 'TH') { - return true; - } - current = current.parentElement; - } - } - } - return false; + for (const [_, { selector }] of Object.entries(tableFields)) { + const element = queryElement(row, selector); + if (element) { + let current = element; + while (current && current !== row) { + if (current.getRootNode() instanceof ShadowRoot) { + current = current.getRootNode().host; + continue; + } + + if (current.tagName === 'TH') return true; + + if (current.tagName === 'IFRAME') { + try { + current = current.contentDocument.body; + } catch (e) { + break; + } + } else { + current = current.parentElement; + } + } + } + } + return false; } + // Helper function to filter rows function filterRowsBasedOnTag(rows, tableFields) { for (const row of rows) { if (hasThElement(row, tableFields)) { @@ -520,7 +600,7 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, }); } - // Class similarity functions remain unchanged + // Class similarity comparison functions function calculateClassSimilarity(classList1, classList2) { const set1 = new Set(classList1); const set2 = new Set(classList2); @@ -529,189 +609,237 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, return intersection.size / union.size; } + // Enhanced similar elements finding with context 
support function findSimilarElements(baseElement, similarityThreshold = 0.7) { - const baseClasses = Array.from(baseElement.classList); - if (baseClasses.length === 0) return []; - const potentialElements = document.getElementsByTagName(baseElement.tagName); - return Array.from(potentialElements).filter(element => { - if (element === baseElement) return false; - const similarity = calculateClassSimilarity( - baseClasses, - Array.from(element.classList) - ); - return similarity >= similarityThreshold; - }); + const baseClasses = Array.from(baseElement.classList); + if (baseClasses.length === 0) return []; + + const allElements = []; + + // Get elements from main document + allElements.push(...document.getElementsByTagName(baseElement.tagName)); + + // Get elements from shadow DOM + if (baseElement.getRootNode() instanceof ShadowRoot) { + const shadowHost = baseElement.getRootNode().host; + allElements.push(...shadowHost.getElementsByTagName(baseElement.tagName)); + } + + // Get elements from iframes + const iframes = document.getElementsByTagName('iframe'); + for (const iframe of iframes) { + try { + const iframeDoc = iframe.contentDocument || iframe.contentWindow.document; + allElements.push(...iframeDoc.getElementsByTagName(baseElement.tagName)); + } catch (e) { + console.warn('Cannot access iframe content:', e); + } + } + + return allElements.filter(element => { + if (element === baseElement) return false; + const similarity = calculateClassSimilarity( + baseClasses, + Array.from(element.classList) + ); + return similarity >= similarityThreshold; + }); } - // Main scraping logic with shadow DOM support - let containers = queryShadowDOMAll(document, listSelector); + // Main scraping logic with context support + let containers = queryElementAll(document, listSelector); containers = Array.from(containers); if (containers.length === 0) return []; if (limit > 1 && containers.length === 1) { - const baseContainer = containers[0]; - const similarContainers = 
findSimilarElements(baseContainer); - - if (similarContainers.length > 0) { - const newContainers = similarContainers.filter(container => - !container.matches(listSelector) - ); - containers = [...containers, ...newContainers]; - } + const baseContainer = containers[0]; + const similarContainers = findSimilarElements(baseContainer); + + if (similarContainers.length > 0) { + const newContainers = similarContainers.filter(container => + !container.matches(listSelector) + ); + containers = [...containers, ...newContainers]; + } } const containerFields = containers.map(() => ({ - tableFields: {}, - nonTableFields: {} + tableFields: {}, + nonTableFields: {} })); // Classify fields containers.forEach((container, containerIndex) => { - for (const [label, field] of Object.entries(fields)) { - const sampleElement = queryShadowDOM(container, field.selector); - - if (sampleElement) { - const ancestor = findTableAncestor(sampleElement); - if (ancestor) { - containerFields[containerIndex].tableFields[label] = { - ...field, - tableContext: ancestor.type, - cellIndex: ancestor.type === 'TD' ? getCellIndex(ancestor.element) : -1 - }; - } else { - containerFields[containerIndex].nonTableFields[label] = field; - } + for (const [label, field] of Object.entries(fields)) { + const sampleElement = queryElement(container, field.selector); + + if (sampleElement) { + const ancestor = findTableAncestor(sampleElement); + if (ancestor) { + containerFields[containerIndex].tableFields[label] = { + ...field, + tableContext: ancestor.type, + cellIndex: ancestor.type === 'TD' ? 
getCellIndex(ancestor.element) : -1 + }; } else { containerFields[containerIndex].nonTableFields[label] = field; } + } else { + containerFields[containerIndex].nonTableFields[label] = field; } + } }); const tableData = []; const nonTableData = []; - // Process table data with shadow DOM support + // Process table data with both iframe and shadow DOM support for (let containerIndex = 0; containerIndex < containers.length; containerIndex++) { - const container = containers[containerIndex]; - const { tableFields } = containerFields[containerIndex]; + const container = containers[containerIndex]; + const { tableFields } = containerFields[containerIndex]; - if (Object.keys(tableFields).length > 0) { - const firstField = Object.values(tableFields)[0]; - const firstElement = queryShadowDOM(container, firstField.selector); - let tableContext = firstElement; - - // Find table context including shadow DOM - while (tableContext && tableContext.tagName !== 'TABLE' && tableContext !== container) { - if (tableContext.getRootNode() instanceof ShadowRoot) { - tableContext = tableContext.getRootNode().host; - } else { - tableContext = tableContext.parentElement; - } - } + if (Object.keys(tableFields).length > 0) { + const firstField = Object.values(tableFields)[0]; + const firstElement = queryElement(container, firstField.selector); + let tableContext = firstElement; + + // Find table context including both iframe and shadow DOM + while (tableContext && tableContext.tagName !== 'TABLE' && tableContext !== container) { + if (tableContext.getRootNode() instanceof ShadowRoot) { + tableContext = tableContext.getRootNode().host; + continue; + } + + if (tableContext.tagName === 'IFRAME') { + try { + tableContext = tableContext.contentDocument.body; + } catch (e) { + break; + } + } else { + tableContext = tableContext.parentElement; + } + } - if (tableContext) { - // Get rows from both regular DOM and shadow DOM - const rows = []; - if (tableContext.shadowRoot) { - 
rows.push(...tableContext.shadowRoot.getElementsByTagName('TR')); - } - rows.push(...tableContext.getElementsByTagName('TR')); - - const processedRows = filterRowsBasedOnTag(rows, tableFields); - - for (let rowIndex = 0; rowIndex < Math.min(processedRows.length, limit); rowIndex++) { - const record = {}; - const currentRow = processedRows[rowIndex]; - - for (const [label, { selector, attribute, cellIndex }] of Object.entries(tableFields)) { - let element = null; - - if (cellIndex >= 0) { - let td = currentRow.children[cellIndex]; - - // Check shadow DOM for td - if (!td && currentRow.shadowRoot) { - const shadowCells = currentRow.shadowRoot.children; - if (shadowCells && shadowCells.length > cellIndex) { - td = shadowCells[cellIndex]; - } - } - - if (td) { - element = queryShadowDOM(td, selector); - - if (!element && selector.split(">").pop().includes('td:nth-child')) { - element = td; - } + if (tableContext) { + // Get rows from all contexts + const rows = []; + + // Get rows from regular DOM + rows.push(...tableContext.getElementsByTagName('TR')); + + // Get rows from shadow DOM + if (tableContext.shadowRoot) { + rows.push(...tableContext.shadowRoot.getElementsByTagName('TR')); + } + + // Get rows from iframes + if (tableContext.tagName === 'IFRAME') { + try { + const iframeDoc = tableContext.contentDocument || tableContext.contentWindow.document; + rows.push(...iframeDoc.getElementsByTagName('TR')); + } catch (e) { + console.warn('Cannot access iframe rows:', e); + } + } + + const processedRows = filterRowsBasedOnTag(rows, tableFields); + + for (let rowIndex = 0; rowIndex < Math.min(processedRows.length, limit); rowIndex++) { + const record = {}; + const currentRow = processedRows[rowIndex]; + + for (const [label, { selector, attribute, cellIndex }] of Object.entries(tableFields)) { + let element = null; + + if (cellIndex >= 0) { + // Get TD element considering both contexts + let td = currentRow.children[cellIndex]; + + // Check shadow DOM for td + if (!td && 
currentRow.shadowRoot) { + const shadowCells = currentRow.shadowRoot.children; + if (shadowCells && shadowCells.length > cellIndex) { + td = shadowCells[cellIndex]; + } + } + + if (td) { + element = queryElement(td, selector); + + if (!element && selector.split(/(?:>>|:>>)/).pop().includes('td:nth-child')) { + element = td; + } - if (!element) { - const tagOnlySelector = selector.split('.')[0]; - element = queryShadowDOM(td, tagOnlySelector); - } - - if (!element) { - let currentElement = td; - while (currentElement && currentElement.children.length > 0) { - let foundContentChild = false; - for (const child of currentElement.children) { - if (extractValue(child, attribute)) { - currentElement = child; - foundContentChild = true; - break; - } - } - if (!foundContentChild) break; - } - element = currentElement; - } - } - } else { - element = queryShadowDOM(currentRow, selector); - } - - if (element) { - record[label] = extractValue(element, attribute); - } - } + if (!element) { + const tagOnlySelector = selector.split('.')[0]; + element = queryElement(td, tagOnlySelector); + } + + if (!element) { + let currentElement = td; + while (currentElement && currentElement.children.length > 0) { + let foundContentChild = false; + for (const child of currentElement.children) { + if (extractValue(child, attribute)) { + currentElement = child; + foundContentChild = true; + break; + } + } + if (!foundContentChild) break; + } + element = currentElement; + } + } + } else { + element = queryElement(currentRow, selector); + } + + if (element) { + record[label] = extractValue(element, attribute); + } + } - if (Object.keys(record).length > 0) { - tableData.push(record); - } - } - } - } + if (Object.keys(record).length > 0) { + tableData.push(record); + } + } + } + } } - // Non-table data scraping remains unchanged + // Process non-table data with both contexts support for (let containerIndex = 0; containerIndex < containers.length; containerIndex++) { - if (nonTableData.length >= 
limit) break; + if (nonTableData.length >= limit) break; - const container = containers[containerIndex]; - const { nonTableFields } = containerFields[containerIndex]; + const container = containers[containerIndex]; + const { nonTableFields } = containerFields[containerIndex]; - if (Object.keys(nonTableFields).length > 0) { - const record = {}; + if (Object.keys(nonTableFields).length > 0) { + const record = {}; - for (const [label, { selector, attribute }] of Object.entries(nonTableFields)) { - const relativeSelector = selector.split('>>').slice(-1)[0]; - const element = queryShadowDOM(container, relativeSelector); - - if (element) { - record[label] = extractValue(element, attribute); - } - } - - if (Object.keys(record).length > 0) { - nonTableData.push(record); - } - } + for (const [label, { selector, attribute }] of Object.entries(nonTableFields)) { + // Get the last part of the selector after any context delimiter + const relativeSelector = selector.split(/(?:>>|:>>)/).slice(-1)[0]; + const element = queryElement(container, relativeSelector); + + if (element) { + record[label] = extractValue(element, attribute); + } + } + + if (Object.keys(record).length > 0) { + nonTableData.push(record); + } + } } // Merge and limit the results const scrapedData = [...tableData, ...nonTableData]; return scrapedData; -}; + }; /** * Gets all children of the elements matching the listSelector, diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 3cef8c29..e09ac5d5 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -680,11 +680,25 @@ export default class Interpreter extends EventEmitter { return workflow; } + private removeSpecialSelectors(workflow: Workflow) { + for (let actionId = workflow.length - 1; actionId >= 0; actionId--) { + const step = workflow[actionId]; + + if (step.where && Array.isArray(step.where.selectors)) { + // Filter out if selector has EITHER ":>>" OR ">>" + step.where.selectors = 
step.where.selectors.filter(selector => + !(selector.includes(':>>') || selector.includes('>>')) + ); + } + } + + return workflow; + } + private async runLoop(p: Page, workflow: Workflow) { let workflowCopy: Workflow = JSON.parse(JSON.stringify(workflow)); - // remove shadow selectors - workflowCopy = this.removeShadowSelectors(workflowCopy); + workflowCopy = this.removeSpecialSelectors(workflowCopy); // apply ad-blocker to the current page try { diff --git a/server/src/types/index.ts b/server/src/types/index.ts index 151e3dd4..75aac802 100644 --- a/server/src/types/index.ts +++ b/server/src/types/index.ts @@ -129,6 +129,12 @@ export interface BaseActionInfo { hasOnlyText: boolean; } + +interface IframeSelector { + full: string; + isIframe: boolean; +} + interface ShadowSelector { full: string; mode: string; @@ -148,7 +154,8 @@ export interface Selectors { hrefSelector: string|null; accessibilitySelector: string|null; formSelector: string|null; - shadowSelector: ShadowSelector | null; + iframeSelector: IframeSelector|null; + shadowSelector: ShadowSelector|null; } /** diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index 82e50d9f..82464e63 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -23,39 +23,86 @@ export const getElementInformation = async ( if (!getList || listSelector !== '') { const elementInfo = await page.evaluate( async ({ x, y }) => { - // Enhanced helper function to get element from point including shadow DOM const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { + // First, get the element at the clicked coordinates in the main document let element = document.elementFromPoint(x, y) as HTMLElement; if (!element) return null; - // Traverse through shadow roots - let current = element; - let shadowRoot = current.shadowRoot; - - // Keep track of the deepest shadow DOM element found - let deepestElement = current; - - 
while (shadowRoot) { - // Try to find element at same point in shadow DOM - const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; - if (!shadowElement || shadowElement === current) break; - - // Update our tracking of the deepest element - deepestElement = shadowElement; - current = shadowElement; - shadowRoot = current.shadowRoot; + // Track the deepest element found + let deepestElement = element; + + // Function to traverse shadow DOM + const traverseShadowDOM = (element: HTMLElement): HTMLElement => { + let current = element; + let shadowRoot = current.shadowRoot; + let deepest = current; + + while (shadowRoot) { + const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; + if (!shadowElement || shadowElement === current) break; + + deepest = shadowElement; + current = shadowElement; + shadowRoot = current.shadowRoot; + } + + return deepest; + }; + + // Handle iframe traversal + if (element.tagName === 'IFRAME') { + let currentIframe = element as HTMLIFrameElement; + + while (currentIframe) { + try { + // Convert coordinates to iframe's local space + const iframeRect = currentIframe.getBoundingClientRect(); + const iframeX = x - iframeRect.left; + const iframeY = y - iframeRect.top; + + const iframeDocument = currentIframe.contentDocument || currentIframe.contentWindow?.document; + if (!iframeDocument) break; + + const iframeElement = iframeDocument.elementFromPoint(iframeX, iframeY) as HTMLElement; + if (!iframeElement) break; + + // Update deepest element and check for shadow DOM + deepestElement = traverseShadowDOM(iframeElement); + + // Continue traversing if we found another iframe + if (iframeElement.tagName === 'IFRAME') { + currentIframe = iframeElement as HTMLIFrameElement; + } else { + break; + } + } catch (error) { + console.warn('Cannot access iframe content:', error); + break; + } + } + } else { + // If not an iframe, check for shadow DOM + deepestElement = traverseShadowDOM(element); } return deepestElement; }; + // 
Get the element and its iframe path const el = getDeepestElementFromPoint(x, y); + if (el) { + // Handle potential anchor parent const { parentElement } = el; - const element = parentElement?.tagName === 'A' ? parentElement : el; + const targetElement = parentElement?.tagName === 'A' ? parentElement : el; + + // Get containing context information + const ownerDocument = targetElement.ownerDocument; + const frameElement = ownerDocument?.defaultView?.frameElement as HTMLIFrameElement; + const isIframeContent = Boolean(frameElement); // Get the containing shadow root if any - const containingShadowRoot = element.getRootNode() as ShadowRoot; + const containingShadowRoot = targetElement.getRootNode() as ShadowRoot; const isShadowRoot = containingShadowRoot instanceof ShadowRoot; let info: { @@ -67,37 +114,76 @@ export const getElementInformation = async ( attributes?: Record; innerHTML?: string; outerHTML?: string; + isIframeContent?: boolean; + iframeURL?: string; + iframeIndex?: number; + frameHierarchy?: string[]; isShadowRoot?: boolean; shadowRootMode?: string; shadowRootContent?: string; } = { - tagName: element?.tagName ?? '', - isShadowRoot: isShadowRoot + tagName: targetElement?.tagName ?? 
'', + isIframeContent, + isShadowRoot }; + if (isIframeContent) { + // Include iframe specific information + info.iframeURL = frameElement.src; + + // Calculate the frame's position in the hierarchy + let currentFrame = frameElement; + const frameHierarchy: string[] = []; + let frameIndex = 0; + + while (currentFrame) { + // Store the frame's identifier (src, id, or index) + frameHierarchy.unshift( + currentFrame.id || + currentFrame.src || + `iframe[${frameIndex}]` + ); + + // Move up to parent frame if it exists + const parentDoc = currentFrame.ownerDocument; + currentFrame = parentDoc?.defaultView?.frameElement as HTMLIFrameElement; + frameIndex++; + } + + info.frameHierarchy = frameHierarchy; + info.iframeIndex = frameIndex - 1; // Adjust for 0-based index + } + if (isShadowRoot) { // Include shadow root specific information info.shadowRootMode = containingShadowRoot.mode; info.shadowRootContent = containingShadowRoot.innerHTML; } - // Get attributes including those from shadow DOM context - if (element) { - info.attributes = Array.from(element.attributes).reduce( + // Collect element attributes and properties + if (targetElement) { + info.attributes = Array.from(targetElement.attributes).reduce( (acc, attr) => { acc[attr.name] = attr.value; return acc; }, {} as Record ); - - // Get text content considering shadow DOM context - info.innerText = element.textContent ?? ''; - info.innerHTML = element.innerHTML; - info.outerHTML = element.outerHTML; - info.hasOnlyText = element.children.length === 0 && - (element.textContent !== null && - element.textContent.trim().length > 0); + + if (targetElement.tagName === 'A') { + info.url = (targetElement as HTMLAnchorElement).href; + info.innerText = targetElement.textContent ?? 
''; + } else if (targetElement.tagName === 'IMG') { + info.imageUrl = (targetElement as HTMLImageElement).src; + } else { + info.hasOnlyText = targetElement.children.length === 0 && + (targetElement.textContent !== null && + targetElement.textContent.trim().length > 0); + info.innerText = targetElement.textContent ?? ''; + } + + info.innerHTML = targetElement.innerHTML; + info.outerHTML = targetElement.outerHTML; } return info; @@ -112,27 +198,67 @@ export const getElementInformation = async ( async ({ x, y }) => { // Enhanced helper function to get element from point including shadow DOM const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { + // First, get the element at the clicked coordinates in the main document let element = document.elementFromPoint(x, y) as HTMLElement; if (!element) return null; - - // Traverse through shadow roots - let current = element; - let shadowRoot = current.shadowRoot; - - // Keep track of the deepest shadow DOM element found - let deepestElement = current; - - while (shadowRoot) { - // Try to find element at same point in shadow DOM - const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; - if (!shadowElement || shadowElement === current) break; - - // Update our tracking of the deepest element - deepestElement = shadowElement; - current = shadowElement; - shadowRoot = current.shadowRoot; + + // Track the deepest element found + let deepestElement = element; + + // Function to traverse shadow DOM + const traverseShadowDOM = (element: HTMLElement): HTMLElement => { + let current = element; + let shadowRoot = current.shadowRoot; + let deepest = current; + + while (shadowRoot) { + const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; + if (!shadowElement || shadowElement === current) break; + + deepest = shadowElement; + current = shadowElement; + shadowRoot = current.shadowRoot; + } + + return deepest; + }; + + // Handle iframe traversal + if (element.tagName === 
'IFRAME') { + let currentIframe = element as HTMLIFrameElement; + + while (currentIframe) { + try { + // Convert coordinates to iframe's local space + const iframeRect = currentIframe.getBoundingClientRect(); + const iframeX = x - iframeRect.left; + const iframeY = y - iframeRect.top; + + const iframeDocument = currentIframe.contentDocument || currentIframe.contentWindow?.document; + if (!iframeDocument) break; + + const iframeElement = iframeDocument.elementFromPoint(iframeX, iframeY) as HTMLElement; + if (!iframeElement) break; + + // Update deepest element and check for shadow DOM + deepestElement = traverseShadowDOM(iframeElement); + + // Continue traversing if we found another iframe + if (iframeElement.tagName === 'IFRAME') { + currentIframe = iframeElement as HTMLIFrameElement; + } else { + break; + } + } catch (error) { + console.warn('Cannot access iframe content:', error); + break; + } + } + } else { + // If not an iframe, check for shadow DOM + deepestElement = traverseShadowDOM(element); } - + return deepestElement; }; @@ -181,10 +307,13 @@ export const getElementInformation = async ( } } - // Get the containing shadow root if any + const ownerDocument = element.ownerDocument; + const frameElement = ownerDocument?.defaultView?.frameElement as HTMLIFrameElement; + const isIframeContent = Boolean(frameElement); + const containingShadowRoot = element.getRootNode() as ShadowRoot; const isShadowRoot = containingShadowRoot instanceof ShadowRoot; - + let info: { tagName: string; hasOnlyText?: boolean; @@ -194,12 +323,44 @@ export const getElementInformation = async ( attributes?: Record; innerHTML?: string; outerHTML?: string; + isIframeContent?: boolean; + iframeURL?: string; + iframeIndex?: number; + frameHierarchy?: string[]; isShadowRoot?: boolean; shadowRootMode?: string; shadowRootContent?: string; } = { tagName: element?.tagName ?? 
'', - isShadowRoot: isShadowRoot + isIframeContent, + isShadowRoot + }; + + if (isIframeContent) { + // Include iframe specific information + info.iframeURL = frameElement.src; + + // Calculate the frame's position in the hierarchy + let currentFrame = frameElement; + const frameHierarchy: string[] = []; + let frameIndex = 0; + + while (currentFrame) { + // Store the frame's identifier (src, id, or index) + frameHierarchy.unshift( + currentFrame.id || + currentFrame.src || + `iframe[${frameIndex}]` + ); + + // Move up to parent frame if it exists + const parentDoc = currentFrame.ownerDocument; + currentFrame = parentDoc?.defaultView?.frameElement as HTMLIFrameElement; + frameIndex++; + } + + info.frameHierarchy = frameHierarchy; + info.iframeIndex = frameIndex - 1; // Adjust for 0-based index }; if (isShadowRoot) { @@ -256,27 +417,67 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector if (!getList || listSelector !== '') { const rect = await page.evaluate( async ({ x, y }) => { - // Enhanced helper function to get element from point including shadow DOM + // Enhanced helper function to get element from point including iframes const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { + // First, get the element at the clicked coordinates in the main document let element = document.elementFromPoint(x, y) as HTMLElement; if (!element) return null; - // Traverse through shadow roots - let current = element; - let shadowRoot = current.shadowRoot; - - // Keep track of the deepest shadow DOM element found - let deepestElement = current; - - while (shadowRoot) { - // Try to find element at same point in shadow DOM - const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; - if (!shadowElement || shadowElement === current) break; - - // Update our tracking of the deepest element - deepestElement = shadowElement; - current = shadowElement; - shadowRoot = current.shadowRoot; + // Track the deepest element 
found + let deepestElement = element; + + // Function to traverse shadow DOM + const traverseShadowDOM = (element: HTMLElement): HTMLElement => { + let current = element; + let shadowRoot = current.shadowRoot; + let deepest = current; + + while (shadowRoot) { + const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; + if (!shadowElement || shadowElement === current) break; + + deepest = shadowElement; + current = shadowElement; + shadowRoot = current.shadowRoot; + } + + return deepest; + }; + + // Handle iframe traversal + if (element.tagName === 'IFRAME') { + let currentIframe = element as HTMLIFrameElement; + + while (currentIframe) { + try { + // Convert coordinates to iframe's local space + const iframeRect = currentIframe.getBoundingClientRect(); + const iframeX = x - iframeRect.left; + const iframeY = y - iframeRect.top; + + const iframeDocument = currentIframe.contentDocument || currentIframe.contentWindow?.document; + if (!iframeDocument) break; + + const iframeElement = iframeDocument.elementFromPoint(iframeX, iframeY) as HTMLElement; + if (!iframeElement) break; + + // Update deepest element and check for shadow DOM + deepestElement = traverseShadowDOM(iframeElement); + + // Continue traversing if we found another iframe + if (iframeElement.tagName === 'IFRAME') { + currentIframe = iframeElement as HTMLIFrameElement; + } else { + break; + } + } catch (error) { + console.warn('Cannot access iframe content:', error); + break; + } + } + } else { + // If not an iframe, check for shadow DOM + deepestElement = traverseShadowDOM(element); } return deepestElement; @@ -288,16 +489,53 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector const element = parentElement?.tagName === 'A' ? 
parentElement : el; const rectangle = element?.getBoundingClientRect(); if (rectangle) { - return { - x: rectangle.x, - y: rectangle.y, - width: rectangle.width, - height: rectangle.height, - top: rectangle.top, - right: rectangle.right, - bottom: rectangle.bottom, - left: rectangle.left, - }; + const createRectObject = (rect: DOMRect) => ({ + x: rect.x, + y: rect.y, + width: rect.width, + height: rect.height, + top: rect.top, + right: rect.right, + bottom: rect.bottom, + left: rect.left, + toJSON() { + return { + x: this.x, + y: this.y, + width: this.width, + height: this.height, + top: this.top, + right: this.right, + bottom: this.bottom, + left: this.left + }; + } + }); + + // For elements inside iframes, adjust coordinates relative to the top window + let adjustedRect = createRectObject(rectangle); + let currentWindow = element.ownerDocument.defaultView; + + while (currentWindow !== window.top) { + const frameElement = currentWindow?.frameElement as HTMLIFrameElement; + if (!frameElement) break; + + const frameRect = frameElement.getBoundingClientRect(); + adjustedRect = createRectObject({ + x: adjustedRect.x + frameRect.x, + y: adjustedRect.y + frameRect.y, + width: adjustedRect.width, + height: adjustedRect.height, + top: adjustedRect.top + frameRect.top, + right: adjustedRect.right + frameRect.left, + bottom: adjustedRect.bottom + frameRect.top, + left: adjustedRect.left + frameRect.left, + } as DOMRect); + + currentWindow = frameElement.ownerDocument.defaultView; + } + + return adjustedRect; } } return null; @@ -308,27 +546,66 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector } else { const rect = await page.evaluate( async ({ x, y }) => { - // Enhanced helper function to get element from point including shadow DOM const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { + // First, get the element at the clicked coordinates in the main document let element = document.elementFromPoint(x, y) as 
HTMLElement; if (!element) return null; - // Traverse through shadow roots - let current = element; - let shadowRoot = current.shadowRoot; - - // Keep track of the deepest shadow DOM element found - let deepestElement = current; - - while (shadowRoot) { - // Try to find element at same point in shadow DOM - const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; - if (!shadowElement || shadowElement === current) break; - - // Update our tracking of the deepest element - deepestElement = shadowElement; - current = shadowElement; - shadowRoot = current.shadowRoot; + // Track the deepest element found + let deepestElement = element; + + // Function to traverse shadow DOM + const traverseShadowDOM = (element: HTMLElement): HTMLElement => { + let current = element; + let shadowRoot = current.shadowRoot; + let deepest = current; + + while (shadowRoot) { + const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; + if (!shadowElement || shadowElement === current) break; + + deepest = shadowElement; + current = shadowElement; + shadowRoot = current.shadowRoot; + } + + return deepest; + }; + + // Handle iframe traversal + if (element.tagName === 'IFRAME') { + let currentIframe = element as HTMLIFrameElement; + + while (currentIframe) { + try { + // Convert coordinates to iframe's local space + const iframeRect = currentIframe.getBoundingClientRect(); + const iframeX = x - iframeRect.left; + const iframeY = y - iframeRect.top; + + const iframeDocument = currentIframe.contentDocument || currentIframe.contentWindow?.document; + if (!iframeDocument) break; + + const iframeElement = iframeDocument.elementFromPoint(iframeX, iframeY) as HTMLElement; + if (!iframeElement) break; + + // Update deepest element and check for shadow DOM + deepestElement = traverseShadowDOM(iframeElement); + + // Continue traversing if we found another iframe + if (iframeElement.tagName === 'IFRAME') { + currentIframe = iframeElement as HTMLIFrameElement; + } else { + 
break; + } + } catch (error) { + console.warn('Cannot access iframe content:', error); + break; + } + } + } else { + // If not an iframe, check for shadow DOM + deepestElement = traverseShadowDOM(element); } return deepestElement; @@ -381,16 +658,53 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector const rectangle = element?.getBoundingClientRect(); if (rectangle) { - return { - x: rectangle.x, - y: rectangle.y, - width: rectangle.width, - height: rectangle.height, - top: rectangle.top, - right: rectangle.right, - bottom: rectangle.bottom, - left: rectangle.left, - }; + const createRectObject = (rect: DOMRect) => ({ + x: rect.x, + y: rect.y, + width: rect.width, + height: rect.height, + top: rect.top, + right: rect.right, + bottom: rect.bottom, + left: rect.left, + toJSON() { + return { + x: this.x, + y: this.y, + width: this.width, + height: this.height, + top: this.top, + right: this.right, + bottom: this.bottom, + left: this.left + }; + } + }); + + // Same coordinate adjustment for iframe elements as above + let adjustedRect = createRectObject(rectangle); + let currentWindow = element.ownerDocument.defaultView; + + while (currentWindow !== window.top) { + const frameElement = currentWindow?.frameElement as HTMLIFrameElement; + if (!frameElement) break; + + const frameRect = frameElement.getBoundingClientRect(); + adjustedRect = createRectObject({ + x: adjustedRect.x + frameRect.x, + y: adjustedRect.y + frameRect.y, + width: adjustedRect.width, + height: adjustedRect.height, + top: adjustedRect.top + frameRect.top, + right: adjustedRect.right + frameRect.left, + bottom: adjustedRect.bottom + frameRect.top, + left: adjustedRect.left + frameRect.left, + } as DOMRect); + + currentWindow = frameElement.ownerDocument.defaultView; + } + + return adjustedRect; } } return null; @@ -875,32 +1189,146 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { } return output; } - - // const MAX_DEPTH = 10; - + const 
getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { + // Helper function to traverse shadow DOM + const traverseShadowDOM = (element: HTMLElement, depth: number = 0): HTMLElement => { + const MAX_SHADOW_DEPTH = 4; + let current = element; + let deepest = current; + + while (current && depth < MAX_SHADOW_DEPTH) { + const shadowRoot = current.shadowRoot; + if (!shadowRoot) break; + + const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; + if (!shadowElement || shadowElement === current) break; + + deepest = shadowElement; + current = shadowElement; + depth++; + } + + return deepest; + }; + + // Start with the element at the specified coordinates let element = document.elementFromPoint(x, y) as HTMLElement; if (!element) return null; - let current = element; - let deepestElement = current; + // Initialize tracking variables + let deepestElement = element; let depth = 0; - const MAX_DEPTH = 4; // Limit to 2 levels of shadow DOM + const MAX_IFRAME_DEPTH = 4; - while (current && depth < MAX_DEPTH) { - const shadowRoot = current.shadowRoot; - if (!shadowRoot) break; + // First check if the initial element has a shadow root + deepestElement = traverseShadowDOM(element); - const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; - if (!shadowElement || shadowElement === current) break; - - deepestElement = shadowElement; - current = shadowElement; - depth++; + // If it's an iframe, traverse through iframe hierarchy + if (deepestElement.tagName === 'IFRAME') { + let currentIframe = deepestElement as HTMLIFrameElement; + + while (currentIframe && depth < MAX_IFRAME_DEPTH) { + try { + // Convert coordinates to iframe's local space + const iframeRect = currentIframe.getBoundingClientRect(); + const iframeX = x - iframeRect.left; + const iframeY = y - iframeRect.top; + + // Access iframe's document + const iframeDoc = currentIframe.contentDocument || currentIframe.contentWindow?.document; + if (!iframeDoc) break; + + // 
Get element at transformed coordinates in iframe + const iframeElement = iframeDoc.elementFromPoint(iframeX, iframeY) as HTMLElement; + if (!iframeElement) break; + + // Check for shadow DOM within iframe + const shadowResult = traverseShadowDOM(iframeElement); + deepestElement = shadowResult; + + // If we found another iframe, continue traversing + if (shadowResult.tagName === 'IFRAME') { + currentIframe = shadowResult as HTMLIFrameElement; + depth++; + } else { + break; + } + } catch (error) { + console.warn('Cannot access iframe content:', error); + break; + } + } } return deepestElement; }; + + const genSelectorForIframe = (element: HTMLElement) => { + // Helper function to get the complete iframe path up to document root + const getIframePath = (el: HTMLElement) => { + const path = []; + let current = el; + let depth = 0; + const MAX_DEPTH = 4; + + while (current && depth < MAX_DEPTH) { + // Get the owner document of the current element + const ownerDocument = current.ownerDocument; + + // Check if this document belongs to an iframe + const frameElement = ownerDocument?.defaultView?.frameElement as HTMLIFrameElement; + + if (frameElement) { + path.unshift({ + frame: frameElement, + document: ownerDocument, + element: current + }); + // Move up to the parent document's element (the iframe) + current = frameElement; + depth++; + } else { + break; + } + } + return path; + }; + + const iframePath = getIframePath(element); + if (iframePath.length === 0) return null; + + try { + const selectorParts: string[] = []; + + // Generate selector for each iframe boundary + iframePath.forEach((context, index) => { + // Get selector for the iframe element + const frameSelector = finder(context.frame, { + root: index === 0 ? 
document.body : + (iframePath[index - 1].document.body as Element) + }); + + // For the last context, get selector for target element + if (index === iframePath.length - 1) { + const elementSelector = finder(element, { + root: context.document.body as Element + }); + selectorParts.push(`${frameSelector} :>> ${elementSelector}`); + } else { + selectorParts.push(frameSelector); + } + }); + + return { + fullSelector: selectorParts.join(' :>> '), + isFrameContent: true + }; + } catch (e) { + console.warn('Error generating iframe selector:', e); + return null; + } + }; // Helper function to generate selectors for shadow DOM elements const genSelectorForShadowDOM = (element: HTMLElement) => { @@ -981,7 +1409,8 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { } catch (e) { } - // Generate shadow DOM specific selector + + const iframeSelector = genSelectorForIframe(element); const shadowSelector = genSelectorForShadowDOM(element); const hrefSelector = genSelectorForAttributes(element, ['href']); @@ -1030,7 +1459,10 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { hrefSelector, accessibilitySelector, formSelector, - // Shadow DOM selector + iframeSelector: iframeSelector ? { + full: iframeSelector.fullSelector, + isIframe: iframeSelector.isFrameContent, + } : null, shadowSelector: shadowSelector ? 
{ full: shadowSelector.fullSelector, mode: shadowSelector.mode @@ -1079,7 +1511,8 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { return char.length === 1 && char.match(/[0-9]/); } - const hoveredElement = getDeepestElementFromPoint(x, y); + const hoveredElement = getDeepestElementFromPoint(x, y) as HTMLElement; + if ( hoveredElement != null && !hoveredElement.closest('#overlay-controls') != null @@ -1114,41 +1547,83 @@ interface SelectorResult { */ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates, listSelector: string): Promise => { - interface ShadowContext { - host: HTMLElement; - root: ShadowRoot; + interface DOMContext { + type: 'iframe' | 'shadow'; element: HTMLElement; + container: HTMLIFrameElement | ShadowRoot; + host?: HTMLElement; + document?: Document; } try { if (!listSelector) { const selectors = await page.evaluate(({ x, y }: { x: number, y: number }) => { - // Helper function to get deepest element, traversing shadow DOM - function getDeepestElementFromPoint(x: number, y: number): HTMLElement | null { + const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { + // Function to traverse shadow DOM + const traverseShadowDOM = (element: HTMLElement): HTMLElement => { + let current = element; + let deepest = current; + let shadowRoot = current.shadowRoot; + + while (shadowRoot) { + const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; + if (!shadowElement || shadowElement === current) break; + + deepest = shadowElement; + current = shadowElement; + shadowRoot = current.shadowRoot; + } + + return deepest; + }; + + // Start with the element at coordinates let element = document.elementFromPoint(x, y) as HTMLElement; if (!element) return null; - let current = element; - let deepestElement = current; + let deepestElement = element; let depth = 0; - const MAX_DEPTH = 4; // Limit shadow DOM traversal depth - - while (current && depth < MAX_DEPTH) { - 
const shadowRoot = current.shadowRoot; - if (!shadowRoot) break; - - const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; - if (!shadowElement || shadowElement === current) break; - - deepestElement = shadowElement; - current = shadowElement; - depth++; + const MAX_DEPTH = 4; + + // Handle iframe traversal + if (element.tagName === 'IFRAME') { + let currentIframe = element as HTMLIFrameElement; + + while (currentIframe && depth < MAX_DEPTH) { + try { + const iframeRect = currentIframe.getBoundingClientRect(); + const iframeX = x - iframeRect.left; + const iframeY = y - iframeRect.top; + + const iframeDoc = currentIframe.contentDocument || currentIframe.contentWindow?.document; + if (!iframeDoc) break; + + const iframeElement = iframeDoc.elementFromPoint(iframeX, iframeY) as HTMLElement; + if (!iframeElement) break; + + // Check for shadow DOM within iframe + deepestElement = traverseShadowDOM(iframeElement); + + if (deepestElement.tagName === 'IFRAME') { + currentIframe = deepestElement as HTMLIFrameElement; + depth++; + } else { + break; + } + } catch (error) { + console.warn('Cannot access iframe content:', error); + break; + } + } + } else { + // If not an iframe, check for shadow DOM + deepestElement = traverseShadowDOM(element); } return deepestElement; - } + }; - // Generate basic selector from element's tag and classes + // Basic selector generation function getNonUniqueSelector(element: HTMLElement): string { let selector = element.tagName.toLowerCase(); @@ -1172,69 +1647,87 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates return selector; } - // Get complete shadow DOM path for an element - function getShadowPath(element: HTMLElement): ShadowContext[] { - const path: ShadowContext[] = []; + + function getContextPath(element: HTMLElement): DOMContext[] { + const path: DOMContext[] = []; let current = element; let depth = 0; const MAX_DEPTH = 4; while (current && depth < MAX_DEPTH) { + // Check for shadow 
DOM const rootNode = current.getRootNode(); if (rootNode instanceof ShadowRoot) { path.unshift({ - host: rootNode.host as HTMLElement, - root: rootNode, - element: current + type: 'shadow', + element: current, + container: rootNode, + host: rootNode.host as HTMLElement }); current = rootNode.host as HTMLElement; depth++; - } else { - break; + continue; } + + // Check for iframe + const ownerDocument = current.ownerDocument; + const frameElement = ownerDocument?.defaultView?.frameElement as HTMLIFrameElement; + + if (frameElement) { + path.unshift({ + type: 'iframe', + element: current, + container: frameElement, + document: ownerDocument + }); + current = frameElement; + depth++; + continue; + } + + break; } + return path; } - // Generate complete selector path for any element function getSelectorPath(element: HTMLElement | null): string { if (!element) return ''; - // Check for shadow DOM path first - const shadowPath = getShadowPath(element); - if (shadowPath.length > 0) { + // Get the complete context path + const contextPath = getContextPath(element); + if (contextPath.length > 0) { const selectorParts: string[] = []; - // Build complete shadow DOM path - shadowPath.forEach((context, index) => { - const hostSelector = getNonUniqueSelector(context.host); + contextPath.forEach((context, index) => { + const containerSelector = getNonUniqueSelector( + context.type === 'shadow' ? context.host! : context.container as HTMLElement + ); - if (index === shadowPath.length - 1) { - // For deepest shadow context, include target element + if (index === contextPath.length - 1) { const elementSelector = getNonUniqueSelector(element); - selectorParts.push(`${hostSelector} >> ${elementSelector}`); + const delimiter = context.type === 'shadow' ? 
' >> ' : ' :>> '; + selectorParts.push(`${containerSelector}${delimiter}${elementSelector}`); } else { - // For intermediate shadow boundaries - selectorParts.push(hostSelector); + selectorParts.push(containerSelector); } }); - return selectorParts.join(' >> '); + return selectorParts.join(contextPath[0].type === 'shadow' ? ' >> ' : ' :>> '); } // Regular DOM path generation const path: string[] = []; let currentElement = element; - let depth = 0; const MAX_DEPTH = 2; + let depth = 0; while (currentElement && currentElement !== document.body && depth < MAX_DEPTH) { const selector = getNonUniqueSelector(currentElement); path.unshift(selector); - const parentElement = currentElement.parentElement; - if (!parentElement) break; - currentElement = parentElement; + if (!currentElement.parentElement) break; + currentElement = currentElement.parentElement; depth++; } @@ -1297,30 +1790,79 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates } else { // When we have a list selector, we need special handling while maintaining shadow DOM support const selectors = await page.evaluate(({ x, y }: { x: number, y: number }) => { - // Helper function to get deepest element, traversing shadow DOM - function getDeepestElementFromPoint(x: number, y: number): HTMLElement | null { + const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { + // Helper function to traverse shadow DOM + const traverseShadowDOM = (element: HTMLElement, depth: number = 0): HTMLElement => { + const MAX_SHADOW_DEPTH = 4; + let current = element; + let deepest = current; + + while (current && depth < MAX_SHADOW_DEPTH) { + const shadowRoot = current.shadowRoot; + if (!shadowRoot) break; + + const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; + if (!shadowElement || shadowElement === current) break; + + deepest = shadowElement; + current = shadowElement; + depth++; + } + + return deepest; + }; + + // Start with the element at the 
specified coordinates let element = document.elementFromPoint(x, y) as HTMLElement; if (!element) return null; - - let current = element; - let deepestElement = current; + + // Initialize tracking variables + let deepestElement = element; let depth = 0; - const MAX_DEPTH = 4; - - while (current && depth < MAX_DEPTH) { - const shadowRoot = current.shadowRoot; - if (!shadowRoot) break; + const MAX_IFRAME_DEPTH = 4; + + // First check if the initial element has a shadow root + deepestElement = traverseShadowDOM(element); + + // If it's an iframe, traverse through iframe hierarchy + if (deepestElement.tagName === 'IFRAME') { + let currentIframe = deepestElement as HTMLIFrameElement; - const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; - if (!shadowElement || shadowElement === current) break; - - deepestElement = shadowElement; - current = shadowElement; - depth++; + while (currentIframe && depth < MAX_IFRAME_DEPTH) { + try { + // Convert coordinates to iframe's local space + const iframeRect = currentIframe.getBoundingClientRect(); + const iframeX = x - iframeRect.left; + const iframeY = y - iframeRect.top; + + // Access iframe's document + const iframeDoc = currentIframe.contentDocument || currentIframe.contentWindow?.document; + if (!iframeDoc) break; + + // Get element at transformed coordinates in iframe + const iframeElement = iframeDoc.elementFromPoint(iframeX, iframeY) as HTMLElement; + if (!iframeElement) break; + + // Check for shadow DOM within iframe + const shadowResult = traverseShadowDOM(iframeElement); + deepestElement = shadowResult; + + // If we found another iframe, continue traversing + if (shadowResult.tagName === 'IFRAME') { + currentIframe = shadowResult as HTMLIFrameElement; + depth++; + } else { + break; + } + } catch (error) { + console.warn('Cannot access iframe content:', error); + break; + } + } } - + return deepestElement; - } + }; // Generate basic selector from element's tag and classes function 
getNonUniqueSelector(element: HTMLElement): string { @@ -1333,9 +1875,9 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates } if (element.className) { - const classes = element.className.split(/\s+/).filter((cls: string) => Boolean(cls)); + const classes = element.className.split(/\s+/).filter(Boolean); if (classes.length > 0) { - const validClasses = classes.filter((cls: string) => !cls.startsWith('!') && !cls.includes(':')); + const validClasses = classes.filter(cls => !cls.startsWith('!') && !cls.includes(':')); if (validClasses.length > 0) { selector += '.' + validClasses.map(cls => CSS.escape(cls)).join('.'); } @@ -1345,60 +1887,82 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates return selector; } - // Get complete shadow DOM path for an element - function getShadowPath(element: HTMLElement): ShadowContext[] { - const path: ShadowContext[] = []; + // Get complete context path (both iframe and shadow DOM) + function getContextPath(element: HTMLElement): DOMContext[] { + const path: DOMContext[] = []; let current = element; let depth = 0; const MAX_DEPTH = 4; while (current && depth < MAX_DEPTH) { + // Check for shadow DOM const rootNode = current.getRootNode(); if (rootNode instanceof ShadowRoot) { path.unshift({ - host: rootNode.host as HTMLElement, - root: rootNode, - element: current + type: 'shadow', + element: current, + container: rootNode, + host: rootNode.host as HTMLElement }); current = rootNode.host as HTMLElement; depth++; - } else { - break; + continue; } + + // Check for iframe + const ownerDocument = current.ownerDocument; + const frameElement = ownerDocument?.defaultView?.frameElement as HTMLIFrameElement; + + if (frameElement) { + path.unshift({ + type: 'iframe', + element: current, + container: frameElement, + document: ownerDocument + }); + current = frameElement; + depth++; + continue; + } + + break; } + return path; } - // Generate selector path specifically for list items 
- function getListItemSelectorPath(element: HTMLElement | null): string { + function getSelectorPath(element: HTMLElement | null): string { if (!element) return ''; - // Check for shadow DOM path first - const shadowPath = getShadowPath(element); - if (shadowPath.length > 0) { + // Get the complete context path (enclosing shadow roots / iframes, outermost first) + const contextPath = getContextPath(element); + if (contextPath.length > 0) { const selectorParts: string[] = []; - shadowPath.forEach((context, index) => { - const hostSelector = getNonUniqueSelector(context.host); + contextPath.forEach((context, index) => { + const containerSelector = getNonUniqueSelector( + context.type === 'shadow' ? context.host! : context.container as HTMLElement + ); + const delimiter = context.type === 'shadow' ? ' >> ' : ' :>> '; /* every boundary is crossed with its own delimiter, so mixed shadow/iframe nesting serializes correctly */ - if (index === shadowPath.length - 1) { + if (index === contextPath.length - 1) { const elementSelector = getNonUniqueSelector(element); - selectorParts.push(`${hostSelector} >> ${elementSelector}`); + selectorParts.push(`${containerSelector}${delimiter}${elementSelector}`); } else { - selectorParts.push(hostSelector); + selectorParts.push(`${containerSelector}${delimiter}`); } }); - return selectorParts.join(' >> '); + return selectorParts.join('');
} - // For list items, we want a shallower path to better match list patterns + // Regular DOM path generation const path: string[] = []; let currentElement = element; + const MAX_DEPTH = 2; let depth = 0; - const MAX_LIST_DEPTH = 2; // Keeping shallow depth for list items - while (currentElement && currentElement !== document.body && depth < MAX_LIST_DEPTH) { + while (currentElement && currentElement !== document.body && depth < MAX_DEPTH) { const selector = getNonUniqueSelector(currentElement); path.unshift(selector); @@ -1410,13 +1974,12 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates return path.join(' > '); } - // Main logic for list item selection const originalEl = getDeepestElementFromPoint(x, y); if (!originalEl) return { generalSelector: '' }; let element = originalEl; - const generalSelector = getListItemSelectorPath(element); + const generalSelector = getSelectorPath(element); return { generalSelector }; }, coordinates); @@ -1459,35 +2022,59 @@ export const getChildSelectors = async (page: Page, parentSelector: string): Pro function getSelectorPath(element: HTMLElement): string { if (!element || !element.parentElement) return ''; - const parentSelector = getNonUniqueSelector(element.parentElement); const elementSelector = getNonUniqueSelector(element); - - // Check if element is in shadow DOM + + // Check for shadow DOM context const rootNode = element.getRootNode(); if (rootNode instanceof ShadowRoot) { const hostSelector = getNonUniqueSelector(rootNode.host as HTMLElement); return `${hostSelector} >> ${elementSelector}`; } + // Check for iframe context + const ownerDocument = element.ownerDocument; + const frameElement = ownerDocument?.defaultView?.frameElement as HTMLIFrameElement; + if (frameElement) { + const frameSelector = getNonUniqueSelector(frameElement); + return `${frameSelector} :>> ${elementSelector}`; + } + + // Regular DOM context + const parentSelector = 
getNonUniqueSelector(element.parentElement); return `${parentSelector} > ${elementSelector}`; } - // Function to get all shadow DOM children of an element - function getShadowChildren(element: HTMLElement): HTMLElement[] { + + // Function to get all children from special contexts: every element inside the element's shadow root plus every element inside the documents of iframes found beneath it + function getSpecialContextChildren(element: HTMLElement): HTMLElement[] { const children: HTMLElement[] = []; - // Check if element has shadow root + // Get shadow DOM children const shadowRoot = element.shadowRoot; if (shadowRoot) { - // Get all elements in the shadow DOM const shadowElements = Array.from(shadowRoot.querySelectorAll('*')) as HTMLElement[]; children.push(...shadowElements); } + // Get iframe children (an iframe whose document cannot be accessed is logged with console.warn and skipped) + const iframes = Array.from(element.querySelectorAll('iframe')) as HTMLIFrameElement[]; + for (const iframe of iframes) { + try { + const iframeDoc = iframe.contentDocument || iframe.contentWindow?.document; + if (iframeDoc) { + const iframeElements = Array.from(iframeDoc.querySelectorAll('*')) as HTMLElement[]; + children.push(...iframeElements); + } + } catch (error) { + console.warn('Cannot access iframe content:', error); + continue; + } + } + return children; } - // Function to recursively get all descendant selectors including shadow DOM + // Function to recursively get all descendant selectors including shadow DOM and iframes function getAllDescendantSelectors(element: HTMLElement): string[] { let selectors: string[] = []; @@ -1497,53 +2084,77 @@ export const getChildSelectors = async (page: Page, parentSelector: string): Pro const childPath = getSelectorPath(child); if (childPath) { selectors.push(childPath); - // Recursively process regular DOM descendants + + // Process regular descendants selectors = selectors.concat(getAllDescendantSelectors(child)); - // Check for shadow DOM in this child - const shadowChildren = getShadowChildren(child); - for (const shadowChild of shadowChildren) { - const shadowPath = getSelectorPath(shadowChild); - if (shadowPath) { - 
selectors.push(shadowPath); - // Recursively process shadow DOM descendants - selectors = selectors.concat(getAllDescendantSelectors(shadowChild)); + // Process special context children (shadow DOM and iframes) + const specialChildren = getSpecialContextChildren(child); + for (const specialChild of specialChildren) { + const specialPath = getSelectorPath(specialChild); + if (specialPath) { + selectors.push(specialPath); + selectors = selectors.concat(getAllDescendantSelectors(specialChild)); } } } } - // Handle direct shadow DOM children of the current element - const shadowChildren = getShadowChildren(element); - for (const shadowChild of shadowChildren) { - const shadowPath = getSelectorPath(shadowChild); - if (shadowPath) { - selectors.push(shadowPath); - selectors = selectors.concat(getAllDescendantSelectors(shadowChild)); + // Handle direct special context children (shadow DOM / iframe content of the current element itself) + const specialChildren = getSpecialContextChildren(element); + for (const specialChild of specialChildren) { + const specialPath = getSelectorPath(specialChild); + if (specialPath) { + selectors.push(specialPath); + selectors = selectors.concat(getAllDescendantSelectors(specialChild)); } } return selectors; } - // Split the parent selector if it contains shadow DOM parts - const selectorParts = parentSelector.split('>>').map(part => part.trim()); + // Handle both shadow DOM and iframe parent selectors let parentElements: HTMLElement[] = []; - - // Handle shadow DOM traversal if needed - if (selectorParts.length > 1) { - // Start with the host elements + + // Check for special context traversal in parent selector + if (parentSelector.includes('>>') || parentSelector.includes(':>>')) { + // Split by both types of delimiters ('>>' and ':>>') + const selectorParts = parentSelector.split(/(?:>>|:>>)/).map(part => part.trim()); + + // Start with initial elements: matches of the first selector part in document parentElements = Array.from(document.querySelectorAll(selectorParts[0])) as HTMLElement[]; - // Traverse through shadow DOM parts + // Traverse through parts for 
(let i = 1; i < selectorParts.length; i++) { const newParentElements: HTMLElement[] = []; + for (const element of parentElements) { + // Check for shadow DOM if (element.shadowRoot) { - const shadowChildren = Array.from(element.shadowRoot.querySelectorAll(selectorParts[i])) as HTMLElement[]; + const shadowChildren = Array.from( + element.shadowRoot.querySelectorAll(selectorParts[i]) + ) as HTMLElement[]; newParentElements.push(...shadowChildren); } + + // Check for iframe + if (element.tagName === 'IFRAME') { + try { + const iframeDoc = (element as HTMLIFrameElement).contentDocument || + (element as HTMLIFrameElement).contentWindow?.document; + if (iframeDoc) { + const iframeChildren = Array.from( + iframeDoc.querySelectorAll(selectorParts[i]) + ) as HTMLElement[]; + newParentElements.push(...iframeChildren); + } + } catch (error) { + console.warn('Cannot access iframe content during traversal:', error); + continue; + } + } } + parentElements = newParentElements; } } else { @@ -1551,7 +2162,7 @@ export const getChildSelectors = async (page: Page, parentSelector: string): Pro parentElements = Array.from(document.querySelectorAll(parentSelector)) as HTMLElement[]; } - const allChildSelectors = new Set(); + const allChildSelectors = new Set(); // Use a set to ensure uniqueness // Process each parent element and its descendants parentElements.forEach((parentElement) => { diff --git a/server/src/workflow-management/utils.ts b/server/src/workflow-management/utils.ts index 4f747127..03d07211 100644 --- a/server/src/workflow-management/utils.ts +++ b/server/src/workflow-management/utils.ts @@ -13,6 +13,10 @@ export const getBestSelectorForAction = (action: Action) => { case ActionType.DragAndDrop: { const selectors = action.selectors; + // An iframe selector, when present, takes precedence over the shadow DOM selector + if (selectors?.iframeSelector?.full) { + return selectors.iframeSelector.full; + } if (selectors?.shadowSelector?.full) { return selectors.shadowSelector.full; } diff --git a/src/components/organisms/BrowserWindow.tsx 
b/src/components/organisms/BrowserWindow.tsx index ad58a309..5a49a89f 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -12,6 +12,7 @@ import { useTranslation } from 'react-i18next'; interface ElementInfo { tagName: string; hasOnlyText?: boolean; + isIframeContent?: boolean; isShadowRoot?: boolean; innerText?: string; url?: string; @@ -117,24 +118,44 @@ export const BrowserWindow = () => { }, [screenShot, canvasRef, socket, screencastHandler]); const highlighterHandler = useCallback((data: { rect: DOMRect, selector: string, elementInfo: ElementInfo | null, childSelectors?: string[] }) => { if (getList === true) { if (listSelector) { socket?.emit('listSelector', { selector: listSelector }); - const hasValidChildSelectors = Array.isArray(data.childSelectors) && data.childSelectors.length > 0; if (limitMode) { setHighlighterData(null); } else if (paginationMode) { - // only set highlighterData if type is not empty, 'none', 'scrollDown', or 'scrollUp' + // Only set highlighterData if type is not empty, 'none', 'scrollDown', or 'scrollUp' if (paginationType !== '' && !['none', 'scrollDown', 'scrollUp'].includes(paginationType)) { setHighlighterData(data); } else { setHighlighterData(null); } } else if (data.childSelectors && data.childSelectors.includes(data.selector)) { - // highlight only valid child elements within the listSelector + // Highlight only valid child elements within the listSelector setHighlighterData(data); + } else if (data.elementInfo?.isIframeContent && data.childSelectors) { + // Handle pure iframe elements - similar to previous shadow DOM logic but using iframe syntax + // Check if the selector matches any iframe child selectors + const isIframeChild = data.childSelectors.some(childSelector => + data.selector.includes(':>>') && // Iframe uses :>> 
for traversal + childSelector.split(':>>').some(part => + data.selector.includes(part.trim()) + ) + ); + setHighlighterData(isIframeChild ? data : null); + } else if (data.selector.includes(':>>') && hasValidChildSelectors) { + // Handle mixed DOM cases with iframes + // Split the selector into parts and check each against child selectors + const selectorParts = data.selector.split(':>>').map(part => part.trim()); + const isValidMixedSelector = selectorParts.some(part => + // We know data.childSelectors is defined due to hasValidChildSelectors check + data.childSelectors!.some(childSelector => + childSelector.includes(part) + ) + ); + setHighlighterData(isValidMixedSelector ? data : null); } else if (data.elementInfo?.isShadowRoot && data.childSelectors) { // New case: Handle pure Shadow DOM elements // Check if the selector matches any shadow root child selectors @@ -145,7 +166,7 @@ export const BrowserWindow = () => { ) ); setHighlighterData(isShadowChild ? data : null); - } else if (data.selector.includes('>>') && hasValidChildSelectors) { + } else if (data.selector.includes('>>') && hasValidChildSelectors) { // New case: Handle mixed DOM cases // Split the selector into parts and check each against child selectors const selectorParts = data.selector.split('>>').map(part => part.trim()); @@ -156,19 +177,18 @@ export const BrowserWindow = () => { ) ); setHighlighterData(isValidMixedSelector ? data : null); - } - else { + } else { // if !valid child in normal mode, clear the highlighter setHighlighterData(null); - } - } else { - // set highlighterData for the initial listSelector selection + } + } else { + // Set highlighterData for the initial listSelector selection setHighlighterData(data); - } - } else { - // for non-list steps + } + } else { + // For non-list steps setHighlighterData(data); - } + } - }, [highlighterData, getList, socket, listSelector, paginationMode, paginationType, captureStage]); + }, [highlighterData, getList, socket, listSelector, limitMode, paginationMode, paginationType, captureStage]);