diff --git a/README.md b/README.md index cebcedd2..47e170b5 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ Maxun lets you train a robot in 2 minutes and scrape the web on auto-pilot. Web

- Website | + Website | Discord | Twitter | Join Maxun Cloud | diff --git a/docker-compose.yml b/docker-compose.yml index 874e48d6..6506a4c5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -43,7 +43,7 @@ services: #build: #context: . #dockerfile: server/Dockerfile - image: getmaxun/maxun-backend:v0.0.9 + image: getmaxun/maxun-backend:v0.0.10 ports: - "${BACKEND_PORT:-8080}:${BACKEND_PORT:-8080}" env_file: .env @@ -70,7 +70,7 @@ services: #build: #context: . #dockerfile: Dockerfile - image: getmaxun/maxun-frontend:v0.0.5 + image: getmaxun/maxun-frontend:v0.0.6 ports: - "${FRONTEND_PORT:-5173}:${FRONTEND_PORT:-5173}" env_file: .env diff --git a/maxun-core/package.json b/maxun-core/package.json index 7c92d08e..ddaaa510 100644 --- a/maxun-core/package.json +++ b/maxun-core/package.json @@ -1,6 +1,6 @@ { "name": "maxun-core", - "version": "0.0.7", + "version": "0.0.8", "description": "Core package for Maxun, responsible for data extraction", "main": "build/index.js", "typings": "build/index.d.ts", diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index 0c5d74ac..f6b53da2 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -188,69 +188,201 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, * @param {Object.} lists The named lists of HTML elements. * @returns {Array.>} */ - window.scrapeSchema = function (lists) { + window.scrapeSchema = function(lists) { + // Utility functions remain the same function omap(object, f, kf = (x) => x) { return Object.fromEntries( Object.entries(object) - .map(([k, v]) => [kf(k), f(v)]), + .map(([k, v]) => [kf(k), f(v)]), ); } function ofilter(object, f) { return Object.fromEntries( Object.entries(object) - .filter(([k, v]) => f(k, v)), + .filter(([k, v]) => f(k, v)), ); } - function getSeedKey(listObj) { - const maxLength = Math.max(...Object.values(omap(listObj, (x) => document.querySelectorAll(x.selector).length))); - return Object.keys(ofilter(listObj, (_, v) => document.querySelectorAll(v.selector).length === maxLength))[0]; + function findAllElements(config) { + // Regular DOM query if no special delimiters + if (!config.selector.includes('>>') && !config.selector.includes(':>>')) { + return Array.from(document.querySelectorAll(config.selector)); + } + + // First handle iframe traversal if present + if (config.selector.includes(':>>')) { + const parts = config.selector.split(':>>').map(s => s.trim()); + let currentElements = [document]; + + // Traverse through each part of the selector + for (let i = 0; i < parts.length; i++) { + const part = parts[i]; + const nextElements = []; + const isLast = i === parts.length - 1; + + for (const element of currentElements) { + try { + // For document or iframe document + const doc = element.contentDocument || element || element.contentWindow?.document; + if (!doc) continue; + + // Query elements in current context + const found = Array.from(doc.querySelectorAll(part)); + + if (isLast) { + // If it's the last part, keep all matching elements + nextElements.push(...found); + } else { + // If not last, only keep iframes for next iteration + const iframes = found.filter(el => el.tagName === 'IFRAME'); + nextElements.push(...iframes); + } + } catch (error) { + console.warn('Cannot access iframe content:', error, { + part, + element, + index: i + }); + } + } + + if (nextElements.length === 0) { + console.warn('No elements found for part:', part, 'at depth:', i); + return []; + } + currentElements = nextElements; + } + + return currentElements; + } + + // Handle shadow DOM traversal + if (config.selector.includes('>>')) { + const parts = config.selector.split('>>').map(s => s.trim()); + let currentElements = [document]; + + for (const part of parts) { + const nextElements = []; + for (const element of currentElements) { + // Try regular DOM first + const found = Array.from(element.querySelectorAll(part)); + + // Then check shadow roots + for (const foundEl of found) { + if (foundEl.shadowRoot) { + nextElements.push(foundEl.shadowRoot); + } else { + nextElements.push(foundEl); + } + } + } + currentElements = nextElements; + } + return currentElements.filter(el => !(el instanceof ShadowRoot)); + } + + return []; } + // Modified to handle iframe context for URL resolution + function getElementValue(element, attribute) { + if (!element) return null; + + // Get the base URL for resolving relative URLs + const baseURL = element.ownerDocument?.location?.href || window.location.origin; + + switch (attribute) { + case 'href': { + const relativeHref = element.getAttribute('href'); + return relativeHref ? new URL(relativeHref, baseURL).href : null; + } + case 'src': { + const relativeSrc = element.getAttribute('src'); + return relativeSrc ? new URL(relativeSrc, baseURL).href : null; + } + case 'innerText': + return element.innerText?.trim(); + case 'textContent': + return element.textContent?.trim(); + default: + return element.getAttribute(attribute) || element.innerText?.trim(); + } + } + + // Rest of the functions remain largely the same + function getSeedKey(listObj) { + const maxLength = Math.max(...Object.values( + omap(listObj, (x) => findAllElements(x).length) + )); + return Object.keys( + ofilter(listObj, (_, v) => findAllElements(v).length === maxLength) + )[0]; + } + + // Find minimal bounding elements function getMBEs(elements) { return elements.map((element) => { let candidate = element; const isUniqueChild = (e) => elements - .filter((elem) => e.parentNode?.contains(elem)) + .filter((elem) => { + // Handle both iframe and shadow DOM boundaries + const sameContext = elem.getRootNode() === e.getRootNode() && + elem.ownerDocument === e.ownerDocument; + return sameContext && e.parentNode?.contains(elem); + }) .length === 1; - + while (candidate && isUniqueChild(candidate)) { candidate = candidate.parentNode; } - + return candidate; }); } const seedName = getSeedKey(lists); - const seedElements = Array.from(document.querySelectorAll(lists[seedName].selector)); + const seedElements = findAllElements(lists[seedName]); const MBEs = getMBEs(seedElements); - - return MBEs.map((mbe) => omap( - lists, - ({ selector, attribute }, key) => { - const elem = Array.from(document.querySelectorAll(selector)).find((elem) => mbe.contains(elem)); - if (!elem) return undefined; - - switch (attribute) { - case 'href': - const relativeHref = elem.getAttribute('href'); - return relativeHref ? new URL(relativeHref, window.location.origin).href : null; - case 'src': - const relativeSrc = elem.getAttribute('src'); - return relativeSrc ? new URL(relativeSrc, window.location.origin).href : null; - case 'innerText': - return elem.innerText; - case 'textContent': - return elem.textContent; - default: - return elem.innerText; - } - }, - (key) => key // Use the original key in the output + + const mbeResults = MBEs.map((mbe) => omap( + lists, + (config) => { + const elem = findAllElements(config) + .find((elem) => mbe.contains(elem)); + + return elem ? getElementValue(elem, config.attribute) : undefined; + }, + (key) => key )) || []; - } + + // If MBE approach didn't find all elements, try independent scraping + if (mbeResults.some(result => Object.values(result).some(v => v === undefined))) { + // Fall back to independent scraping + const results = []; + const foundElements = new Map(); + + // Find all elements for each selector + Object.entries(lists).forEach(([key, config]) => { + const elements = findAllElements(config); + foundElements.set(key, elements); + }); + + // Create result objects for each found element + foundElements.forEach((elements, key) => { + elements.forEach((element, index) => { + if (!results[index]) { + results[index] = {}; + } + results[index][key] = getElementValue(element, lists[key].attribute); + }); + }); + + return results.filter(result => Object.keys(result).length > 0); + } + + return mbeResults; + }; /** * Scrapes multiple lists of similar items based on a template item. @@ -262,108 +394,275 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, * @returns {Array.>} Array of arrays of scraped items, one sub-array per list */ window.scrapeList = async function ({ listSelector, fields, limit = 10 }) { - // Helper function to extract values from elements + // Enhanced query function to handle both iframe and shadow DOM + const queryElement = (rootElement, selector) => { + if (!selector.includes('>>') && !selector.includes(':>>')) { + return rootElement.querySelector(selector); + } + + const parts = selector.split(/(?:>>|:>>)/).map(part => part.trim()); + let currentElement = rootElement; + + for (let i = 0; i < parts.length; i++) { + if (!currentElement) return null; + + // Handle iframe traversal + if (currentElement.tagName === 'IFRAME') { + try { + const iframeDoc = currentElement.contentDocument || currentElement.contentWindow.document; + currentElement = iframeDoc.querySelector(parts[i]); + continue; + } catch (e) { + console.warn('Cannot access iframe content:', e); + return null; + } + } + + // Try regular DOM first + let nextElement = currentElement.querySelector(parts[i]); + + // Try shadow DOM if not found + if (!nextElement && currentElement.shadowRoot) { + nextElement = currentElement.shadowRoot.querySelector(parts[i]); + } + + // Check children's shadow roots if still not found + if (!nextElement) { + const children = Array.from(currentElement.children || []); + for (const child of children) { + if (child.shadowRoot) { + nextElement = child.shadowRoot.querySelector(parts[i]); + if (nextElement) break; + } + } + } + + currentElement = nextElement; + } + + return currentElement; + }; + + // Enhanced query all function for both contexts + const queryElementAll = (rootElement, selector) => { + if (!selector.includes('>>') && !selector.includes(':>>')) { + return rootElement.querySelectorAll(selector); + } + + const parts = selector.split(/(?:>>|:>>)/).map(part => part.trim()); + let currentElements = [rootElement]; + + for (const part of parts) { + const nextElements = []; + + for (const element of currentElements) { + // Handle iframe traversal + if (element.tagName === 'IFRAME') { + try { + const iframeDoc = element.contentDocument || element.contentWindow.document; + nextElements.push(...iframeDoc.querySelectorAll(part)); + } catch (e) { + console.warn('Cannot access iframe content:', e); + continue; + } + } else { + // Regular DOM elements + if (element.querySelectorAll) { + nextElements.push(...element.querySelectorAll(part)); + } + + // Shadow DOM elements + if (element.shadowRoot) { + nextElements.push(...element.shadowRoot.querySelectorAll(part)); + } + + // Check children's shadow roots + const children = Array.from(element.children || []); + for (const child of children) { + if (child.shadowRoot) { + nextElements.push(...child.shadowRoot.querySelectorAll(part)); + } + } + } + } + + currentElements = nextElements; + } + + return currentElements; + }; + + // Enhanced value extraction with context awareness function extractValue(element, attribute) { - if (!element) return null; - - if (attribute === 'innerText') { - return element.innerText.trim(); - } else if (attribute === 'innerHTML') { - return element.innerHTML.trim(); - } else if (attribute === 'src' || attribute === 'href') { - const attrValue = element.getAttribute(attribute); - return attrValue ? new URL(attrValue, window.location.origin).href : null; - } - return element.getAttribute(attribute); + if (!element) return null; + + // Get context-aware base URL + const baseURL = element.ownerDocument?.location?.href || window.location.origin; + + // Check shadow root first + if (element.shadowRoot) { + const shadowContent = element.shadowRoot.textContent; + if (shadowContent?.trim()) { + return shadowContent.trim(); + } + } + + if (attribute === 'innerText') { + return element.innerText.trim(); + } else if (attribute === 'innerHTML') { + return element.innerHTML.trim(); + } else if (attribute === 'src' || attribute === 'href') { + const attrValue = element.getAttribute(attribute); + return attrValue ? new URL(attrValue, baseURL).href : null; + } + return element.getAttribute(attribute); } - // Helper function to find table ancestors + // Enhanced table ancestor finding with context support function findTableAncestor(element) { - let currentElement = element; - const MAX_DEPTH = 5; - let depth = 0; - - while (currentElement && depth < MAX_DEPTH) { - if (currentElement.tagName === 'TD') { - return { type: 'TD', element: currentElement }; - } else if (currentElement.tagName === 'TR') { - return { type: 'TR', element: currentElement }; - } - currentElement = currentElement.parentElement; - depth++; - } - return null; + let currentElement = element; + const MAX_DEPTH = 5; + let depth = 0; + + while (currentElement && depth < MAX_DEPTH) { + // Handle shadow DOM + if (currentElement.getRootNode() instanceof ShadowRoot) { + currentElement = currentElement.getRootNode().host; + continue; + } + + if (currentElement.tagName === 'TD') { + return { type: 'TD', element: currentElement }; + } else if (currentElement.tagName === 'TR') { + return { type: 'TR', element: currentElement }; + } + + // Handle iframe crossing + if (currentElement.tagName === 'IFRAME') { + try { + currentElement = currentElement.contentDocument.body; + } catch (e) { + return null; + } + } else { + currentElement = currentElement.parentElement; + } + depth++; + } + return null; } + // Helper function to get cell index function getCellIndex(td) { - let index = 0; - let sibling = td; - while (sibling = sibling.previousElementSibling) { - index++; - } - return index; + if (td.getRootNode() instanceof ShadowRoot) { + const shadowRoot = td.getRootNode(); + const allCells = Array.from(shadowRoot.querySelectorAll('td')); + return allCells.indexOf(td); + } + + let index = 0; + let sibling = td; + while (sibling = sibling.previousElementSibling) { + index++; + } + return index; } + // Helper function to check for TH elements function hasThElement(row, tableFields) { - for (const [label, { selector }] of Object.entries(tableFields)) { - const element = row.querySelector(selector); - if (element) { - let current = element; - while (current && current !== row) { - if (current.tagName === 'TH') { - return true; - } - current = current.parentElement; - } - } - } - return false; + for (const [_, { selector }] of Object.entries(tableFields)) { + const element = queryElement(row, selector); + if (element) { + let current = element; + while (current && current !== row) { + if (current.getRootNode() instanceof ShadowRoot) { + current = current.getRootNode().host; + continue; + } + + if (current.tagName === 'TH') return true; + + if (current.tagName === 'IFRAME') { + try { + current = current.contentDocument.body; + } catch (e) { + break; + } + } else { + current = current.parentElement; + } + } + } + } + return false; } + // Helper function to filter rows function filterRowsBasedOnTag(rows, tableFields) { for (const row of rows) { if (hasThElement(row, tableFields)) { return rows; } } - return rows.filter(row => row.getElementsByTagName('TH').length === 0); + // Include shadow DOM in TH search + return rows.filter(row => { + const directTH = row.getElementsByTagName('TH').length === 0; + const shadowTH = row.shadowRoot ? + row.shadowRoot.querySelector('th') === null : true; + return directTH && shadowTH; + }); } + // Class similarity comparison functions function calculateClassSimilarity(classList1, classList2) { - const set1 = new Set(classList1); - const set2 = new Set(classList2); - - // Calculate intersection - const intersection = new Set([...set1].filter(x => set2.has(x))); - - // Calculate union - const union = new Set([...set1, ...set2]); - - // Return Jaccard similarity coefficient - return intersection.size / union.size; - } + const set1 = new Set(classList1); + const set2 = new Set(classList2); + const intersection = new Set([...set1].filter(x => set2.has(x))); + const union = new Set([...set1, ...set2]); + return intersection.size / union.size; + } - // New helper function to find elements with similar classes + // Enhanced similar elements finding with context support function findSimilarElements(baseElement, similarityThreshold = 0.7) { const baseClasses = Array.from(baseElement.classList); - if (baseClasses.length === 0) return []; + + const allElements = []; - const potentialElements = document.getElementsByTagName(baseElement.tagName); + // Get elements from main document + allElements.push(...document.getElementsByTagName(baseElement.tagName)); - return Array.from(potentialElements).filter(element => { - if (element === baseElement) return false; - - const similarity = calculateClassSimilarity( - baseClasses, - Array.from(element.classList) - ); - - return similarity >= similarityThreshold; + // Get elements from shadow DOM + if (baseElement.getRootNode() instanceof ShadowRoot) { + const shadowHost = baseElement.getRootNode().host; + allElements.push(...shadowHost.getElementsByTagName(baseElement.tagName)); + } + + // Get elements from iframes + const iframes = document.getElementsByTagName('iframe'); + for (const iframe of iframes) { + try { + const iframeDoc = iframe.contentDocument || iframe.contentWindow.document; + allElements.push(...iframeDoc.getElementsByTagName(baseElement.tagName)); + } catch (e) { + console.warn('Cannot access iframe content:', e); + } + } + + return allElements.filter(element => { + if (element === baseElement) return false; + const similarity = calculateClassSimilarity( + baseClasses, + Array.from(element.classList) + ); + return similarity >= similarityThreshold; }); } - let containers = Array.from(document.querySelectorAll(listSelector)); + // Main scraping logic with context support + let containers = queryElementAll(document, listSelector); + containers = Array.from(containers); + if (containers.length === 0) return []; if (limit > 1 && containers.length === 1) { @@ -374,115 +673,157 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, const newContainers = similarContainers.filter(container => !container.matches(listSelector) ); - containers = [...containers, ...newContainers]; } } - // Initialize arrays to store field classifications for each container const containerFields = containers.map(() => ({ - tableFields: {}, - nonTableFields: {} + tableFields: {}, + nonTableFields: {} })); - // Analyze field types for each container + // Classify fields containers.forEach((container, containerIndex) => { - for (const [label, field] of Object.entries(fields)) { - const sampleElement = container.querySelector(field.selector); - - if (sampleElement) { - const ancestor = findTableAncestor(sampleElement); - if (ancestor) { - containerFields[containerIndex].tableFields[label] = { - ...field, - tableContext: ancestor.type, - cellIndex: ancestor.type === 'TD' ? getCellIndex(ancestor.element) : -1 - }; - } else { - containerFields[containerIndex].nonTableFields[label] = field; - } + for (const [label, field] of Object.entries(fields)) { + const sampleElement = queryElement(container, field.selector); + + if (sampleElement) { + const ancestor = findTableAncestor(sampleElement); + if (ancestor) { + containerFields[containerIndex].tableFields[label] = { + ...field, + tableContext: ancestor.type, + cellIndex: ancestor.type === 'TD' ? getCellIndex(ancestor.element) : -1 + }; } else { containerFields[containerIndex].nonTableFields[label] = field; } + } else { + containerFields[containerIndex].nonTableFields[label] = field; } + } }); const tableData = []; const nonTableData = []; - - // Process table fields across all containers + + // Process table data with both iframe and shadow DOM support for (let containerIndex = 0; containerIndex < containers.length; containerIndex++) { const container = containers[containerIndex]; const { tableFields } = containerFields[containerIndex]; if (Object.keys(tableFields).length > 0) { - const firstField = Object.values(tableFields)[0]; - const firstElement = container.querySelector(firstField.selector); - let tableContext = firstElement; - - while (tableContext && tableContext.tagName !== 'TABLE' && tableContext !== container) { - tableContext = tableContext.parentElement; - } - - if (tableContext) { - const rows = Array.from(tableContext.getElementsByTagName('TR')); - const processedRows = filterRowsBasedOnTag(rows, tableFields); - - for (let rowIndex = 0; rowIndex < Math.min(processedRows.length, limit); rowIndex++) { - const record = {}; - const currentRow = processedRows[rowIndex]; - - for (const [label, { selector, attribute, cellIndex }] of Object.entries(tableFields)) { - let element = null; - - if (cellIndex >= 0) { - const td = currentRow.children[cellIndex]; - if (td) { - element = td.querySelector(selector); - - if (!element && selector.split(">").pop().includes('td:nth-child')) { - element = td; + const firstField = Object.values(tableFields)[0]; + const firstElement = queryElement(container, firstField.selector); + let tableContext = firstElement; + + // Find table context including both iframe and shadow DOM + while (tableContext && tableContext.tagName !== 'TABLE' && tableContext !== container) { + if (tableContext.getRootNode() instanceof ShadowRoot) { + tableContext = tableContext.getRootNode().host; + continue; + } + + if (tableContext.tagName === 'IFRAME') { + try { + tableContext = tableContext.contentDocument.body; + } catch (e) { + break; } + } else { + tableContext = tableContext.parentElement; + } + } - if (!element) { - const tagOnlySelector = selector.split('.')[0]; - element = td.querySelector(tagOnlySelector); + if (tableContext) { + // Get rows from all contexts + const rows = []; + + // Get rows from regular DOM + rows.push(...tableContext.getElementsByTagName('TR')); + + // Get rows from shadow DOM + if (tableContext.shadowRoot) { + rows.push(...tableContext.shadowRoot.getElementsByTagName('TR')); + } + + // Get rows from iframes + if (tableContext.tagName === 'IFRAME') { + try { + const iframeDoc = tableContext.contentDocument || tableContext.contentWindow.document; + rows.push(...iframeDoc.getElementsByTagName('TR')); + } catch (e) { + console.warn('Cannot access iframe rows:', e); } + } + + const processedRows = filterRowsBasedOnTag(rows, tableFields); + + for (let rowIndex = 0; rowIndex < Math.min(processedRows.length, limit); rowIndex++) { + const record = {}; + const currentRow = processedRows[rowIndex]; - if (!element) { - let currentElement = td; - while (currentElement && currentElement.children.length > 0) { - let foundContentChild = false; - for (const child of currentElement.children) { - if (extractValue(child, attribute)) { - currentElement = child; - foundContentChild = true; - break; + for (const [label, { selector, attribute, cellIndex }] of Object.entries(tableFields)) { + let element = null; + + if (cellIndex >= 0) { + // Get TD element considering both contexts + let td = currentRow.children[cellIndex]; + + // Check shadow DOM for td + if (!td && currentRow.shadowRoot) { + const shadowCells = currentRow.shadowRoot.children; + if (shadowCells && shadowCells.length > cellIndex) { + td = shadowCells[cellIndex]; } } - if (!foundContentChild) break; - } - element = currentElement; - } - } - } else { - element = currentRow.querySelector(selector); - } - - if (element) { - record[label] = extractValue(element, attribute); - } - } + + if (td) { + element = queryElement(td, selector); + + if (!element && selector.split(/(?:>>|:>>)/).pop().includes('td:nth-child')) { + element = td; + } - if (Object.keys(record).length > 0) { - tableData.push(record); - } + if (!element) { + const tagOnlySelector = selector.split('.')[0]; + element = queryElement(td, tagOnlySelector); + } + + if (!element) { + let currentElement = td; + while (currentElement && currentElement.children.length > 0) { + let foundContentChild = false; + for (const child of currentElement.children) { + if (extractValue(child, attribute)) { + currentElement = child; + foundContentChild = true; + break; + } + } + if (!foundContentChild) break; + } + element = currentElement; + } + } + } else { + element = queryElement(currentRow, selector); + } + + if (element) { + record[label] = extractValue(element, attribute); + } + } + + if (Object.keys(record).length > 0) { + tableData.push(record); + } + } } - } } } - - // Process non-table fields across all containers + + // Process non-table data with both contexts support for (let containerIndex = 0; containerIndex < containers.length; containerIndex++) { if (nonTableData.length >= limit) break; @@ -490,26 +831,28 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, const { nonTableFields } = containerFields[containerIndex]; if (Object.keys(nonTableFields).length > 0) { - const record = {}; + const record = {}; - for (const [label, { selector, attribute }] of Object.entries(nonTableFields)) { - const element = container.querySelector(selector); - - if (element) { - record[label] = extractValue(element, attribute); - } - } + for (const [label, { selector, attribute }] of Object.entries(nonTableFields)) { + // Get the last part of the selector after any context delimiter + const relativeSelector = selector.split(/(?:>>|:>>)/).slice(-1)[0]; + const element = queryElement(container, relativeSelector); - if (Object.keys(record).length > 0) { - nonTableData.push(record); - } - } + if (element) { + record[label] = extractValue(element, attribute); + } + } + + if (Object.keys(record).length > 0) { + nonTableData.push(record); + } + } } // Merge and limit the results const scrapedData = [...tableData, ...nonTableData]; return scrapedData; -}; + }; /** * Gets all children of the elements matching the listSelector, diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index c581954d..e09ac5d5 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -403,7 +403,7 @@ export default class Interpreter extends EventEmitter { await this.options.serializableCallback(scrapeResults); }, - scrapeSchema: async (schema: Record) => { + scrapeSchema: async (schema: Record) => { await this.ensureScriptsLoaded(page); const scrapeResult = await page.evaluate((schemaObj) => window.scrapeSchema(schemaObj), schema); @@ -663,11 +663,42 @@ export default class Interpreter extends EventEmitter { if (isApplicable) { return actionId; } + } } + + private removeShadowSelectors(workflow: Workflow) { + for (let actionId = workflow.length - 1; actionId >= 0; actionId--) { + const step = workflow[actionId]; + + // Check if step has where and selectors + if (step.where && Array.isArray(step.where.selectors)) { + // Filter out selectors that contain ">>" + step.where.selectors = step.where.selectors.filter(selector => !selector.includes('>>')); + } + } + + return workflow; + } + + private removeSpecialSelectors(workflow: Workflow) { + for (let actionId = workflow.length - 1; actionId >= 0; actionId--) { + const step = workflow[actionId]; + + if (step.where && Array.isArray(step.where.selectors)) { + // Filter out if selector has EITHER ":>>" OR ">>" + step.where.selectors = step.where.selectors.filter(selector => + !(selector.includes(':>>') || selector.includes('>>')) + ); + } + } + + return workflow; } private async runLoop(p: Page, workflow: Workflow) { - const workflowCopy: Workflow = JSON.parse(JSON.stringify(workflow)); + let workflowCopy: Workflow = JSON.parse(JSON.stringify(workflow)); + + workflowCopy = this.removeSpecialSelectors(workflowCopy); // apply ad-blocker to the current page try { diff --git a/package.json b/package.json index e89f13de..36062666 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "maxun", - "version": "0.0.5", + "version": "0.0.6", "author": "Maxun", "license": "AGPL-3.0-or-later", "dependencies": { @@ -44,9 +44,10 @@ "joi": "^17.6.0", "jsonwebtoken": "^9.0.2", "jwt-decode": "^4.0.0", + "lodash": "^4.17.21", "loglevel": "^1.8.0", "loglevel-plugin-remote": "^0.6.8", - "maxun-core": "^0.0.7", + "maxun-core": "^0.0.8", "minio": "^8.0.1", "moment-timezone": "^0.5.45", "node-cron": "^3.0.3", @@ -66,6 +67,7 @@ "react-transition-group": "^4.4.2", "sequelize": "^6.37.3", "sequelize-typescript": "^2.1.6", + "sharp": "^0.33.5", "socket.io": "^4.4.1", "socket.io-client": "^4.4.1", "styled-components": "^5.3.3", @@ -97,6 +99,7 @@ "@types/cookie-parser": "^1.4.7", "@types/express": "^4.17.13", "@types/js-cookie": "^3.0.6", + "@types/lodash": "^4.17.14", "@types/loglevel": "^1.6.3", "@types/node": "22.7.9", "@types/node-cron": "^3.0.11", diff --git a/perf/performance.ts b/perf/performance.ts new file mode 100644 index 00000000..c50ef850 --- /dev/null +++ b/perf/performance.ts @@ -0,0 +1,181 @@ +// Frontend Performance Monitoring +export class FrontendPerformanceMonitor { + private metrics: { + fps: number[]; + memoryUsage: MemoryInfo[]; + renderTime: number[]; + eventLatency: number[]; + }; + private lastFrameTime: number; + private frameCount: number; + + constructor() { + this.metrics = { + fps: [], + memoryUsage: [], + renderTime: [], + eventLatency: [], + }; + this.lastFrameTime = performance.now(); + this.frameCount = 0; + + // Start monitoring + this.startMonitoring(); + } + + private startMonitoring(): void { + // Monitor FPS + const measureFPS = () => { + const currentTime = performance.now(); + const elapsed = currentTime - this.lastFrameTime; + this.frameCount++; + + if (elapsed >= 1000) { // Calculate FPS every second + const fps = Math.round((this.frameCount * 1000) / elapsed); + this.metrics.fps.push(fps); + this.frameCount = 0; + this.lastFrameTime = currentTime; + } + requestAnimationFrame(measureFPS); + }; + requestAnimationFrame(measureFPS); + + // Monitor Memory Usage + if (window.performance && (performance as any).memory) { + setInterval(() => { + const memory = (performance as any).memory; + this.metrics.memoryUsage.push({ + usedJSHeapSize: memory.usedJSHeapSize, + totalJSHeapSize: memory.totalJSHeapSize, + timestamp: Date.now() + }); + }, 1000); + } + } + + // Monitor Canvas Render Time + public measureRenderTime(renderFunction: () => void): void { + const startTime = performance.now(); + renderFunction(); + const endTime = performance.now(); + this.metrics.renderTime.push(endTime - startTime); + } + + // Monitor Event Latency + public measureEventLatency(event: MouseEvent | KeyboardEvent): void { + const latency = performance.now() - event.timeStamp; + this.metrics.eventLatency.push(latency); + } + + // Get Performance Report + public getPerformanceReport(): PerformanceReport { + return { + averageFPS: this.calculateAverage(this.metrics.fps), + averageRenderTime: this.calculateAverage(this.metrics.renderTime), + averageEventLatency: this.calculateAverage(this.metrics.eventLatency), + memoryTrend: this.getMemoryTrend(), + lastMemoryUsage: this.metrics.memoryUsage[this.metrics.memoryUsage.length - 1] + }; + } + + private calculateAverage(array: number[]): number { + return array.length ? array.reduce((a, b) => a + b) / array.length : 0; + } + + private getMemoryTrend(): MemoryTrend { + if (this.metrics.memoryUsage.length < 2) return 'stable'; + const latest = this.metrics.memoryUsage[this.metrics.memoryUsage.length - 1]; + const previous = this.metrics.memoryUsage[this.metrics.memoryUsage.length - 2]; + const change = latest.usedJSHeapSize - previous.usedJSHeapSize; + if (change > 1000000) return 'increasing'; // 1MB threshold + if (change < -1000000) return 'decreasing'; + return 'stable'; + } +} + +// Backend Performance Monitoring +export class BackendPerformanceMonitor { + private metrics: { + screenshotTimes: number[]; + emitTimes: number[]; + memoryUsage: NodeJS.MemoryUsage[]; + }; + + constructor() { + this.metrics = { + screenshotTimes: [], + emitTimes: [], + memoryUsage: [] + }; + this.startMonitoring(); + } + + private startMonitoring(): void { + // Monitor Memory Usage + setInterval(() => { + this.metrics.memoryUsage.push(process.memoryUsage()); + }, 1000); + } + + public async measureScreenshotPerformance( + makeScreenshot: () => Promise + ): Promise { + const startTime = process.hrtime(); + await makeScreenshot(); + const [seconds, nanoseconds] = process.hrtime(startTime); + this.metrics.screenshotTimes.push(seconds * 1000 + nanoseconds / 1000000); + } + + public measureEmitPerformance(emitFunction: () => void): void { + const startTime = process.hrtime(); + emitFunction(); + const [seconds, nanoseconds] = process.hrtime(startTime); + this.metrics.emitTimes.push(seconds * 1000 + nanoseconds / 1000000); + } + + public getPerformanceReport(): BackendPerformanceReport { + return { + averageScreenshotTime: this.calculateAverage(this.metrics.screenshotTimes), + averageEmitTime: this.calculateAverage(this.metrics.emitTimes), + currentMemoryUsage: this.metrics.memoryUsage[this.metrics.memoryUsage.length - 1], + memoryTrend: this.getMemoryTrend() + }; + } + + private calculateAverage(array: number[]): number { + return array.length ? array.reduce((a, b) => a + b) / array.length : 0; + } + + private getMemoryTrend(): MemoryTrend { + if (this.metrics.memoryUsage.length < 2) return 'stable'; + const latest = this.metrics.memoryUsage[this.metrics.memoryUsage.length - 1]; + const previous = this.metrics.memoryUsage[this.metrics.memoryUsage.length - 2]; + const change = latest.heapUsed - previous.heapUsed; + if (change > 1000000) return 'increasing'; + if (change < -1000000) return 'decreasing'; + return 'stable'; + } +} + +interface MemoryInfo { + usedJSHeapSize: number; + totalJSHeapSize: number; + timestamp: number; +} + +type MemoryTrend = 'increasing' | 'decreasing' | 'stable'; + +interface PerformanceReport { + averageFPS: number; + averageRenderTime: number; + averageEventLatency: number; + memoryTrend: MemoryTrend; + lastMemoryUsage: MemoryInfo; +} + +interface BackendPerformanceReport { + averageScreenshotTime: number; + averageEmitTime: number; + currentMemoryUsage: NodeJS.MemoryUsage; + memoryTrend: MemoryTrend; +} \ No newline at end of file diff --git a/public/locales/de.json b/public/locales/de.json index 072d9606..2453d482 100644 --- a/public/locales/de.json +++ b/public/locales/de.json @@ -158,11 +158,13 @@ "confirm": "Bestätigen", "discard": "Verwerfen", "confirm_capture": "Erfassung bestätigen", - "confirm_pagination": "Paginierung bestätigen", - "confirm_limit": "Limit bestätigen", + "confirm_pagination": "Bestätigen", + "confirm_limit": "Bestätigen", "finish_capture": "Erfassung abschließen", + "back": "Zurück", "finish": "Fertig", - "cancel": "Abbrechen" + "cancel": "Abbrechen", + "delete": "Löschen" }, "screenshot": { "capture_fullpage": "Vollständige Seite erfassen", diff --git a/public/locales/en.json b/public/locales/en.json index 4eceee3d..3d4c2b54 100644 --- a/public/locales/en.json +++ b/public/locales/en.json @@ -159,11 +159,13 @@ "confirm": "Confirm", "discard": "Discard", "confirm_capture": "Confirm Capture", - "confirm_pagination": "Confirm Pagination", - "confirm_limit": "Confirm Limit", + "confirm_pagination": "Confirm", + "confirm_limit": "Confirm", "finish_capture": "Finish Capture", + "back": "Back", "finish": "Finish", - "cancel": "Cancel" + "cancel": "Cancel", + "delete": "Delete" }, "screenshot": { "capture_fullpage": "Capture Fullpage", diff --git a/public/locales/es.json b/public/locales/es.json index 5f79c7cf..a971a8a8 100644 --- a/public/locales/es.json +++ b/public/locales/es.json @@ -159,11 +159,13 @@ "confirm": "Confirmar", "discard": "Descartar", "confirm_capture": "Confirmar Captura", - "confirm_pagination": "Confirmar Paginación", - "confirm_limit": "Confirmar Límite", + "confirm_pagination": "Confirmar", + "confirm_limit": "Confirmar", "finish_capture": "Finalizar Captura", + "back": "Atrás", "finish": "Finalizar", - "cancel": "Cancelar" + "cancel": "Cancelar", + "delete": "Eliminar" }, "screenshot": { "capture_fullpage": "Capturar Página Completa", diff --git a/public/locales/ja.json b/public/locales/ja.json index f664e03e..1d79de2b 100644 --- a/public/locales/ja.json +++ b/public/locales/ja.json @@ -159,11 +159,13 @@ "confirm": "確認", "discard": "破棄", "confirm_capture": "取得を確認", - "confirm_pagination": "ページネーションを確認", - "confirm_limit": "制限を確認", + "confirm_pagination": "確認", + "confirm_limit": "確認", "finish_capture": "取得を完了", + "back": "戻る", "finish": "完了", - "cancel": "キャンセル" + "cancel": "キャンセル", + "delete": "削除" }, "screenshot": { "capture_fullpage": "フルページを取得", diff --git a/public/locales/zh.json b/public/locales/zh.json index ead470d2..72596475 100644 --- a/public/locales/zh.json +++ b/public/locales/zh.json @@ -159,11 +159,13 @@ "confirm": "确认", "discard": "放弃", "confirm_capture": "确认捕获", - "confirm_pagination": "确认分页", - "confirm_limit": "确认限制", + "confirm_pagination": "确认", + "confirm_limit": "确认", "finish_capture": "完成捕获", + "back": "返回", "finish": "完成", - "cancel": "取消" + "cancel": "取消", + "delete": "删除" }, "screenshot": { "capture_fullpage": "捕获整页", diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index 2c45d146..8ff4f601 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -9,6 +9,8 @@ import { chromium } from 'playwright-extra'; import stealthPlugin from 'puppeteer-extra-plugin-stealth'; import { PlaywrightBlocker } from '@cliqz/adblocker-playwright'; import fetch from 'cross-fetch'; +import { throttle } from 'lodash'; +import sharp from 'sharp'; import logger from '../../logger'; import { InterpreterSettings, RemoteBrowserOptions } from "../../types"; @@ -16,8 +18,30 @@ import { WorkflowGenerator } from "../../workflow-management/classes/Generator"; import { WorkflowInterpreter } from "../../workflow-management/classes/Interpreter"; import { getDecryptedProxyConfig } from '../../routes/proxy'; import { getInjectableScript } from 'idcac-playwright'; + chromium.use(stealthPlugin()); +const MEMORY_CONFIG = { + gcInterval: 60000, // 1 minute + maxHeapSize: 2048 * 1024 * 1024, // 2GB + heapUsageThreshold: 0.85 // 85% +}; + +const SCREENCAST_CONFIG: { + format: "jpeg" | "png"; + maxWidth: number; + maxHeight: number; + targetFPS: number; + compressionQuality: number; + maxQueueSize: number; +} = { + format: 'jpeg', + maxWidth: 900, + maxHeight: 400, + targetFPS: 30, + compressionQuality: 0.8, + maxQueueSize: 2 +}; /** * This class represents a remote browser instance. @@ -78,6 +102,11 @@ export class RemoteBrowser { */ public interpreter: WorkflowInterpreter; + + private screenshotQueue: Buffer[] = []; + private isProcessingScreenshot = false; + private screencastInterval: NodeJS.Timeout | null = null + /** * Initializes a new instances of the {@link Generator} and {@link WorkflowInterpreter} classes and * assigns the socket instance everywhere. @@ -90,6 +119,46 @@ export class RemoteBrowser { this.generator = new WorkflowGenerator(socket); } + private initializeMemoryManagement(): void { + setInterval(() => { + const memoryUsage = process.memoryUsage(); + const heapUsageRatio = memoryUsage.heapUsed / MEMORY_CONFIG.maxHeapSize; + + if (heapUsageRatio > MEMORY_CONFIG.heapUsageThreshold) { + logger.warn('High memory usage detected, triggering cleanup'); + this.performMemoryCleanup(); + } + + // Clear screenshot queue if it's too large + if (this.screenshotQueue.length > SCREENCAST_CONFIG.maxQueueSize) { + this.screenshotQueue = this.screenshotQueue.slice(-SCREENCAST_CONFIG.maxQueueSize); + } + }, MEMORY_CONFIG.gcInterval); + } + + private async performMemoryCleanup(): Promise { + this.screenshotQueue = []; + this.isProcessingScreenshot = false; + + if (global.gc) { + global.gc(); + } + + // Reset CDP session if needed + if (this.client) { + try { + await this.stopScreencast(); + this.client = null; + if (this.currentPage) { + this.client = await this.currentPage.context().newCDPSession(this.currentPage); + await this.startScreencast(); + } + } catch (error) { + logger.error('Error resetting CDP session:', error); + } + } + } + /** * Normalizes URLs to prevent navigation loops while maintaining consistent format */ @@ -157,7 +226,7 @@ export class RemoteBrowser { 'Mozilla/5.0 (Windows NT 11.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.5938.62 Safari/537.36', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:118.0) Gecko/20100101 Firefox/118.0', ]; - + return userAgents[Math.floor(Math.random() * userAgents.length)]; } @@ -178,7 +247,7 @@ export class RemoteBrowser { "--disable-extensions", "--no-sandbox", "--disable-dev-shm-usage", - ], + ], })); const proxyConfig = await getDecryptedProxyConfig(userId); let proxyOptions: { server: string, username?: string, password?: string } = { server: '' }; @@ -251,11 +320,11 @@ export class RemoteBrowser { this.client = await this.currentPage.context().newCDPSession(this.currentPage); await blocker.disableBlockingInPage(this.currentPage); console.log('Adblocker initialized'); - } catch (error: any) { + } catch (error: any) { console.warn('Failed to initialize adblocker, continuing without it:', error.message); // Still need to set up the CDP session even if blocker fails this.client = await this.currentPage.context().newCDPSession(this.currentPage); - } + } }; /** @@ -319,7 +388,7 @@ export class RemoteBrowser { return; } this.client.on('Page.screencastFrame', ({ data: base64, sessionId }) => { - this.emitScreenshot(base64) + this.emitScreenshot(Buffer.from(base64, 'base64')) setTimeout(async () => { try { if (!this.client) { @@ -339,16 +408,49 @@ export class RemoteBrowser { * If an interpretation was running it will be stopped. * @returns {Promise} */ - public switchOff = async (): Promise => { - await this.interpreter.stopInterpretation(); - if (this.browser) { - await this.stopScreencast(); - await this.browser.close(); - } else { - logger.log('error', 'Browser wasn\'t initialized'); - logger.log('error', 'Switching off the browser failed'); + public async switchOff(): Promise { + try { + await this.interpreter.stopInterpretation(); + + if (this.screencastInterval) { + clearInterval(this.screencastInterval); + } + + if (this.client) { + await this.stopScreencast(); + } + + if (this.browser) { + await this.browser.close(); + } + + this.screenshotQueue = []; + //this.performanceMonitor.reset(); + + } catch (error) { + logger.error('Error during browser shutdown:', error); } - }; + } + + private async optimizeScreenshot(screenshot: Buffer): Promise { + try { + return await sharp(screenshot) + .jpeg({ + quality: Math.round(SCREENCAST_CONFIG.compressionQuality * 100), + progressive: true + }) + .resize({ + width: SCREENCAST_CONFIG.maxWidth, + height: SCREENCAST_CONFIG.maxHeight, + fit: 'inside', + withoutEnlargement: true + }) + .toBuffer(); + } catch (error) { + logger.error('Screenshot optimization failed:', error); + return screenshot; + } + } /** * Makes and emits a single screenshot to the client side. @@ -358,7 +460,7 @@ export class RemoteBrowser { try { const screenshot = await this.currentPage?.screenshot(); if (screenshot) { - this.emitScreenshot(screenshot.toString('base64')); + this.emitScreenshot(screenshot); } } catch (e) { const { message } = e as Error; @@ -490,37 +592,85 @@ export class RemoteBrowser { * Should be called only once after the browser is fully initialized. * @returns {Promise} */ - private startScreencast = async (): Promise => { + private async startScreencast(): Promise { if (!this.client) { - logger.log('warn', 'client is not initialized'); + logger.warn('Client is not initialized'); return; } - await this.client.send('Page.startScreencast', { format: 'jpeg', quality: 75 }); - logger.log('info', `Browser started with screencasting a page.`); - }; - /** - * Unsubscribes the current page from the screencast session. - * @returns {Promise} - */ - private stopScreencast = async (): Promise => { - if (!this.client) { - logger.log('error', 'client is not initialized'); - logger.log('error', 'Screencast stop failed'); - } else { - await this.client.send('Page.stopScreencast'); - logger.log('info', `Browser stopped with screencasting.`); + try { + await this.client.send('Page.startScreencast', { + format: SCREENCAST_CONFIG.format, + }); + + // Set up screencast frame handler + this.client.on('Page.screencastFrame', async ({ data, sessionId }) => { + try { + const buffer = Buffer.from(data, 'base64'); + await this.emitScreenshot(buffer); + await this.client?.send('Page.screencastFrameAck', { sessionId }); + } catch (error) { + logger.error('Screencast frame processing failed:', error); + } + }); + + logger.info('Screencast started successfully'); + } catch (error) { + logger.error('Failed to start screencast:', error); } - }; + } + + private async stopScreencast(): Promise { + if (!this.client) { + logger.error('Client is not initialized'); + return; + } + + try { + await this.client.send('Page.stopScreencast'); + this.screenshotQueue = []; + this.isProcessingScreenshot = false; + logger.info('Screencast stopped successfully'); + } catch (error) { + logger.error('Failed to stop screencast:', error); + } + } + /** * Helper for emitting the screenshot of browser's active page through websocket. * @param payload the screenshot binary data * @returns void */ - private emitScreenshot = (payload: any): void => { - const dataWithMimeType = ('data:image/jpeg;base64,').concat(payload); - this.socket.emit('screencast', dataWithMimeType); - logger.log('debug', `Screenshot emitted`); + private emitScreenshot = async (payload: Buffer): Promise => { + if (this.isProcessingScreenshot) { + if (this.screenshotQueue.length < SCREENCAST_CONFIG.maxQueueSize) { + this.screenshotQueue.push(payload); + } + return; + } + + this.isProcessingScreenshot = true; + + try { + const optimizedScreenshot = await this.optimizeScreenshot(payload); + const base64Data = optimizedScreenshot.toString('base64'); + const dataWithMimeType = `data:image/jpeg;base64,${base64Data}`; + + this.socket.emit('screencast', dataWithMimeType); + logger.debug('Screenshot emitted'); + } catch (error) { + logger.error('Screenshot emission failed:', error); + } finally { + this.isProcessingScreenshot = false; + + if (this.screenshotQueue.length > 0) { + const nextScreenshot = this.screenshotQueue.shift(); + if (nextScreenshot) { + setTimeout(() => this.emitScreenshot(nextScreenshot), 1000 / SCREENCAST_CONFIG.targetFPS); + } + } + } }; + } diff --git a/server/src/types/index.ts b/server/src/types/index.ts index f2e327ef..75aac802 100644 --- a/server/src/types/index.ts +++ b/server/src/types/index.ts @@ -129,6 +129,17 @@ export interface BaseActionInfo { hasOnlyText: boolean; } + +interface IframeSelector { + full: string; + isIframe: boolean; +} + +interface ShadowSelector { + full: string; + mode: string; +} + /** * Holds all the possible css selectors that has been found for an element. * @category Types @@ -143,6 +154,8 @@ export interface Selectors { hrefSelector: string|null; accessibilitySelector: string|null; formSelector: string|null; + iframeSelector: IframeSelector|null; + shadowSelector: ShadowSelector|null; } /** @@ -156,7 +169,7 @@ export interface BaseAction extends BaseActionInfo{ associatedActions: ActionType[]; inputType: string | undefined; value: string | undefined; - selectors: { [key: string]: string | null }; + selectors: Selectors; timestamp: number; isPassword: boolean; /** diff --git a/server/src/workflow-management/classes/Generator.ts b/server/src/workflow-management/classes/Generator.ts index 609541de..d1bccbe4 100644 --- a/server/src/workflow-management/classes/Generator.ts +++ b/server/src/workflow-management/classes/Generator.ts @@ -730,15 +730,26 @@ export class WorkflowGenerator { const displaySelector = await this.generateSelector(page, coordinates, ActionType.Click); const elementInfo = await getElementInformation(page, coordinates, this.listSelector, this.getList); if (rect) { + const highlighterData = { + rect, + selector: displaySelector, + elementInfo, + // Include shadow DOM specific information + shadowInfo: elementInfo?.isShadowRoot ? { + mode: elementInfo.shadowRootMode, + content: elementInfo.shadowRootContent + } : null + }; + if (this.getList === true) { if (this.listSelector !== '') { const childSelectors = await getChildSelectors(page, this.listSelector || ''); - this.socket.emit('highlighter', { rect, selector: displaySelector, elementInfo, childSelectors }) + this.socket.emit('highlighter', { ...highlighterData, childSelectors }) } else { - this.socket.emit('highlighter', { rect, selector: displaySelector, elementInfo }); + this.socket.emit('highlighter', { ...highlighterData }); } } else { - this.socket.emit('highlighter', { rect, selector: displaySelector, elementInfo }); + this.socket.emit('highlighter', { ...highlighterData }); } } } diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index c9390f77..8a9096ec 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -23,10 +23,88 @@ export const getElementInformation = async ( if (!getList || listSelector !== '') { const elementInfo = await page.evaluate( async ({ x, y }) => { - const el = document.elementFromPoint(x, y) as HTMLElement; + const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { + // First, get the element at the clicked coordinates in the main document + let element = document.elementFromPoint(x, y) as HTMLElement; + if (!element) return null; + + // Track the deepest element found + let deepestElement = element; + + // Function to traverse shadow DOM + const traverseShadowDOM = (element: HTMLElement): HTMLElement => { + let current = element; + let shadowRoot = current.shadowRoot; + let deepest = current; + + while (shadowRoot) { + const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; + if (!shadowElement || shadowElement === current) break; + + deepest = shadowElement; + current = shadowElement; + shadowRoot = current.shadowRoot; + } + + return deepest; + }; + + // Handle iframe traversal + if (element.tagName === 'IFRAME') { + let currentIframe = element as HTMLIFrameElement; + + while (currentIframe) { + try { + // Convert coordinates to iframe's local space + const iframeRect = currentIframe.getBoundingClientRect(); + const iframeX = x - iframeRect.left; + const iframeY = y - iframeRect.top; + + const iframeDocument = currentIframe.contentDocument || currentIframe.contentWindow?.document; + if (!iframeDocument) break; + + const iframeElement = iframeDocument.elementFromPoint(iframeX, iframeY) as HTMLElement; + if (!iframeElement) break; + + // Update deepest element and check for shadow DOM + deepestElement = traverseShadowDOM(iframeElement); + + // Continue traversing if we found another iframe + if (iframeElement.tagName === 'IFRAME') { + currentIframe = iframeElement as HTMLIFrameElement; + } else { + break; + } + } catch (error) { + console.warn('Cannot access iframe content:', error); + break; + } + } + } else { + // If not an iframe, check for shadow DOM + deepestElement = traverseShadowDOM(element); + } + + return deepestElement; + }; + + // Get the element and its iframe path + const el = getDeepestElementFromPoint(x, y); + if (el) { + // Handle potential anchor parent const { parentElement } = el; - const element = parentElement?.tagName === 'A' ? parentElement : el; + const targetElement = parentElement?.tagName === 'A' ? parentElement : el; + + // Get containing context information + const ownerDocument = targetElement.ownerDocument; + const frameElement = ownerDocument?.defaultView?.frameElement as HTMLIFrameElement; + const isIframeContent = Boolean(frameElement); + + // Get the containing shadow root if any + const containingShadowRoot = targetElement.getRootNode() as ShadowRoot; + const isShadowRoot = containingShadowRoot instanceof ShadowRoot; + let info: { tagName: string; hasOnlyText?: boolean; @@ -36,51 +114,165 @@ export const getElementInformation = async ( attributes?: Record; innerHTML?: string; outerHTML?: string; + isIframeContent?: boolean; + iframeURL?: string; + iframeIndex?: number; + frameHierarchy?: string[]; + isShadowRoot?: boolean; + shadowRootMode?: string; + shadowRootContent?: string; } = { - tagName: element?.tagName ?? '', + tagName: targetElement?.tagName ?? '', + isIframeContent, + isShadowRoot }; - if (element) { - info.attributes = Array.from(element.attributes).reduce( + + if (isIframeContent) { + // Include iframe specific information + info.iframeURL = frameElement.src; + + // Calculate the frame's position in the hierarchy + let currentFrame = frameElement; + const frameHierarchy: string[] = []; + let frameIndex = 0; + + while (currentFrame) { + // Store the frame's identifier (src, id, or index) + frameHierarchy.unshift( + currentFrame.id || + currentFrame.src || + `iframe[${frameIndex}]` + ); + + // Move up to parent frame if it exists + const parentDoc = currentFrame.ownerDocument; + currentFrame = parentDoc?.defaultView?.frameElement as HTMLIFrameElement; + frameIndex++; + } + + info.frameHierarchy = frameHierarchy; + info.iframeIndex = frameIndex - 1; // Adjust for 0-based index + } + + if (isShadowRoot) { + // Include shadow root specific information + info.shadowRootMode = containingShadowRoot.mode; + info.shadowRootContent = containingShadowRoot.innerHTML; + } + + // Collect element attributes and properties + if (targetElement) { + info.attributes = Array.from(targetElement.attributes).reduce( (acc, attr) => { acc[attr.name] = attr.value; return acc; }, {} as Record ); + + if (targetElement.tagName === 'A') { + info.url = (targetElement as HTMLAnchorElement).href; + info.innerText = targetElement.textContent ?? ''; + } else if (targetElement.tagName === 'IMG') { + info.imageUrl = (targetElement as HTMLImageElement).src; + } else if (targetElement?.tagName === 'SELECT') { + const selectElement = targetElement as HTMLSelectElement; + info.innerText = selectElement.options[selectElement.selectedIndex]?.text ?? ''; + info.attributes = { + ...info.attributes, + selectedValue: selectElement.value, + }; + } else if (targetElement?.tagName === 'INPUT' && (targetElement as HTMLInputElement).type === 'time' || (targetElement as HTMLInputElement).type === 'date') { + info.innerText = (targetElement as HTMLInputElement).value; + } + else { + info.hasOnlyText = targetElement.children.length === 0 && + (targetElement.textContent !== null && + targetElement.textContent.trim().length > 0); + info.innerText = targetElement.textContent ?? ''; + } + + info.innerHTML = targetElement.innerHTML; + info.outerHTML = targetElement.outerHTML; } - // Gather specific information based on the tag - if (element?.tagName === 'A') { - info.url = (element as HTMLAnchorElement).href; - info.innerText = element.innerText ?? ''; - } else if (element?.tagName === 'IMG') { - info.imageUrl = (element as HTMLImageElement).src; - } else if (element?.tagName === 'SELECT') { - const selectElement = element as HTMLSelectElement; - info.innerText = selectElement.options[selectElement.selectedIndex]?.text ?? ''; - info.attributes = { - ...info.attributes, - selectedValue: selectElement.value, - }; - } else if (element?.tagName === 'INPUT' && (element as HTMLInputElement).type === 'time' || (element as HTMLInputElement).type === 'date') { - info.innerText = (element as HTMLInputElement).value; - } else { - info.hasOnlyText = element?.children?.length === 0 && - element?.innerText?.length > 0; - info.innerText = element?.innerText ?? ''; - } - info.innerHTML = element.innerHTML; - info.outerHTML = element.outerHTML; + return info; } return null; }, - { x: coordinates.x, y: coordinates.y }, + { x: coordinates.x, y: coordinates.y } ); return elementInfo; } else { const elementInfo = await page.evaluate( async ({ x, y }) => { - const originalEl = document.elementFromPoint(x, y) as HTMLElement; + // Enhanced helper function to get element from point including shadow DOM + const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { + // First, get the element at the clicked coordinates in the main document + let element = document.elementFromPoint(x, y) as HTMLElement; + if (!element) return null; + + // Track the deepest element found + let deepestElement = element; + + // Function to traverse shadow DOM + const traverseShadowDOM = (element: HTMLElement): HTMLElement => { + let current = element; + let shadowRoot = current.shadowRoot; + let deepest = current; + + while (shadowRoot) { + const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; + if (!shadowElement || shadowElement === current) break; + + deepest = shadowElement; + current = shadowElement; + shadowRoot = current.shadowRoot; + } + + return deepest; + }; + + // Handle iframe traversal + if (element.tagName === 'IFRAME') { + let currentIframe = element as HTMLIFrameElement; + + while (currentIframe) { + try { + // Convert coordinates to iframe's local space + const iframeRect = currentIframe.getBoundingClientRect(); + const iframeX = x - iframeRect.left; + const iframeY = y - iframeRect.top; + + const iframeDocument = currentIframe.contentDocument || currentIframe.contentWindow?.document; + if (!iframeDocument) break; + + const iframeElement = iframeDocument.elementFromPoint(iframeX, iframeY) as HTMLElement; + if (!iframeElement) break; + + // Update deepest element and check for shadow DOM + deepestElement = traverseShadowDOM(iframeElement); + + // Continue traversing if we found another iframe + if (iframeElement.tagName === 'IFRAME') { + currentIframe = iframeElement as HTMLIFrameElement; + } else { + break; + } + } catch (error) { + console.warn('Cannot access iframe content:', error); + break; + } + } + } else { + // If not an iframe, check for shadow DOM + deepestElement = traverseShadowDOM(element); + } + + return deepestElement; + }; + + const originalEl = getDeepestElementFromPoint(x, y); if (originalEl) { let element = originalEl; @@ -124,6 +316,13 @@ export const getElementInformation = async ( } } } + + const ownerDocument = element.ownerDocument; + const frameElement = ownerDocument?.defaultView?.frameElement as HTMLIFrameElement; + const isIframeContent = Boolean(frameElement); + + const containingShadowRoot = element.getRootNode() as ShadowRoot; + const isShadowRoot = containingShadowRoot instanceof ShadowRoot; let info: { tagName: string; @@ -134,11 +333,54 @@ export const getElementInformation = async ( attributes?: Record; innerHTML?: string; outerHTML?: string; + isIframeContent?: boolean; + iframeURL?: string; + iframeIndex?: number; + frameHierarchy?: string[]; + isShadowRoot?: boolean; + shadowRootMode?: string; + shadowRootContent?: string; } = { tagName: element?.tagName ?? '', + isIframeContent, + isShadowRoot }; + if (isIframeContent) { + // Include iframe specific information + info.iframeURL = frameElement.src; + + // Calculate the frame's position in the hierarchy + let currentFrame = frameElement; + const frameHierarchy: string[] = []; + let frameIndex = 0; + + while (currentFrame) { + // Store the frame's identifier (src, id, or index) + frameHierarchy.unshift( + currentFrame.id || + currentFrame.src || + `iframe[${frameIndex}]` + ); + + // Move up to parent frame if it exists + const parentDoc = currentFrame.ownerDocument; + currentFrame = parentDoc?.defaultView?.frameElement as HTMLIFrameElement; + frameIndex++; + } + + info.frameHierarchy = frameHierarchy; + info.iframeIndex = frameIndex - 1; // Adjust for 0-based index + }; + + if (isShadowRoot) { + // Include shadow root specific information + info.shadowRootMode = containingShadowRoot.mode; + info.shadowRootContent = containingShadowRoot.innerHTML; + } + if (element) { + // Get attributes including those from shadow DOM context info.attributes = Array.from(element.attributes).reduce( (acc, attr) => { acc[attr.name] = attr.value; @@ -146,21 +388,25 @@ export const getElementInformation = async ( }, {} as Record ); + + // Handle specific element types + if (element.tagName === 'A') { + info.url = (element as HTMLAnchorElement).href; + info.innerText = element.textContent ?? ''; + } else if (element.tagName === 'IMG') { + info.imageUrl = (element as HTMLImageElement).src; + } else { + // Handle text content with proper null checking + info.hasOnlyText = element.children.length === 0 && + (element.textContent !== null && + element.textContent.trim().length > 0); + info.innerText = element.textContent ?? ''; + } + + info.innerHTML = element.innerHTML; + info.outerHTML = element.outerHTML; } - - if (element?.tagName === 'A') { - info.url = (element as HTMLAnchorElement).href; - info.innerText = element.innerText ?? ''; - } else if (element?.tagName === 'IMG') { - info.imageUrl = (element as HTMLImageElement).src; - } else { - info.hasOnlyText = element?.children?.length === 0 && - element?.innerText?.length > 0; - info.innerText = element?.innerText ?? ''; - } - - info.innerHTML = element.innerHTML; - info.outerHTML = element.outerHTML; + return info; } return null; @@ -176,47 +422,206 @@ export const getElementInformation = async ( } }; -/** - * Returns a {@link Rectangle} object representing - * the coordinates, width, height and corner points of the element. - * If an element is not found, returns null. - * @param page The page instance. - * @param coordinates Coordinates of an element. - * @category WorkflowManagement-Selectors - * @returns {Promise} - */ export const getRect = async (page: Page, coordinates: Coordinates, listSelector: string, getList: boolean) => { try { if (!getList || listSelector !== '') { const rect = await page.evaluate( async ({ x, y }) => { - const el = document.elementFromPoint(x, y) as HTMLElement; + // Enhanced helper function to get element from point including iframes + const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { + // First, get the element at the clicked coordinates in the main document + let element = document.elementFromPoint(x, y) as HTMLElement; + if (!element) return null; + + // Track the deepest element found + let deepestElement = element; + + // Function to traverse shadow DOM + const traverseShadowDOM = (element: HTMLElement): HTMLElement => { + let current = element; + let shadowRoot = current.shadowRoot; + let deepest = current; + + while (shadowRoot) { + const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; + if (!shadowElement || shadowElement === current) break; + + deepest = shadowElement; + current = shadowElement; + shadowRoot = current.shadowRoot; + } + + return deepest; + }; + + // Handle iframe traversal + if (element.tagName === 'IFRAME') { + let currentIframe = element as HTMLIFrameElement; + + while (currentIframe) { + try { + // Convert coordinates to iframe's local space + const iframeRect = currentIframe.getBoundingClientRect(); + const iframeX = x - iframeRect.left; + const iframeY = y - iframeRect.top; + + const iframeDocument = currentIframe.contentDocument || currentIframe.contentWindow?.document; + if (!iframeDocument) break; + + const iframeElement = iframeDocument.elementFromPoint(iframeX, iframeY) as HTMLElement; + if (!iframeElement) break; + + // Update deepest element and check for shadow DOM + deepestElement = traverseShadowDOM(iframeElement); + + // Continue traversing if we found another iframe + if (iframeElement.tagName === 'IFRAME') { + currentIframe = iframeElement as HTMLIFrameElement; + } else { + break; + } + } catch (error) { + console.warn('Cannot access iframe content:', error); + break; + } + } + } else { + // If not an iframe, check for shadow DOM + deepestElement = traverseShadowDOM(element); + } + + return deepestElement; + }; + + const el = getDeepestElementFromPoint(x, y); if (el) { const { parentElement } = el; - // Match the logic in recorder.ts for link clicks const element = parentElement?.tagName === 'A' ? parentElement : el; const rectangle = element?.getBoundingClientRect(); if (rectangle) { - return { - x: rectangle.x, - y: rectangle.y, - width: rectangle.width, - height: rectangle.height, - top: rectangle.top, - right: rectangle.right, - bottom: rectangle.bottom, - left: rectangle.left, - }; + const createRectObject = (rect: DOMRect) => ({ + x: rect.x, + y: rect.y, + width: rect.width, + height: rect.height, + top: rect.top, + right: rect.right, + bottom: rect.bottom, + left: rect.left, + toJSON() { + return { + x: this.x, + y: this.y, + width: this.width, + height: this.height, + top: this.top, + right: this.right, + bottom: this.bottom, + left: this.left + }; + } + }); + + // For elements inside iframes, adjust coordinates relative to the top window + let adjustedRect = createRectObject(rectangle); + let currentWindow = element.ownerDocument.defaultView; + + while (currentWindow !== window.top) { + const frameElement = currentWindow?.frameElement as HTMLIFrameElement; + if (!frameElement) break; + + const frameRect = frameElement.getBoundingClientRect(); + adjustedRect = createRectObject({ + x: adjustedRect.x + frameRect.x, + y: adjustedRect.y + frameRect.y, + width: adjustedRect.width, + height: adjustedRect.height, + top: adjustedRect.top + frameRect.top, + right: adjustedRect.right + frameRect.left, + bottom: adjustedRect.bottom + frameRect.top, + left: adjustedRect.left + frameRect.left, + } as DOMRect); + + currentWindow = frameElement.ownerDocument.defaultView; + } + + return adjustedRect; } } + return null; }, - { x: coordinates.x, y: coordinates.y }, + { x: coordinates.x, y: coordinates.y } ); return rect; } else { const rect = await page.evaluate( async ({ x, y }) => { - const originalEl = document.elementFromPoint(x, y) as HTMLElement; + const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { + // First, get the element at the clicked coordinates in the main document + let element = document.elementFromPoint(x, y) as HTMLElement; + if (!element) return null; + + // Track the deepest element found + let deepestElement = element; + + // Function to traverse shadow DOM + const traverseShadowDOM = (element: HTMLElement): HTMLElement => { + let current = element; + let shadowRoot = current.shadowRoot; + let deepest = current; + + while (shadowRoot) { + const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; + if (!shadowElement || shadowElement === current) break; + + deepest = shadowElement; + current = shadowElement; + shadowRoot = current.shadowRoot; + } + + return deepest; + }; + + // Handle iframe traversal + if (element.tagName === 'IFRAME') { + let currentIframe = element as HTMLIFrameElement; + + while (currentIframe) { + try { + // Convert coordinates to iframe's local space + const iframeRect = currentIframe.getBoundingClientRect(); + const iframeX = x - iframeRect.left; + const iframeY = y - iframeRect.top; + + const iframeDocument = currentIframe.contentDocument || currentIframe.contentWindow?.document; + if (!iframeDocument) break; + + const iframeElement = iframeDocument.elementFromPoint(iframeX, iframeY) as HTMLElement; + if (!iframeElement) break; + + // Update deepest element and check for shadow DOM + deepestElement = traverseShadowDOM(iframeElement); + + // Continue traversing if we found another iframe + if (iframeElement.tagName === 'IFRAME') { + currentIframe = iframeElement as HTMLIFrameElement; + } else { + break; + } + } catch (error) { + console.warn('Cannot access iframe content:', error); + break; + } + } + } else { + // If not an iframe, check for shadow DOM + deepestElement = traverseShadowDOM(element); + } + + return deepestElement; + }; + + const originalEl = getDeepestElementFromPoint(x, y); if (originalEl) { let element = originalEl; @@ -262,34 +667,69 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector } const rectangle = element?.getBoundingClientRect(); - if (rectangle) { - return { - x: rectangle.x, - y: rectangle.y, - width: rectangle.width, - height: rectangle.height, - top: rectangle.top, - right: rectangle.right, - bottom: rectangle.bottom, - left: rectangle.left, - }; + const createRectObject = (rect: DOMRect) => ({ + x: rect.x, + y: rect.y, + width: rect.width, + height: rect.height, + top: rect.top, + right: rect.right, + bottom: rect.bottom, + left: rect.left, + toJSON() { + return { + x: this.x, + y: this.y, + width: this.width, + height: this.height, + top: this.top, + right: this.right, + bottom: this.bottom, + left: this.left + }; + } + }); + + // Same coordinate adjustment for iframe elements as above + let adjustedRect = createRectObject(rectangle); + let currentWindow = element.ownerDocument.defaultView; + + while (currentWindow !== window.top) { + const frameElement = currentWindow?.frameElement as HTMLIFrameElement; + if (!frameElement) break; + + const frameRect = frameElement.getBoundingClientRect(); + adjustedRect = createRectObject({ + x: adjustedRect.x + frameRect.x, + y: adjustedRect.y + frameRect.y, + width: adjustedRect.width, + height: adjustedRect.height, + top: adjustedRect.top + frameRect.top, + right: adjustedRect.right + frameRect.left, + bottom: adjustedRect.bottom + frameRect.top, + left: adjustedRect.left + frameRect.left, + } as DOMRect); + + currentWindow = frameElement.ownerDocument.defaultView; + } + + return adjustedRect; } } return null; }, - { x: coordinates.x, y: coordinates.y }, + { x: coordinates.x, y: coordinates.y } ); return rect; } } catch (error) { const { message, stack } = error as Error; - logger.log('error', `Error while retrieving selector: ${message}`); - logger.log('error', `Stack: ${stack}`); + console.error('Error while retrieving selector:', message); + console.error('Stack:', stack); } }; - /** * Returns the best and unique css {@link Selectors} for the element on the page. * Internally uses a finder function from https://github.com/antonmedv/finder/blob/master/finder.ts @@ -759,6 +1199,206 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { } return output; } + + const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { + // Helper function to traverse shadow DOM + const traverseShadowDOM = (element: HTMLElement, depth: number = 0): HTMLElement => { + const MAX_SHADOW_DEPTH = 4; + let current = element; + let deepest = current; + + while (current && depth < MAX_SHADOW_DEPTH) { + const shadowRoot = current.shadowRoot; + if (!shadowRoot) break; + + const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; + if (!shadowElement || shadowElement === current) break; + + deepest = shadowElement; + current = shadowElement; + depth++; + } + + return deepest; + }; + + // Start with the element at the specified coordinates + let element = document.elementFromPoint(x, y) as HTMLElement; + if (!element) return null; + + // Initialize tracking variables + let deepestElement = element; + let depth = 0; + const MAX_IFRAME_DEPTH = 4; + + // First check if the initial element has a shadow root + deepestElement = traverseShadowDOM(element); + + // If it's an iframe, traverse through iframe hierarchy + if (deepestElement.tagName === 'IFRAME') { + let currentIframe = deepestElement as HTMLIFrameElement; + + while (currentIframe && depth < MAX_IFRAME_DEPTH) { + try { + // Convert coordinates to iframe's local space + const iframeRect = currentIframe.getBoundingClientRect(); + const iframeX = x - iframeRect.left; + const iframeY = y - iframeRect.top; + + // Access iframe's document + const iframeDoc = currentIframe.contentDocument || currentIframe.contentWindow?.document; + if (!iframeDoc) break; + + // Get element at transformed coordinates in iframe + const iframeElement = iframeDoc.elementFromPoint(iframeX, iframeY) as HTMLElement; + if (!iframeElement) break; + + // Check for shadow DOM within iframe + const shadowResult = traverseShadowDOM(iframeElement); + deepestElement = shadowResult; + + // If we found another iframe, continue traversing + if (shadowResult.tagName === 'IFRAME') { + currentIframe = shadowResult as HTMLIFrameElement; + depth++; + } else { + break; + } + } catch (error) { + console.warn('Cannot access iframe content:', error); + break; + } + } + } + + return deepestElement; + }; + + const genSelectorForIframe = (element: HTMLElement) => { + // Helper function to get the complete iframe path up to document root + const getIframePath = (el: HTMLElement) => { + const path = []; + let current = el; + let depth = 0; + const MAX_DEPTH = 4; + + while (current && depth < MAX_DEPTH) { + // Get the owner document of the current element + const ownerDocument = current.ownerDocument; + + // Check if this document belongs to an iframe + const frameElement = ownerDocument?.defaultView?.frameElement as HTMLIFrameElement; + + if (frameElement) { + path.unshift({ + frame: frameElement, + document: ownerDocument, + element: current + }); + // Move up to the parent document's element (the iframe) + current = frameElement; + depth++; + } else { + break; + } + } + return path; + }; + + const iframePath = getIframePath(element); + if (iframePath.length === 0) return null; + + try { + const selectorParts: string[] = []; + + // Generate selector for each iframe boundary + iframePath.forEach((context, index) => { + // Get selector for the iframe element + const frameSelector = finder(context.frame, { + root: index === 0 ? document.body : + (iframePath[index - 1].document.body as Element) + }); + + // For the last context, get selector for target element + if (index === iframePath.length - 1) { + const elementSelector = finder(element, { + root: context.document.body as Element + }); + selectorParts.push(`${frameSelector} :>> ${elementSelector}`); + } else { + selectorParts.push(frameSelector); + } + }); + + return { + fullSelector: selectorParts.join(' :>> '), + isFrameContent: true + }; + } catch (e) { + console.warn('Error generating iframe selector:', e); + return null; + } + }; + + // Helper function to generate selectors for shadow DOM elements + const genSelectorForShadowDOM = (element: HTMLElement) => { + // Get complete path up to document root + const getShadowPath = (el: HTMLElement) => { + const path = []; + let current = el; + let depth = 0; + const MAX_DEPTH = 4; + + while (current && depth < MAX_DEPTH) { + const rootNode = current.getRootNode(); + if (rootNode instanceof ShadowRoot) { + path.unshift({ + host: rootNode.host as HTMLElement, + root: rootNode, + element: current + }); + current = rootNode.host as HTMLElement; + depth++; + } else { + break; + } + } + return path; + }; + + const shadowPath = getShadowPath(element); + if (shadowPath.length === 0) return null; + + try { + const selectorParts: string[] = []; + + // Generate selector for each shadow DOM boundary + shadowPath.forEach((context, index) => { + // Get selector for the host element + const hostSelector = finder(context.host, { + root: index === 0 ? document.body : (shadowPath[index - 1].root as unknown as Element) + }); + + // For the last context, get selector for target element + if (index === shadowPath.length - 1) { + const elementSelector = finder(element, { + root: context.root as unknown as Element + }); + selectorParts.push(`${hostSelector} >> ${elementSelector}`); + } else { + selectorParts.push(hostSelector); + } + }); + + return { + fullSelector: selectorParts.join(' >> '), + mode: shadowPath[shadowPath.length - 1].root.mode + }; + } catch (e) { + console.warn('Error generating shadow DOM selector:', e); + return null; + } + }; const genSelectors = (element: HTMLElement | null) => { if (element == null) { @@ -779,6 +1419,10 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { } catch (e) { } + + const iframeSelector = genSelectorForIframe(element); + const shadowSelector = genSelectorForShadowDOM(element); + const hrefSelector = genSelectorForAttributes(element, ['href']); const formSelector = genSelectorForAttributes(element, [ 'name', @@ -825,9 +1469,19 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { hrefSelector, accessibilitySelector, formSelector, + iframeSelector: iframeSelector ? { + full: iframeSelector.fullSelector, + isIframe: iframeSelector.isFrameContent, + } : null, + shadowSelector: shadowSelector ? { + full: shadowSelector.fullSelector, + mode: shadowSelector.mode + } : null }; } + + function genAttributeSet(element: HTMLElement, attributes: string[]) { return new Set( attributes.filter((attr) => { @@ -867,7 +1521,8 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { return char.length === 1 && char.match(/[0-9]/); } - const hoveredElement = document.elementFromPoint(x, y) as HTMLElement; + const hoveredElement = getDeepestElementFromPoint(x, y) as HTMLElement; + if ( hoveredElement != null && !hoveredElement.closest('#overlay-controls') != null @@ -902,9 +1557,83 @@ interface SelectorResult { */ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates, listSelector: string): Promise => { + interface DOMContext { + type: 'iframe' | 'shadow'; + element: HTMLElement; + container: HTMLIFrameElement | ShadowRoot; + host?: HTMLElement; + document?: Document; + } + try { if (!listSelector) { const selectors = await page.evaluate(({ x, y }: { x: number, y: number }) => { + const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { + // Function to traverse shadow DOM + const traverseShadowDOM = (element: HTMLElement): HTMLElement => { + let current = element; + let deepest = current; + let shadowRoot = current.shadowRoot; + + while (shadowRoot) { + const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; + if (!shadowElement || shadowElement === current) break; + + deepest = shadowElement; + current = shadowElement; + shadowRoot = current.shadowRoot; + } + + return deepest; + }; + + // Start with the element at coordinates + let element = document.elementFromPoint(x, y) as HTMLElement; + if (!element) return null; + + let deepestElement = element; + let depth = 0; + const MAX_DEPTH = 4; + + // Handle iframe traversal + if (element.tagName === 'IFRAME') { + let currentIframe = element as HTMLIFrameElement; + + while (currentIframe && depth < MAX_DEPTH) { + try { + const iframeRect = currentIframe.getBoundingClientRect(); + const iframeX = x - iframeRect.left; + const iframeY = y - iframeRect.top; + + const iframeDoc = currentIframe.contentDocument || currentIframe.contentWindow?.document; + if (!iframeDoc) break; + + const iframeElement = iframeDoc.elementFromPoint(iframeX, iframeY) as HTMLElement; + if (!iframeElement) break; + + // Check for shadow DOM within iframe + deepestElement = traverseShadowDOM(iframeElement); + + if (deepestElement.tagName === 'IFRAME') { + currentIframe = deepestElement as HTMLIFrameElement; + depth++; + } else { + break; + } + } catch (error) { + console.warn('Cannot access iframe content:', error); + break; + } + } + } else { + // If not an iframe, check for shadow DOM + deepestElement = traverseShadowDOM(element); + } + + return deepestElement; + }; + + // Basic selector generation function getNonUniqueSelector(element: HTMLElement): string { let selector = element.tagName.toLowerCase(); @@ -928,22 +1657,95 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates return selector; } - function getSelectorPath(element: HTMLElement | null): string { - const path: string[] = []; - let depth = 0; - const maxDepth = 2; - while (element && element !== document.body && depth < maxDepth) { - const selector = getNonUniqueSelector(element); + function getContextPath(element: HTMLElement): DOMContext[] { + const path: DOMContext[] = []; + let current = element; + let depth = 0; + const MAX_DEPTH = 4; + + while (current && depth < MAX_DEPTH) { + // Check for shadow DOM + const rootNode = current.getRootNode(); + if (rootNode instanceof ShadowRoot) { + path.unshift({ + type: 'shadow', + element: current, + container: rootNode, + host: rootNode.host as HTMLElement + }); + current = rootNode.host as HTMLElement; + depth++; + continue; + } + + // Check for iframe + const ownerDocument = current.ownerDocument; + const frameElement = ownerDocument?.defaultView?.frameElement as HTMLIFrameElement; + + if (frameElement) { + path.unshift({ + type: 'iframe', + element: current, + container: frameElement, + document: ownerDocument + }); + current = frameElement; + depth++; + continue; + } + + break; + } + + return path; + } + + function getSelectorPath(element: HTMLElement | null): string { + if (!element) return ''; + + // Get the complete context path + const contextPath = getContextPath(element); + if (contextPath.length > 0) { + const selectorParts: string[] = []; + + contextPath.forEach((context, index) => { + const containerSelector = getNonUniqueSelector( + context.type === 'shadow' ? context.host! : context.container as HTMLElement + ); + + if (index === contextPath.length - 1) { + const elementSelector = getNonUniqueSelector(element); + const delimiter = context.type === 'shadow' ? ' >> ' : ' :>> '; + selectorParts.push(`${containerSelector}${delimiter}${elementSelector}`); + } else { + selectorParts.push(containerSelector); + } + }); + + return selectorParts.join(contextPath[0].type === 'shadow' ? ' >> ' : ' :>> '); + } + + // Regular DOM path generation + const path: string[] = []; + let currentElement = element; + const MAX_DEPTH = 2; + let depth = 0; + + while (currentElement && currentElement !== document.body && depth < MAX_DEPTH) { + const selector = getNonUniqueSelector(currentElement); path.unshift(selector); - element = element.parentElement; + + if (!currentElement.parentElement) break; + currentElement = currentElement.parentElement; depth++; } return path.join(' > '); } - const originalEl = document.elementFromPoint(x, y) as HTMLElement; + // Main logic to get element and generate selector + const originalEl = getDeepestElementFromPoint(x, y); if (!originalEl) return null; let element = originalEl; @@ -989,16 +1791,90 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates } } } - // } const generalSelector = getSelectorPath(element); - return { - generalSelector, - }; + return { generalSelector }; }, coordinates); + return selectors || { generalSelector: '' }; } else { + // When we have a list selector, we need special handling while maintaining shadow DOM support const selectors = await page.evaluate(({ x, y }: { x: number, y: number }) => { + const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { + // Helper function to traverse shadow DOM + const traverseShadowDOM = (element: HTMLElement, depth: number = 0): HTMLElement => { + const MAX_SHADOW_DEPTH = 4; + let current = element; + let deepest = current; + + while (current && depth < MAX_SHADOW_DEPTH) { + const shadowRoot = current.shadowRoot; + if (!shadowRoot) break; + + const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement; + if (!shadowElement || shadowElement === current) break; + + deepest = shadowElement; + current = shadowElement; + depth++; + } + + return deepest; + }; + + // Start with the element at the specified coordinates + let element = document.elementFromPoint(x, y) as HTMLElement; + if (!element) return null; + + // Initialize tracking variables + let deepestElement = element; + let depth = 0; + const MAX_IFRAME_DEPTH = 4; + + // First check if the initial element has a shadow root + deepestElement = traverseShadowDOM(element); + + // If it's an iframe, traverse through iframe hierarchy + if (deepestElement.tagName === 'IFRAME') { + let currentIframe = deepestElement as HTMLIFrameElement; + + while (currentIframe && depth < MAX_IFRAME_DEPTH) { + try { + // Convert coordinates to iframe's local space + const iframeRect = currentIframe.getBoundingClientRect(); + const iframeX = x - iframeRect.left; + const iframeY = y - iframeRect.top; + + // Access iframe's document + const iframeDoc = currentIframe.contentDocument || currentIframe.contentWindow?.document; + if (!iframeDoc) break; + + // Get element at transformed coordinates in iframe + const iframeElement = iframeDoc.elementFromPoint(iframeX, iframeY) as HTMLElement; + if (!iframeElement) break; + + // Check for shadow DOM within iframe + const shadowResult = traverseShadowDOM(iframeElement); + deepestElement = shadowResult; + + // If we found another iframe, continue traversing + if (shadowResult.tagName === 'IFRAME') { + currentIframe = shadowResult as HTMLIFrameElement; + depth++; + } else { + break; + } + } catch (error) { + console.warn('Cannot access iframe content:', error); + break; + } + } + } + + return deepestElement; + }; + + // Generate basic selector from element's tag and classes function getNonUniqueSelector(element: HTMLElement): string { let selector = element.tagName.toLowerCase(); @@ -1009,9 +1885,9 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates } if (element.className) { - const classes = element.className.split(/\s+/).filter((cls: string) => Boolean(cls)); + const classes = element.className.split(/\s+/).filter(Boolean); if (classes.length > 0) { - const validClasses = classes.filter((cls: string) => !cls.startsWith('!') && !cls.includes(':')); + const validClasses = classes.filter(cls => !cls.startsWith('!') && !cls.includes(':')); if (validClasses.length > 0) { selector += '.' + validClasses.map(cls => CSS.escape(cls)).join('.'); } @@ -1021,34 +1897,104 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates return selector; } - function getSelectorPath(element: HTMLElement | null): string { - const path: string[] = []; + // Get complete context path (both iframe and shadow DOM) + function getContextPath(element: HTMLElement): DOMContext[] { + const path: DOMContext[] = []; + let current = element; let depth = 0; - const maxDepth = 2; + const MAX_DEPTH = 4; + + while (current && depth < MAX_DEPTH) { + // Check for shadow DOM + const rootNode = current.getRootNode(); + if (rootNode instanceof ShadowRoot) { + path.unshift({ + type: 'shadow', + element: current, + container: rootNode, + host: rootNode.host as HTMLElement + }); + current = rootNode.host as HTMLElement; + depth++; + continue; + } - while (element && element !== document.body && depth < maxDepth) { - const selector = getNonUniqueSelector(element); + // Check for iframe + const ownerDocument = current.ownerDocument; + const frameElement = ownerDocument?.defaultView?.frameElement as HTMLIFrameElement; + + if (frameElement) { + path.unshift({ + type: 'iframe', + element: current, + container: frameElement, + document: ownerDocument + }); + current = frameElement; + depth++; + continue; + } + + break; + } + + return path; + } + + function getSelectorPath(element: HTMLElement | null): string { + if (!element) return ''; + + // Get the complete context path + const contextPath = getContextPath(element); + if (contextPath.length > 0) { + const selectorParts: string[] = []; + + contextPath.forEach((context, index) => { + const containerSelector = getNonUniqueSelector( + context.type === 'shadow' ? context.host! : context.container as HTMLElement + ); + + if (index === contextPath.length - 1) { + const elementSelector = getNonUniqueSelector(element); + const delimiter = context.type === 'shadow' ? ' >> ' : ' :>> '; + selectorParts.push(`${containerSelector}${delimiter}${elementSelector}`); + } else { + selectorParts.push(containerSelector); + } + }); + + return selectorParts.join(contextPath[0].type === 'shadow' ? ' >> ' : ' :>> '); + } + + // Regular DOM path generation + const path: string[] = []; + let currentElement = element; + const MAX_DEPTH = 2; + let depth = 0; + + while (currentElement && currentElement !== document.body && depth < MAX_DEPTH) { + const selector = getNonUniqueSelector(currentElement); path.unshift(selector); - element = element.parentElement; + + if (!currentElement.parentElement) break; + currentElement = currentElement.parentElement; depth++; } return path.join(' > '); } - const originalEl = document.elementFromPoint(x, y) as HTMLElement; - if (!originalEl) return null; + const originalEl = getDeepestElementFromPoint(x, y); + if (!originalEl) return { generalSelector: '' }; let element = originalEl; const generalSelector = getSelectorPath(element); - return { - generalSelector, - }; - }, coordinates); - return selectors || { generalSelector: '' }; - } + return { generalSelector }; + }, coordinates); + return selectors || { generalSelector: '' }; + } } catch (error) { console.error('Error in getNonUniqueSelectors:', error); return { generalSelector: '' }; @@ -1083,42 +2029,158 @@ export const getChildSelectors = async (page: Page, parentSelector: string): Pro } // Function to generate selector path from an element to its parent - function getSelectorPath(element: HTMLElement | null): string { + function getSelectorPath(element: HTMLElement): string { if (!element || !element.parentElement) return ''; - const parentSelector = getNonUniqueSelector(element.parentElement); const elementSelector = getNonUniqueSelector(element); + + // Check for shadow DOM context + const rootNode = element.getRootNode(); + if (rootNode instanceof ShadowRoot) { + const hostSelector = getNonUniqueSelector(rootNode.host as HTMLElement); + return `${hostSelector} >> ${elementSelector}`; + } + // Check for iframe context + const ownerDocument = element.ownerDocument; + const frameElement = ownerDocument?.defaultView?.frameElement as HTMLIFrameElement; + if (frameElement) { + const frameSelector = getNonUniqueSelector(frameElement); + return `${frameSelector} :>> ${elementSelector}`; + } + + // Regular DOM context + const parentSelector = getNonUniqueSelector(element.parentElement); return `${parentSelector} > ${elementSelector}`; } - // Function to recursively get all descendant selectors + + // Function to get all children from special contexts + function getSpecialContextChildren(element: HTMLElement): HTMLElement[] { + const children: HTMLElement[] = []; + + // Get shadow DOM children + const shadowRoot = element.shadowRoot; + if (shadowRoot) { + const shadowElements = Array.from(shadowRoot.querySelectorAll('*')) as HTMLElement[]; + children.push(...shadowElements); + } + + // Get iframe children + const iframes = Array.from(element.querySelectorAll('iframe')) as HTMLIFrameElement[]; + for (const iframe of iframes) { + try { + const iframeDoc = iframe.contentDocument || iframe.contentWindow?.document; + if (iframeDoc) { + const iframeElements = Array.from(iframeDoc.querySelectorAll('*')) as HTMLElement[]; + children.push(...iframeElements); + } + } catch (error) { + console.warn('Cannot access iframe content:', error); + continue; + } + } + + return children; + } + + // Function to recursively get all descendant selectors including shadow DOM and iframes function getAllDescendantSelectors(element: HTMLElement): string[] { let selectors: string[] = []; + + // Handle regular DOM children const children = Array.from(element.children) as HTMLElement[]; - for (const child of children) { const childPath = getSelectorPath(child); if (childPath) { - selectors.push(childPath); // Add direct child path - selectors = selectors.concat(getAllDescendantSelectors(child)); // Recursively process descendants + selectors.push(childPath); + + // Process regular descendants + selectors = selectors.concat(getAllDescendantSelectors(child)); + + // Process special context children (shadow DOM and iframes) + const specialChildren = getSpecialContextChildren(child); + for (const specialChild of specialChildren) { + const specialPath = getSelectorPath(specialChild); + if (specialPath) { + selectors.push(specialPath); + selectors = selectors.concat(getAllDescendantSelectors(specialChild)); + } + } + } + } + + // Handle direct special context children + const specialChildren = getSpecialContextChildren(element); + for (const specialChild of specialChildren) { + const specialPath = getSelectorPath(specialChild); + if (specialPath) { + selectors.push(specialPath); + selectors = selectors.concat(getAllDescendantSelectors(specialChild)); } } return selectors; } - // Find all occurrences of the parent selector in the DOM - const parentElements = Array.from(document.querySelectorAll(parentSelector)) as HTMLElement[]; + // Handle both shadow DOM and iframe parent selectors + let parentElements: HTMLElement[] = []; + + // Check for special context traversal in parent selector + if (parentSelector.includes('>>') || parentSelector.includes(':>>')) { + // Split by both types of delimiters + const selectorParts = parentSelector.split(/(?:>>|:>>)/).map(part => part.trim()); + + // Start with initial elements + parentElements = Array.from(document.querySelectorAll(selectorParts[0])) as HTMLElement[]; + + // Traverse through parts + for (let i = 1; i < selectorParts.length; i++) { + const newParentElements: HTMLElement[] = []; + + for (const element of parentElements) { + // Check for shadow DOM + if (element.shadowRoot) { + const shadowChildren = Array.from( + element.shadowRoot.querySelectorAll(selectorParts[i]) + ) as HTMLElement[]; + newParentElements.push(...shadowChildren); + } + + // Check for iframe + if (element.tagName === 'IFRAME') { + try { + const iframeDoc = (element as HTMLIFrameElement).contentDocument || + (element as HTMLIFrameElement).contentWindow?.document; + if (iframeDoc) { + const iframeChildren = Array.from( + iframeDoc.querySelectorAll(selectorParts[i]) + ) as HTMLElement[]; + newParentElements.push(...iframeChildren); + } + } catch (error) { + console.warn('Cannot access iframe content during traversal:', error); + continue; + } + } + } + + parentElements = newParentElements; + } + } else { + // Regular DOM selector + parentElements = Array.from(document.querySelectorAll(parentSelector)) as HTMLElement[]; + } + const allChildSelectors = new Set(); // Use a set to ensure uniqueness // Process each parent element and its descendants parentElements.forEach((parentElement) => { const descendantSelectors = getAllDescendantSelectors(parentElement); - descendantSelectors.forEach((selector) => allChildSelectors.add(selector)); // Add selectors to the set + descendantSelectors.forEach((selector) => allChildSelectors.add(selector)); }); - return Array.from(allChildSelectors); // Convert the set back to an array + return Array.from(allChildSelectors); }, parentSelector); return childSelectors || []; diff --git a/server/src/workflow-management/utils.ts b/server/src/workflow-management/utils.ts index b3dadd60..0804aa78 100644 --- a/server/src/workflow-management/utils.ts +++ b/server/src/workflow-management/utils.ts @@ -12,6 +12,16 @@ export const getBestSelectorForAction = (action: Action) => { case ActionType.Hover: case ActionType.DragAndDrop: { const selectors = action.selectors; + + + if (selectors?.iframeSelector?.full) { + return selectors.iframeSelector.full; + } + + if (selectors?.shadowSelector?.full) { + return selectors.shadowSelector.full; + } + // less than 25 characters, and element only has text inside const textSelector = selectors?.text?.length != null && @@ -75,6 +85,11 @@ export const getBestSelectorForAction = (action: Action) => { case ActionType.Input: case ActionType.Keydown: { const selectors = action.selectors; + + if (selectors?.shadowSelector?.full) { + return selectors.shadowSelector.full; + } + return ( selectors.testIdSelector ?? selectors?.id ?? diff --git a/src/api/storage.ts b/src/api/storage.ts index 4b2f4e80..18c793c0 100644 --- a/src/api/storage.ts +++ b/src/api/storage.ts @@ -5,11 +5,6 @@ import { ScheduleSettings } from "../components/molecules/ScheduleSettings"; import { CreateRunResponse, ScheduleRunResponse } from "../pages/MainPage"; import { apiUrl } from "../apiConfig"; - - - - - export const getStoredRecordings = async (): Promise => { try { const response = await axios.get(`${apiUrl}/storage/recordings`); @@ -82,11 +77,7 @@ export const getStoredRecording = async (id: string) => { } } - - export const checkRunsForRecording = async (id: string): Promise => { - - try { const response = await axios.get(`${apiUrl}/storage/recordings/${id}/runs`); @@ -99,32 +90,26 @@ export const checkRunsForRecording = async (id: string): Promise => { } }; - export const deleteRecordingFromStorage = async (id: string): Promise => { - const hasRuns = await checkRunsForRecording(id); - + if (hasRuns) { - + return false; } try { const response = await axios.delete(`${apiUrl}/storage/recordings/${id}`); if (response.status === 200) { - + return true; } else { throw new Error(`Couldn't delete stored recording ${id}`); } } catch (error: any) { console.log(error); - + return false; } - - - - }; export const deleteRunFromStorage = async (id: string): Promise => { @@ -159,7 +144,7 @@ export const createRunForStoredRecording = async (id: string, settings: RunSetti try { const response = await axios.put( `${apiUrl}/storage/runs/${id}`, - { ...settings }); + { ...settings }); if (response.status === 200) { return response.data; } else { diff --git a/src/api/workflow.ts b/src/api/workflow.ts index 03b677b1..40ac0d99 100644 --- a/src/api/workflow.ts +++ b/src/api/workflow.ts @@ -3,7 +3,7 @@ import { emptyWorkflow } from "../shared/constants"; import { default as axios, AxiosResponse } from "axios"; import { apiUrl } from "../apiConfig"; -export const getActiveWorkflow = async(id: string) : Promise => { +export const getActiveWorkflow = async (id: string): Promise => { try { const response = await axios.get(`${apiUrl}/workflow/${id}`) if (response.status === 200) { @@ -11,13 +11,13 @@ export const getActiveWorkflow = async(id: string) : Promise => { } else { throw new Error('Something went wrong when fetching a recorded workflow'); } - } catch(error: any) { + } catch (error: any) { console.log(error); return emptyWorkflow; } }; -export const getParamsOfActiveWorkflow = async(id: string) : Promise => { +export const getParamsOfActiveWorkflow = async (id: string): Promise => { try { const response = await axios.get(`${apiUrl}/workflow/params/${id}`) if (response.status === 200) { @@ -25,15 +25,15 @@ export const getParamsOfActiveWorkflow = async(id: string) : Promise => { +export const deletePair = async (index: number): Promise => { try { - const response = await axios.delete(`${apiUrl}/workflow/pair/${index}`); + const response = await axios.delete(`${apiUrl}/workflow/pair/${index}`); if (response.status === 200) { return response.data; } else { @@ -45,11 +45,11 @@ export const deletePair = async(index: number): Promise => { } }; -export const AddPair = async(index: number, pair: WhereWhatPair): Promise => { +export const AddPair = async (index: number, pair: WhereWhatPair): Promise => { try { const response = await axios.post(`${apiUrl}/workflow/pair/${index}`, { pair, - }, {headers: {'Content-Type': 'application/json'}}); + }, { headers: { 'Content-Type': 'application/json' } }); if (response.status === 200) { return response.data; } else { @@ -61,11 +61,11 @@ export const AddPair = async(index: number, pair: WhereWhatPair): Promise => { +export const UpdatePair = async (index: number, pair: WhereWhatPair): Promise => { try { const response = await axios.put(`${apiUrl}/workflow/pair/${index}`, { pair, - }, {headers: {'Content-Type': 'application/json'}}); + }, { headers: { 'Content-Type': 'application/json' } }); if (response.status === 200) { return response.data; } else { diff --git a/src/components/atoms/canvas.tsx b/src/components/atoms/canvas.tsx index e71a4d93..8fd4f791 100644 --- a/src/components/atoms/canvas.tsx +++ b/src/components/atoms/canvas.tsx @@ -1,21 +1,147 @@ -import React, { useCallback, useEffect, useRef } from 'react'; +import React, { useCallback, useEffect, useRef, useMemo, Suspense } from 'react'; import { useSocketStore } from '../../context/socket'; -import { getMappedCoordinates } from "../../helpers/inputHelpers"; import { useGlobalInfoStore } from "../../context/globalInfo"; import { useActionContext } from '../../context/browserActions'; -import DatePicker from './DatePicker'; -import Dropdown from './Dropdown'; -import TimePicker from './TimePicker'; -import DateTimeLocalPicker from './DateTimeLocalPicker'; +const DatePicker = React.lazy(() => import('./DatePicker')); +const Dropdown = React.lazy(() => import('./Dropdown')); +const TimePicker = React.lazy(() => import('./TimePicker')); +const DateTimeLocalPicker = React.lazy(() => import('./DateTimeLocalPicker')); -interface CreateRefCallback { - (ref: React.RefObject): void; +class RAFScheduler { + private queue: Set<() => void> = new Set(); + private isProcessing: boolean = false; + private frameId: number | null = null; + + schedule(callback: () => void): void { + this.queue.add(callback); + if (!this.isProcessing) { + this.process(); + } + } + + private process = (): void => { + this.isProcessing = true; + this.frameId = requestAnimationFrame(() => { + const callbacks = Array.from(this.queue); + this.queue.clear(); + + callbacks.forEach(callback => { + try { + callback(); + } catch (error) { + console.error('RAF Scheduler error:', error); + } + }); + + this.isProcessing = false; + this.frameId = null; + + if (this.queue.size > 0) { + this.process(); + } + }); + } + + clear(): void { + this.queue.clear(); + if (this.frameId !== null) { + cancelAnimationFrame(this.frameId); + this.frameId = null; + } + this.isProcessing = false; + } +} + +class EventDebouncer { + private highPriorityQueue: Array<() => void> = []; + private lowPriorityQueue: Array<() => void> = []; + private processing: boolean = false; + private scheduler: RAFScheduler; + + constructor(scheduler: RAFScheduler) { + this.scheduler = scheduler; + } + + add(callback: () => void, highPriority: boolean = false): void { + if (highPriority) { + this.highPriorityQueue.push(callback); + } else { + this.lowPriorityQueue.push(callback); + } + + if (!this.processing) { + this.process(); + } + } + + private process(): void { + this.processing = true; + this.scheduler.schedule(() => { + while (this.highPriorityQueue.length > 0) { + const callback = this.highPriorityQueue.shift(); + callback?.(); + } + + if (this.lowPriorityQueue.length > 0) { + const callback = this.lowPriorityQueue.shift(); + callback?.(); + + if (this.lowPriorityQueue.length > 0) { + this.process(); + } + } + + this.processing = false; + }); + } + + clear(): void { + this.highPriorityQueue = []; + this.lowPriorityQueue = []; + this.processing = false; + } +} + +// Optimized measurement cache with LRU +class MeasurementCache { + private cache: Map; + private maxSize: number; + + constructor(maxSize: number = 100) { + this.cache = new Map(); + this.maxSize = maxSize; + } + + get(element: HTMLElement): DOMRect | undefined { + const cached = this.cache.get(element); + if (cached) { + // Refresh the entry + this.cache.delete(element); + this.cache.set(element, cached); + } + return cached; + } + + set(element: HTMLElement, rect: DOMRect): void { + if (this.cache.size >= this.maxSize) { + // Remove oldest entry + const firstKey = this.cache.keys().next().value; + if (firstKey !== undefined) { + this.cache.delete(firstKey); + } + } + this.cache.set(element, rect); + } + + clear(): void { + this.cache.clear(); + } } interface CanvasProps { width: number; height: number; - onCreateRef: CreateRefCallback; + onCreateRef: (ref: React.RefObject) => void; } /** @@ -26,225 +152,229 @@ export interface Coordinates { y: number; }; -const Canvas = ({ width, height, onCreateRef }: CanvasProps) => { - +const Canvas = React.memo(({ width, height, onCreateRef }: CanvasProps) => { const canvasRef = useRef(null); const { socket } = useSocketStore(); const { setLastAction, lastAction } = useGlobalInfoStore(); const { getText, getList } = useActionContext(); - const getTextRef = useRef(getText); - const getListRef = useRef(getList); - const [datePickerInfo, setDatePickerInfo] = React.useState<{ - coordinates: Coordinates; - selector: string; - } | null>(null); + const scheduler = useRef(new RAFScheduler()); + const debouncer = useRef(new EventDebouncer(scheduler.current)); + const measurementCache = useRef(new MeasurementCache(50)); + //const performanceMonitor = useRef(new FrontendPerformanceMonitor()); - const [dropdownInfo, setDropdownInfo] = React.useState<{ - coordinates: Coordinates; - selector: string; - options: Array<{ - value: string; - text: string; - disabled: boolean; - selected: boolean; - }>; - } | null>(null); + const refs = useRef({ + getText, + getList, + lastMousePosition: { x: 0, y: 0 }, + lastFrameTime: 0, + context: null as CanvasRenderingContext2D | null, + }); - const [timePickerInfo, setTimePickerInfo] = React.useState<{ - coordinates: Coordinates; - selector: string; - } | null>(null); - - const [dateTimeLocalInfo, setDateTimeLocalInfo] = React.useState<{ - coordinates: Coordinates; - selector: string; - } | null>(null); - - const notifyLastAction = (action: string) => { - if (lastAction !== action) { - setLastAction(action); + const [state, dispatch] = React.useReducer((state: any, action: any) => { + switch (action.type) { + case 'BATCH_UPDATE': + return { ...state, ...action.payload }; + default: + return state; } - }; + }, { + datePickerInfo: null, + dropdownInfo: null, + timePickerInfo: null, + dateTimeLocalInfo: null + }); - const lastMousePosition = useRef({ x: 0, y: 0 }); + const getEventCoordinates = useCallback((event: MouseEvent): { x: number; y: number } => { + if (!canvasRef.current) return { x: 0, y: 0 }; - useEffect(() => { - getTextRef.current = getText; - getListRef.current = getList; - }, [getText, getList]); - - useEffect(() => { - if (socket) { - socket.on('showDatePicker', (info: {coordinates: Coordinates, selector: string}) => { - setDatePickerInfo(info); - }); - - socket.on('showDropdown', (info: { - coordinates: Coordinates, - selector: string, - options: Array<{ - value: string; - text: string; - disabled: boolean; - selected: boolean; - }>; - }) => { - setDropdownInfo(info); - }); - - socket.on('showTimePicker', (info: {coordinates: Coordinates, selector: string}) => { - setTimePickerInfo(info); - }); - - socket.on('showDateTimePicker', (info: {coordinates: Coordinates, selector: string}) => { - setDateTimeLocalInfo(info); - }); - - return () => { - socket.off('showDatePicker'); - socket.off('showDropdown'); - socket.off('showTimePicker'); - socket.off('showDateTimePicker'); - }; + let rect = measurementCache.current.get(canvasRef.current); + if (!rect) { + rect = canvasRef.current.getBoundingClientRect(); + measurementCache.current.set(canvasRef.current, rect); } - }, [socket]); - const onMouseEvent = useCallback((event: MouseEvent) => { - if (socket && canvasRef.current) { - // Get the canvas bounding rectangle - const rect = canvasRef.current.getBoundingClientRect(); - const clickCoordinates = { - x: event.clientX - rect.left, // Use relative x coordinate - y: event.clientY - rect.top, // Use relative y coordinate - }; + return { + x: event.clientX - rect.left, + y: event.clientY - rect.top + }; + }, []); - switch (event.type) { - case 'mousedown': - if (getTextRef.current === true) { + const handleMouseEvent = useCallback((event: MouseEvent) => { + if (!socket || !canvasRef.current) return; + + //performanceMonitor.current.measureEventLatency(event); + const coordinates = getEventCoordinates(event); + + switch (event.type) { + case 'mousedown': + debouncer.current.add(() => { + if (refs.current.getText) { console.log('Capturing Text...'); - } else if (getListRef.current === true) { + } else if (refs.current.getList) { console.log('Capturing List...'); } else { - socket.emit('input:mousedown', clickCoordinates); + socket.emit('input:mousedown', coordinates); } - notifyLastAction('click'); - break; - case 'mousemove': - if (lastMousePosition.current.x !== clickCoordinates.x || - lastMousePosition.current.y !== clickCoordinates.y) { - lastMousePosition.current = { - x: clickCoordinates.x, - y: clickCoordinates.y, - }; - socket.emit('input:mousemove', { - x: clickCoordinates.x, - y: clickCoordinates.y, - }); - notifyLastAction('move'); - } - break; - case 'wheel': - const wheelEvent = event as WheelEvent; - const deltas = { - deltaX: Math.round(wheelEvent.deltaX), - deltaY: Math.round(wheelEvent.deltaY), - }; - socket.emit('input:wheel', deltas); - notifyLastAction('scroll'); - break; - default: - console.log('Default mouseEvent registered'); - return; - } - } - }, [socket]); + setLastAction('click'); + }, true); // High priority + break; - const onKeyboardEvent = useCallback((event: KeyboardEvent) => { - if (socket) { + case 'mousemove': + if (refs.current.lastMousePosition.x !== coordinates.x || + refs.current.lastMousePosition.y !== coordinates.y) { + debouncer.current.add(() => { + refs.current.lastMousePosition = coordinates; + socket.emit('input:mousemove', coordinates); + setLastAction('move'); + }); + } + break; + + case 'wheel': + const wheelEvent = event as WheelEvent; + debouncer.current.add(() => { + socket.emit('input:wheel', { + deltaX: Math.round(wheelEvent.deltaX), + deltaY: Math.round(wheelEvent.deltaY) + }); + setLastAction('scroll'); + }); + break; + } + }, [socket, getEventCoordinates]); + + const handleKeyboardEvent = useCallback((event: KeyboardEvent) => { + if (!socket) return; + + debouncer.current.add(() => { switch (event.type) { case 'keydown': - socket.emit('input:keydown', { key: event.key, coordinates: lastMousePosition.current }); - notifyLastAction(`${event.key} pressed`); + socket.emit('input:keydown', { + key: event.key, + coordinates: refs.current.lastMousePosition + }); + setLastAction(`${event.key} pressed`); break; case 'keyup': socket.emit('input:keyup', event.key); break; - default: - console.log('Default keyEvent registered'); - return; } - } + }, event.type === 'keydown'); // High priority for keydown }, [socket]); + // Setup and cleanup + useEffect(() => { + if (!canvasRef.current) return; + + const canvas = canvasRef.current; + refs.current.context = canvas.getContext('2d', { + alpha: false, + desynchronized: true + }); + + onCreateRef(canvasRef); + + const options = { passive: true }; + canvas.addEventListener('mousedown', handleMouseEvent, options); + canvas.addEventListener('mousemove', handleMouseEvent, options); + canvas.addEventListener('wheel', handleMouseEvent, options); + canvas.addEventListener('keydown', handleKeyboardEvent, options); + canvas.addEventListener('keyup', handleKeyboardEvent, options); + + return () => { + canvas.removeEventListener('mousedown', handleMouseEvent); + canvas.removeEventListener('mousemove', handleMouseEvent); + canvas.removeEventListener('wheel', handleMouseEvent); + canvas.removeEventListener('keydown', handleKeyboardEvent); + canvas.removeEventListener('keyup', handleKeyboardEvent); + + scheduler.current.clear(); + debouncer.current.clear(); + measurementCache.current.clear(); + }; + }, [handleMouseEvent, handleKeyboardEvent, onCreateRef]); + + // Performance monitoring + // useEffect(() => { + // const intervalId = setInterval(() => { + // console.log('Performance Report:', performanceMonitor.current.getPerformanceReport()); + // }, 20000); + + // return () => clearInterval(intervalId); + // }, []); useEffect(() => { - if (canvasRef.current) { - onCreateRef(canvasRef); - canvasRef.current.addEventListener('mousedown', onMouseEvent); - canvasRef.current.addEventListener('mousemove', onMouseEvent); - canvasRef.current.addEventListener('wheel', onMouseEvent, { passive: true }); - canvasRef.current.addEventListener('keydown', onKeyboardEvent); - canvasRef.current.addEventListener('keyup', onKeyboardEvent); + if (!socket) return; - return () => { - if (canvasRef.current) { - canvasRef.current.removeEventListener('mousedown', onMouseEvent); - canvasRef.current.removeEventListener('mousemove', onMouseEvent); - canvasRef.current.removeEventListener('wheel', onMouseEvent); - canvasRef.current.removeEventListener('keydown', onKeyboardEvent); - canvasRef.current.removeEventListener('keyup', onKeyboardEvent); - } + const handlers = { + showDatePicker: (info: any) => dispatch({ type: 'BATCH_UPDATE', payload: { datePickerInfo: info } }), + showDropdown: (info: any) => dispatch({ type: 'BATCH_UPDATE', payload: { dropdownInfo: info } }), + showTimePicker: (info: any) => dispatch({ type: 'BATCH_UPDATE', payload: { timePickerInfo: info } }), + showDateTimePicker: (info: any) => dispatch({ type: 'BATCH_UPDATE', payload: { dateTimeLocalInfo: info } }) + }; - }; - } else { - console.log('Canvas not initialized'); - } + Object.entries(handlers).forEach(([event, handler]) => socket.on(event, handler)); + return () => { + Object.keys(handlers).forEach(event => socket.off(event)); + }; + }, [socket]); - }, [onMouseEvent]); + const memoizedDimensions = useMemo(() => ({ + width: width || 900, + height: height || 400 + }), [width, height]); return ( -

+
- {datePickerInfo && ( - setDatePickerInfo(null)} - /> - )} - {dropdownInfo && ( - setDropdownInfo(null)} - /> - )} - {timePickerInfo && ( - setTimePickerInfo(null)} - /> - )} - {dateTimeLocalInfo && ( - setDateTimeLocalInfo(null)} - /> - )} + + {state.datePickerInfo && ( + dispatch({ + type: 'BATCH_UPDATE', + payload: { datePickerInfo: null } + })} + /> + )} + {state.dropdownInfo && ( + dispatch({ + type: 'BATCH_UPDATE', + payload: { dropdownInfo: null } + })} + /> + )} + {state.timePickerInfo && ( + dispatch({ type: 'SET_TIME_PICKER', payload: null })} + /> + )} + {state.dateTimeLocalInfo && ( + dispatch({ type: 'SET_DATETIME_PICKER', payload: null })} + /> + )} +
); +}); -}; - +Canvas.displayName = 'Canvas'; export default Canvas; \ No newline at end of file diff --git a/src/components/molecules/ActionDescriptionBox.tsx b/src/components/molecules/ActionDescriptionBox.tsx index 190c5838..45ec1641 100644 --- a/src/components/molecules/ActionDescriptionBox.tsx +++ b/src/components/molecules/ActionDescriptionBox.tsx @@ -113,7 +113,7 @@ const ActionDescriptionBox = () => { return ( - + {renderActionDescription()} diff --git a/src/components/molecules/NavBar.tsx b/src/components/molecules/NavBar.tsx index 142d45ab..8aeeb05d 100644 --- a/src/components/molecules/NavBar.tsx +++ b/src/components/molecules/NavBar.tsx @@ -318,7 +318,7 @@ export const NavBar: React.FC = ({ { window.open('https://x.com/maxun_io?ref=app', '_blank'); }}> - Twiiter (X) + Twitter (X) {t('navbar.menu_items.language')} diff --git a/src/components/molecules/RecordingsTable.tsx b/src/components/molecules/RecordingsTable.tsx index 01bc524b..f8a0ba37 100644 --- a/src/components/molecules/RecordingsTable.tsx +++ b/src/components/molecules/RecordingsTable.tsx @@ -33,10 +33,6 @@ interface Column { format?: (value: string) => string; } - - - - interface Data { id: string; name: string; @@ -441,7 +437,6 @@ const OptionsButton = ({ handleEdit, handleDelete, handleDuplicate }: OptionsBut {t('recordingtable.duplicate')} - ); diff --git a/src/components/molecules/SaveRecording.tsx b/src/components/molecules/SaveRecording.tsx index 8e1eb462..cc51f238 100644 --- a/src/components/molecules/SaveRecording.tsx +++ b/src/components/molecules/SaveRecording.tsx @@ -101,7 +101,7 @@ export const SaveRecording = ({ fileName }: SaveRecordingProps) => { - {t('save_recording.warnings.robot_exists')} + {t('save_recording.errors.exists_warning')} ) : + )} - + )} @@ -454,7 +529,22 @@ export const RightSidePanel: React.FC = ({ onFinishCapture updateCustomLimit(e.target.value)} + onChange={(e: React.ChangeEvent) => { + const value = parseInt(e.target.value); + // Only update if the value is greater than or equal to 1 or if the field is empty + if (e.target.value === '' || value >= 1) { + updateCustomLimit(e.target.value); + } + }} + inputProps={{ + min: 1, + onKeyPress: (e: React.KeyboardEvent) => { + const value = (e.target as HTMLInputElement).value + e.key; + if (parseInt(value) < 1) { + e.preventDefault(); + } + } + }} placeholder={t('right_panel.limit.enter_number')} sx={{ marginLeft: '10px', @@ -470,7 +560,7 @@ export const RightSidePanel: React.FC = ({ onFinishCapture )} - {!getText && !getScreenshot && !getList && showCaptureText && } + {!getText && !getScreenshot && !getList && showCaptureText && } {getText && <> @@ -526,11 +616,21 @@ export const RightSidePanel: React.FC = ({ onFinishCapture ) }} /> - {!confirmedTextSteps[step.id] && ( + {!confirmedTextSteps[step.id] ? ( + ) : !isCaptureTextConfirmed && ( + + + )} )} @@ -548,61 +648,70 @@ export const RightSidePanel: React.FC = ({ onFinishCapture Object.entries(step.fields).length === 0 ? ( {t('right_panel.messages.list_empty')} ) : ( - <> - {t('right_panel.messages.list_selected')} - {Object.entries(step.fields).map(([key, field]) => ( - - handleTextLabelChange(field.id, e.target.value, step.id, key)} - fullWidth - margin="normal" - InputProps={{ - readOnly: confirmedListTextFields[field.id]?.[key], - startAdornment: ( - - - - ) - }} - /> - - - - ) - }} - /> - {!confirmedListTextFields[step.id]?.[key] && ( - - - - - )} - - ))} - - ) + <> + {t('right_panel.messages.list_selected')} + {Object.entries(step.fields).map(([key, field]) => ( + + handleTextLabelChange(field.id, e.target.value, step.id, key)} + fullWidth + margin="normal" + InputProps={{ + readOnly: confirmedListTextFields[field.id]?.[key], + startAdornment: ( + + + + ) + }} + /> + + + + ) + }} + /> + {!confirmedListTextFields[step.id]?.[key] ? ( + + + + + ) : !isCaptureListConfirmed && ( + + + + )} + + ))} + )} ))} diff --git a/src/context/browserSteps.tsx b/src/context/browserSteps.tsx index dd211199..fd311a35 100644 --- a/src/context/browserSteps.tsx +++ b/src/context/browserSteps.tsx @@ -32,6 +32,7 @@ export interface SelectorObject { selector: string; tag?: string; attribute?: string; + shadow?: boolean; [key: string]: any; }