diff --git a/README.md b/README.md
index 47e170b5..376bf25b 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,7 @@ Maxun lets you train a robot in 2 minutes and scrape the web on auto-pilot. Web
+ Documentation |
Website |
Discord |
Twitter |
diff --git a/docker-compose.yml b/docker-compose.yml
index 874e48d6..91b72428 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -43,7 +43,7 @@ services:
#build:
#context: .
#dockerfile: server/Dockerfile
- image: getmaxun/maxun-backend:v0.0.9
+ image: getmaxun/maxun-backend:v0.0.10
ports:
- "${BACKEND_PORT:-8080}:${BACKEND_PORT:-8080}"
env_file: .env
@@ -70,7 +70,7 @@ services:
#build:
#context: .
#dockerfile: Dockerfile
- image: getmaxun/maxun-frontend:v0.0.5
+ image: getmaxun/maxun-frontend:v0.0.7
ports:
- "${FRONTEND_PORT:-5173}:${FRONTEND_PORT:-5173}"
env_file: .env
diff --git a/maxun-core/package.json b/maxun-core/package.json
index 7c92d08e..ddaaa510 100644
--- a/maxun-core/package.json
+++ b/maxun-core/package.json
@@ -1,6 +1,6 @@
{
"name": "maxun-core",
- "version": "0.0.7",
+ "version": "0.0.8",
"description": "Core package for Maxun, responsible for data extraction",
"main": "build/index.js",
"typings": "build/index.d.ts",
diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js
index 0c5d74ac..f6b53da2 100644
--- a/maxun-core/src/browserSide/scraper.js
+++ b/maxun-core/src/browserSide/scraper.js
@@ -188,69 +188,201 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
* @param {Object.<string, {selector: string, attribute: string}>} lists The named lists of HTML elements.
* @returns {Array.<Object.<string, string>>}
*/
- window.scrapeSchema = function (lists) {
+ window.scrapeSchema = function(lists) {
+ // Object helpers: omap maps over an object's entries, ofilter keeps the entries that pass a predicate
function omap(object, f, kf = (x) => x) {
return Object.fromEntries(
Object.entries(object)
- .map(([k, v]) => [kf(k), f(v)]),
+ .map(([k, v]) => [kf(k), f(v)]),
);
}
function ofilter(object, f) {
return Object.fromEntries(
Object.entries(object)
- .filter(([k, v]) => f(k, v)),
+ .filter(([k, v]) => f(k, v)),
);
}
- function getSeedKey(listObj) {
- const maxLength = Math.max(...Object.values(omap(listObj, (x) => document.querySelectorAll(x.selector).length)));
- return Object.keys(ofilter(listObj, (_, v) => document.querySelectorAll(v.selector).length === maxLength))[0];
+ function findAllElements(config) {
+ // Regular DOM query if no special delimiters
+ if (!config.selector.includes('>>') && !config.selector.includes(':>>')) {
+ return Array.from(document.querySelectorAll(config.selector));
+ }
+
+ // First handle iframe traversal if present
+ if (config.selector.includes(':>>')) {
+ const parts = config.selector.split(':>>').map(s => s.trim());
+ let currentElements = [document];
+
+ // Traverse through each part of the selector
+ for (let i = 0; i < parts.length; i++) {
+ const part = parts[i];
+ const nextElements = [];
+ const isLast = i === parts.length - 1;
+
+ for (const element of currentElements) {
+ try {
+ // For document or iframe document
+ const doc = element.contentDocument || element || element.contentWindow?.document;
+ if (!doc) continue;
+
+ // Query elements in current context
+ const found = Array.from(doc.querySelectorAll(part));
+
+ if (isLast) {
+ // If it's the last part, keep all matching elements
+ nextElements.push(...found);
+ } else {
+ // If not last, only keep iframes for next iteration
+ const iframes = found.filter(el => el.tagName === 'IFRAME');
+ nextElements.push(...iframes);
+ }
+ } catch (error) {
+ console.warn('Cannot access iframe content:', error, {
+ part,
+ element,
+ index: i
+ });
+ }
+ }
+
+ if (nextElements.length === 0) {
+ console.warn('No elements found for part:', part, 'at depth:', i);
+ return [];
+ }
+ currentElements = nextElements;
+ }
+
+ return currentElements;
+ }
+
+ // Handle shadow DOM traversal
+ if (config.selector.includes('>>')) {
+ const parts = config.selector.split('>>').map(s => s.trim());
+ let currentElements = [document];
+
+ for (const part of parts) {
+ const nextElements = [];
+ for (const element of currentElements) {
+ // Try regular DOM first
+ const found = Array.from(element.querySelectorAll(part));
+
+ // Then check shadow roots
+ for (const foundEl of found) {
+ if (foundEl.shadowRoot) {
+ nextElements.push(foundEl.shadowRoot);
+ } else {
+ nextElements.push(foundEl);
+ }
+ }
+ }
+ currentElements = nextElements;
+ }
+ return currentElements.filter(el => !(el instanceof ShadowRoot));
+ }
+
+ return [];
}
+ // Read the requested attribute from an element; href/src are resolved against the element's own document URL, falling back to the page origin
+ function getElementValue(element, attribute) {
+ if (!element) return null;
+
+ // Get the base URL for resolving relative URLs
+ const baseURL = element.ownerDocument?.location?.href || window.location.origin;
+
+ switch (attribute) {
+ case 'href': {
+ const relativeHref = element.getAttribute('href');
+ return relativeHref ? new URL(relativeHref, baseURL).href : null;
+ }
+ case 'src': {
+ const relativeSrc = element.getAttribute('src');
+ return relativeSrc ? new URL(relativeSrc, baseURL).href : null;
+ }
+ case 'innerText':
+ return element.innerText?.trim();
+ case 'textContent':
+ return element.textContent?.trim();
+ default:
+ return element.getAttribute(attribute) || element.innerText?.trim();
+ }
+ }
+
+ // Pick the seed list: the first key whose selector matches the largest number of elements
+ function getSeedKey(listObj) {
+ const maxLength = Math.max(...Object.values(
+ omap(listObj, (x) => findAllElements(x).length)
+ ));
+ return Object.keys(
+ ofilter(listObj, (_, v) => findAllElements(v).length === maxLength)
+ )[0];
+ }
+
+ // Find minimal bounding elements
function getMBEs(elements) {
return elements.map((element) => {
let candidate = element;
const isUniqueChild = (e) => elements
- .filter((elem) => e.parentNode?.contains(elem))
+ .filter((elem) => {
+ // Handle both iframe and shadow DOM boundaries
+ const sameContext = elem.getRootNode() === e.getRootNode() &&
+ elem.ownerDocument === e.ownerDocument;
+ return sameContext && e.parentNode?.contains(elem);
+ })
.length === 1;
-
+
while (candidate && isUniqueChild(candidate)) {
candidate = candidate.parentNode;
}
-
+
return candidate;
});
}
const seedName = getSeedKey(lists);
- const seedElements = Array.from(document.querySelectorAll(lists[seedName].selector));
+ const seedElements = findAllElements(lists[seedName]);
const MBEs = getMBEs(seedElements);
-
- return MBEs.map((mbe) => omap(
- lists,
- ({ selector, attribute }, key) => {
- const elem = Array.from(document.querySelectorAll(selector)).find((elem) => mbe.contains(elem));
- if (!elem) return undefined;
-
- switch (attribute) {
- case 'href':
- const relativeHref = elem.getAttribute('href');
- return relativeHref ? new URL(relativeHref, window.location.origin).href : null;
- case 'src':
- const relativeSrc = elem.getAttribute('src');
- return relativeSrc ? new URL(relativeSrc, window.location.origin).href : null;
- case 'innerText':
- return elem.innerText;
- case 'textContent':
- return elem.textContent;
- default:
- return elem.innerText;
- }
- },
- (key) => key // Use the original key in the output
+
+ const mbeResults = MBEs.map((mbe) => omap(
+ lists,
+ (config) => {
+ const elem = findAllElements(config)
+ .find((elem) => mbe.contains(elem));
+
+ return elem ? getElementValue(elem, config.attribute) : undefined;
+ },
+ (key) => key
)) || [];
- }
+
+ // If MBE approach didn't find all elements, try independent scraping
+ if (mbeResults.some(result => Object.values(result).some(v => v === undefined))) {
+ // Fall back to independent scraping
+ const results = [];
+ const foundElements = new Map();
+
+ // Find all elements for each selector
+ Object.entries(lists).forEach(([key, config]) => {
+ const elements = findAllElements(config);
+ foundElements.set(key, elements);
+ });
+
+ // Zip matches by position: the i-th match of every selector goes into result row i
+ foundElements.forEach((elements, key) => {
+ elements.forEach((element, index) => {
+ if (!results[index]) {
+ results[index] = {};
+ }
+ results[index][key] = getElementValue(element, lists[key].attribute);
+ });
+ });
+
+ return results.filter(result => Object.keys(result).length > 0);
+ }
+
+ return mbeResults;
+ };
/**
* Scrapes multiple lists of similar items based on a template item.
@@ -262,108 +394,275 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
* @returns {Promise.<Array.<Object>>} Resolves to a flat array of scraped records (table rows first, then non-table items)
*/
window.scrapeList = async function ({ listSelector, fields, limit = 10 }) {
- // Helper function to extract values from elements
+ // Resolve a selector to a single element, descending through iframes and shadow roots at each '>>' / ':>>' delimiter
+ const queryElement = (rootElement, selector) => {
+ if (!selector.includes('>>') && !selector.includes(':>>')) {
+ return rootElement.querySelector(selector);
+ }
+
+ const parts = selector.split(/(?:>>|:>>)/).map(part => part.trim());
+ let currentElement = rootElement;
+
+ for (let i = 0; i < parts.length; i++) {
+ if (!currentElement) return null;
+
+ // Handle iframe traversal
+ if (currentElement.tagName === 'IFRAME') {
+ try {
+ const iframeDoc = currentElement.contentDocument || currentElement.contentWindow.document;
+ currentElement = iframeDoc.querySelector(parts[i]);
+ continue;
+ } catch (e) {
+ console.warn('Cannot access iframe content:', e);
+ return null;
+ }
+ }
+
+ // Try regular DOM first
+ let nextElement = currentElement.querySelector(parts[i]);
+
+ // Try shadow DOM if not found
+ if (!nextElement && currentElement.shadowRoot) {
+ nextElement = currentElement.shadowRoot.querySelector(parts[i]);
+ }
+
+ // Check children's shadow roots if still not found
+ if (!nextElement) {
+ const children = Array.from(currentElement.children || []);
+ for (const child of children) {
+ if (child.shadowRoot) {
+ nextElement = child.shadowRoot.querySelector(parts[i]);
+ if (nextElement) break;
+ }
+ }
+ }
+
+ currentElement = nextElement;
+ }
+
+ return currentElement;
+ };
+
+ // Multi-match counterpart of queryElement: returns a NodeList for plain selectors and an Array when '>>' / ':>>' delimiters are present
+ const queryElementAll = (rootElement, selector) => {
+ if (!selector.includes('>>') && !selector.includes(':>>')) {
+ return rootElement.querySelectorAll(selector);
+ }
+
+ const parts = selector.split(/(?:>>|:>>)/).map(part => part.trim());
+ let currentElements = [rootElement];
+
+ for (const part of parts) {
+ const nextElements = [];
+
+ for (const element of currentElements) {
+ // Handle iframe traversal
+ if (element.tagName === 'IFRAME') {
+ try {
+ const iframeDoc = element.contentDocument || element.contentWindow.document;
+ nextElements.push(...iframeDoc.querySelectorAll(part));
+ } catch (e) {
+ console.warn('Cannot access iframe content:', e);
+ continue;
+ }
+ } else {
+ // Regular DOM elements
+ if (element.querySelectorAll) {
+ nextElements.push(...element.querySelectorAll(part));
+ }
+
+ // Shadow DOM elements
+ if (element.shadowRoot) {
+ nextElements.push(...element.shadowRoot.querySelectorAll(part));
+ }
+
+ // Check children's shadow roots
+ const children = Array.from(element.children || []);
+ for (const child of children) {
+ if (child.shadowRoot) {
+ nextElements.push(...child.shadowRoot.querySelectorAll(part));
+ }
+ }
+ }
+ }
+
+ currentElements = nextElements;
+ }
+
+ return currentElements;
+ };
+
+ // Extract a field value: non-empty shadow-root text wins; otherwise read the attribute, resolving src/href against the element's own document URL
function extractValue(element, attribute) {
- if (!element) return null;
-
- if (attribute === 'innerText') {
- return element.innerText.trim();
- } else if (attribute === 'innerHTML') {
- return element.innerHTML.trim();
- } else if (attribute === 'src' || attribute === 'href') {
- const attrValue = element.getAttribute(attribute);
- return attrValue ? new URL(attrValue, window.location.origin).href : null;
- }
- return element.getAttribute(attribute);
+ if (!element) return null;
+
+ // Get context-aware base URL
+ const baseURL = element.ownerDocument?.location?.href || window.location.origin;
+
+ // Check shadow root first
+ if (element.shadowRoot) {
+ const shadowContent = element.shadowRoot.textContent;
+ if (shadowContent?.trim()) {
+ return shadowContent.trim();
+ }
+ }
+
+ if (attribute === 'innerText') {
+ return element.innerText.trim();
+ } else if (attribute === 'innerHTML') {
+ return element.innerHTML.trim();
+ } else if (attribute === 'src' || attribute === 'href') {
+ const attrValue = element.getAttribute(attribute);
+ return attrValue ? new URL(attrValue, baseURL).href : null;
+ }
+ return element.getAttribute(attribute);
}
- // Helper function to find table ancestors
+ // Look for an enclosing TD/TR within MAX_DEPTH parent steps; an element inside a shadow root jumps straight to its shadow host
function findTableAncestor(element) {
- let currentElement = element;
- const MAX_DEPTH = 5;
- let depth = 0;
-
- while (currentElement && depth < MAX_DEPTH) {
- if (currentElement.tagName === 'TD') {
- return { type: 'TD', element: currentElement };
- } else if (currentElement.tagName === 'TR') {
- return { type: 'TR', element: currentElement };
- }
- currentElement = currentElement.parentElement;
- depth++;
- }
- return null;
+ let currentElement = element;
+ const MAX_DEPTH = 5;
+ let depth = 0;
+
+ while (currentElement && depth < MAX_DEPTH) {
+ // Handle shadow DOM
+ if (currentElement.getRootNode() instanceof ShadowRoot) {
+ currentElement = currentElement.getRootNode().host;
+ continue;
+ }
+
+ if (currentElement.tagName === 'TD') {
+ return { type: 'TD', element: currentElement };
+ } else if (currentElement.tagName === 'TR') {
+ return { type: 'TR', element: currentElement };
+ }
+
+ // Handle iframe crossing
+ if (currentElement.tagName === 'IFRAME') {
+ try {
+ currentElement = currentElement.contentDocument.body;
+ } catch (e) {
+ return null;
+ }
+ } else {
+ currentElement = currentElement.parentElement;
+ }
+ depth++;
+ }
+ return null;
}
+ // Cell index: position among all <td> of the shadow root when inside one, otherwise the number of preceding element siblings
function getCellIndex(td) {
- let index = 0;
- let sibling = td;
- while (sibling = sibling.previousElementSibling) {
- index++;
- }
- return index;
+ if (td.getRootNode() instanceof ShadowRoot) {
+ const shadowRoot = td.getRootNode();
+ const allCells = Array.from(shadowRoot.querySelectorAll('td'));
+ return allCells.indexOf(td);
+ }
+
+ let index = 0;
+ let sibling = td;
+ while (sibling = sibling.previousElementSibling) {
+ index++;
+ }
+ return index;
}
+ // True when any table field's selector resolves, within this row, to an element nested inside a TH
function hasThElement(row, tableFields) {
- for (const [label, { selector }] of Object.entries(tableFields)) {
- const element = row.querySelector(selector);
- if (element) {
- let current = element;
- while (current && current !== row) {
- if (current.tagName === 'TH') {
- return true;
- }
- current = current.parentElement;
- }
- }
- }
- return false;
+ for (const [_, { selector }] of Object.entries(tableFields)) {
+ const element = queryElement(row, selector);
+ if (element) {
+ let current = element;
+ while (current && current !== row) {
+ if (current.getRootNode() instanceof ShadowRoot) {
+ current = current.getRootNode().host;
+ continue;
+ }
+
+ if (current.tagName === 'TH') return true;
+
+ if (current.tagName === 'IFRAME') {
+ try {
+ current = current.contentDocument.body;
+ } catch (e) {
+ break;
+ }
+ } else {
+ current = current.parentElement;
+ }
+ }
+ }
+ }
+ return false;
}
+ // Keep every row if any selected field sits inside a TH; otherwise drop the rows that contain TH cells
function filterRowsBasedOnTag(rows, tableFields) {
for (const row of rows) {
if (hasThElement(row, tableFields)) {
return rows;
}
}
- return rows.filter(row => row.getElementsByTagName('TH').length === 0);
+ // Include shadow DOM in TH search
+ return rows.filter(row => {
+ const directTH = row.getElementsByTagName('TH').length === 0;
+ const shadowTH = row.shadowRoot ?
+ row.shadowRoot.querySelector('th') === null : true;
+ return directTH && shadowTH;
+ });
}
+ // Jaccard similarity of two class lists: |intersection| / |union|
function calculateClassSimilarity(classList1, classList2) {
- const set1 = new Set(classList1);
- const set2 = new Set(classList2);
-
- // Calculate intersection
- const intersection = new Set([...set1].filter(x => set2.has(x)));
-
- // Calculate union
- const union = new Set([...set1, ...set2]);
-
- // Return Jaccard similarity coefficient
- return intersection.size / union.size;
- }
+ const set1 = new Set(classList1);
+ const set2 = new Set(classList2);
+ const intersection = new Set([...set1].filter(x => set2.has(x)));
+ const union = new Set([...set1, ...set2]);
+ return intersection.size / union.size;
+ }
- // New helper function to find elements with similar classes
+ // Collect same-tag elements (main document, shadow host subtree, accessible iframes) whose class lists reach the Jaccard similarity threshold
function findSimilarElements(baseElement, similarityThreshold = 0.7) {
const baseClasses = Array.from(baseElement.classList);
-
if (baseClasses.length === 0) return [];
+
+ const allElements = [];
- const potentialElements = document.getElementsByTagName(baseElement.tagName);
+ // Get elements from main document
+ allElements.push(...document.getElementsByTagName(baseElement.tagName));
- return Array.from(potentialElements).filter(element => {
- if (element === baseElement) return false;
-
- const similarity = calculateClassSimilarity(
- baseClasses,
- Array.from(element.classList)
- );
-
- return similarity >= similarityThreshold;
+ // Get elements from shadow DOM
+ if (baseElement.getRootNode() instanceof ShadowRoot) {
+ const shadowHost = baseElement.getRootNode().host;
+ allElements.push(...shadowHost.getElementsByTagName(baseElement.tagName));
+ }
+
+ // Get elements from iframes
+ const iframes = document.getElementsByTagName('iframe');
+ for (const iframe of iframes) {
+ try {
+ const iframeDoc = iframe.contentDocument || iframe.contentWindow.document;
+ allElements.push(...iframeDoc.getElementsByTagName(baseElement.tagName));
+ } catch (e) {
+ console.warn('Cannot access iframe content:', e);
+ }
+ }
+
+ return allElements.filter(element => {
+ if (element === baseElement) return false;
+ const similarity = calculateClassSimilarity(
+ baseClasses,
+ Array.from(element.classList)
+ );
+ return similarity >= similarityThreshold;
});
}
- let containers = Array.from(document.querySelectorAll(listSelector));
+ // Main scraping logic with context support
+ let containers = queryElementAll(document, listSelector);
+ containers = Array.from(containers);
+
if (containers.length === 0) return [];
if (limit > 1 && containers.length === 1) {
@@ -374,115 +673,157 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
const newContainers = similarContainers.filter(container =>
!container.matches(listSelector)
);
-
containers = [...containers, ...newContainers];
}
}
- // Initialize arrays to store field classifications for each container
const containerFields = containers.map(() => ({
- tableFields: {},
- nonTableFields: {}
+ tableFields: {},
+ nonTableFields: {}
}));
- // Analyze field types for each container
+ // Classify fields
containers.forEach((container, containerIndex) => {
- for (const [label, field] of Object.entries(fields)) {
- const sampleElement = container.querySelector(field.selector);
-
- if (sampleElement) {
- const ancestor = findTableAncestor(sampleElement);
- if (ancestor) {
- containerFields[containerIndex].tableFields[label] = {
- ...field,
- tableContext: ancestor.type,
- cellIndex: ancestor.type === 'TD' ? getCellIndex(ancestor.element) : -1
- };
- } else {
- containerFields[containerIndex].nonTableFields[label] = field;
- }
+ for (const [label, field] of Object.entries(fields)) {
+ const sampleElement = queryElement(container, field.selector);
+
+ if (sampleElement) {
+ const ancestor = findTableAncestor(sampleElement);
+ if (ancestor) {
+ containerFields[containerIndex].tableFields[label] = {
+ ...field,
+ tableContext: ancestor.type,
+ cellIndex: ancestor.type === 'TD' ? getCellIndex(ancestor.element) : -1
+ };
} else {
containerFields[containerIndex].nonTableFields[label] = field;
}
+ } else {
+ containerFields[containerIndex].nonTableFields[label] = field;
}
+ }
});
const tableData = [];
const nonTableData = [];
-
- // Process table fields across all containers
+
+ // Process table data with both iframe and shadow DOM support
for (let containerIndex = 0; containerIndex < containers.length; containerIndex++) {
const container = containers[containerIndex];
const { tableFields } = containerFields[containerIndex];
if (Object.keys(tableFields).length > 0) {
- const firstField = Object.values(tableFields)[0];
- const firstElement = container.querySelector(firstField.selector);
- let tableContext = firstElement;
-
- while (tableContext && tableContext.tagName !== 'TABLE' && tableContext !== container) {
- tableContext = tableContext.parentElement;
- }
-
- if (tableContext) {
- const rows = Array.from(tableContext.getElementsByTagName('TR'));
- const processedRows = filterRowsBasedOnTag(rows, tableFields);
-
- for (let rowIndex = 0; rowIndex < Math.min(processedRows.length, limit); rowIndex++) {
- const record = {};
- const currentRow = processedRows[rowIndex];
-
- for (const [label, { selector, attribute, cellIndex }] of Object.entries(tableFields)) {
- let element = null;
-
- if (cellIndex >= 0) {
- const td = currentRow.children[cellIndex];
- if (td) {
- element = td.querySelector(selector);
-
- if (!element && selector.split(">").pop().includes('td:nth-child')) {
- element = td;
+ const firstField = Object.values(tableFields)[0];
+ const firstElement = queryElement(container, firstField.selector);
+ let tableContext = firstElement;
+
+ // Find table context including both iframe and shadow DOM
+ while (tableContext && tableContext.tagName !== 'TABLE' && tableContext !== container) {
+ if (tableContext.getRootNode() instanceof ShadowRoot) {
+ tableContext = tableContext.getRootNode().host;
+ continue;
+ }
+
+ if (tableContext.tagName === 'IFRAME') {
+ try {
+ tableContext = tableContext.contentDocument.body;
+ } catch (e) {
+ break;
}
+ } else {
+ tableContext = tableContext.parentElement;
+ }
+ }
- if (!element) {
- const tagOnlySelector = selector.split('.')[0];
- element = td.querySelector(tagOnlySelector);
+ if (tableContext) {
+ // Get rows from all contexts
+ const rows = [];
+
+ // Get rows from regular DOM
+ rows.push(...tableContext.getElementsByTagName('TR'));
+
+ // Get rows from shadow DOM
+ if (tableContext.shadowRoot) {
+ rows.push(...tableContext.shadowRoot.getElementsByTagName('TR'));
+ }
+
+ // Get rows from iframes
+ if (tableContext.tagName === 'IFRAME') {
+ try {
+ const iframeDoc = tableContext.contentDocument || tableContext.contentWindow.document;
+ rows.push(...iframeDoc.getElementsByTagName('TR'));
+ } catch (e) {
+ console.warn('Cannot access iframe rows:', e);
}
+ }
+
+ const processedRows = filterRowsBasedOnTag(rows, tableFields);
+
+ for (let rowIndex = 0; rowIndex < Math.min(processedRows.length, limit); rowIndex++) {
+ const record = {};
+ const currentRow = processedRows[rowIndex];
- if (!element) {
- let currentElement = td;
- while (currentElement && currentElement.children.length > 0) {
- let foundContentChild = false;
- for (const child of currentElement.children) {
- if (extractValue(child, attribute)) {
- currentElement = child;
- foundContentChild = true;
- break;
+ for (const [label, { selector, attribute, cellIndex }] of Object.entries(tableFields)) {
+ let element = null;
+
+ if (cellIndex >= 0) {
+ // Get TD element considering both contexts
+ let td = currentRow.children[cellIndex];
+
+ // Check shadow DOM for td
+ if (!td && currentRow.shadowRoot) {
+ const shadowCells = currentRow.shadowRoot.children;
+ if (shadowCells && shadowCells.length > cellIndex) {
+ td = shadowCells[cellIndex];
}
}
- if (!foundContentChild) break;
- }
- element = currentElement;
- }
- }
- } else {
- element = currentRow.querySelector(selector);
- }
-
- if (element) {
- record[label] = extractValue(element, attribute);
- }
- }
+
+ if (td) {
+ element = queryElement(td, selector);
+
+ if (!element && selector.split(/(?:>>|:>>)/).pop().includes('td:nth-child')) {
+ element = td;
+ }
- if (Object.keys(record).length > 0) {
- tableData.push(record);
- }
+ if (!element) {
+ const tagOnlySelector = selector.split('.')[0];
+ element = queryElement(td, tagOnlySelector);
+ }
+
+ if (!element) {
+ let currentElement = td;
+ while (currentElement && currentElement.children.length > 0) {
+ let foundContentChild = false;
+ for (const child of currentElement.children) {
+ if (extractValue(child, attribute)) {
+ currentElement = child;
+ foundContentChild = true;
+ break;
+ }
+ }
+ if (!foundContentChild) break;
+ }
+ element = currentElement;
+ }
+ }
+ } else {
+ element = queryElement(currentRow, selector);
+ }
+
+ if (element) {
+ record[label] = extractValue(element, attribute);
+ }
+ }
+
+ if (Object.keys(record).length > 0) {
+ tableData.push(record);
+ }
+ }
}
- }
}
}
-
- // Process non-table fields across all containers
+
+ // Process non-table data with both contexts support
for (let containerIndex = 0; containerIndex < containers.length; containerIndex++) {
if (nonTableData.length >= limit) break;
@@ -490,26 +831,28 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
const { nonTableFields } = containerFields[containerIndex];
if (Object.keys(nonTableFields).length > 0) {
- const record = {};
+ const record = {};
- for (const [label, { selector, attribute }] of Object.entries(nonTableFields)) {
- const element = container.querySelector(selector);
-
- if (element) {
- record[label] = extractValue(element, attribute);
- }
- }
+ for (const [label, { selector, attribute }] of Object.entries(nonTableFields)) {
+ // Get the last part of the selector after any context delimiter
+ const relativeSelector = selector.split(/(?:>>|:>>)/).slice(-1)[0];
+ const element = queryElement(container, relativeSelector);
- if (Object.keys(record).length > 0) {
- nonTableData.push(record);
- }
- }
+ if (element) {
+ record[label] = extractValue(element, attribute);
+ }
+ }
+
+ if (Object.keys(record).length > 0) {
+ nonTableData.push(record);
+ }
+ }
}
// Merge and limit the results
const scrapedData = [...tableData, ...nonTableData];
return scrapedData;
-};
+ };
/**
* Gets all children of the elements matching the listSelector,
diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts
index c581954d..e09ac5d5 100644
--- a/maxun-core/src/interpret.ts
+++ b/maxun-core/src/interpret.ts
@@ -403,7 +403,7 @@ export default class Interpreter extends EventEmitter {
await this.options.serializableCallback(scrapeResults);
},
- scrapeSchema: async (schema: Record) => {
+ scrapeSchema: async (schema: Record) => {
await this.ensureScriptsLoaded(page);
const scrapeResult = await page.evaluate((schemaObj) => window.scrapeSchema(schemaObj), schema);
@@ -663,11 +663,42 @@ export default class Interpreter extends EventEmitter {
if (isApplicable) {
return actionId;
}
+ }
}
+
+ private removeShadowSelectors(workflow: Workflow) {
+ for (let actionId = workflow.length - 1; actionId >= 0; actionId--) {
+ const step = workflow[actionId];
+
+ // Check if step has where and selectors
+ if (step.where && Array.isArray(step.where.selectors)) {
+ // Filter out selectors that contain ">>"
+ step.where.selectors = step.where.selectors.filter(selector => !selector.includes('>>'));
+ }
+ }
+
+ return workflow;
+ }
+
+ private removeSpecialSelectors(workflow: Workflow) {
+ for (let actionId = workflow.length - 1; actionId >= 0; actionId--) {
+ const step = workflow[actionId];
+
+ if (step.where && Array.isArray(step.where.selectors)) {
+ // Drop selectors containing ">>"; this also covers ":>>" because it contains ">>"
+ step.where.selectors = step.where.selectors.filter(selector =>
+ !(selector.includes(':>>') || selector.includes('>>'))
+ );
+ }
+ }
+
+ return workflow;
}
private async runLoop(p: Page, workflow: Workflow) {
- const workflowCopy: Workflow = JSON.parse(JSON.stringify(workflow));
+ let workflowCopy: Workflow = JSON.parse(JSON.stringify(workflow));
+
+ workflowCopy = this.removeSpecialSelectors(workflowCopy);
// apply ad-blocker to the current page
try {
diff --git a/package.json b/package.json
index e89f13de..36062666 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "maxun",
- "version": "0.0.5",
+ "version": "0.0.6",
"author": "Maxun",
"license": "AGPL-3.0-or-later",
"dependencies": {
@@ -44,9 +44,10 @@
"joi": "^17.6.0",
"jsonwebtoken": "^9.0.2",
"jwt-decode": "^4.0.0",
+ "lodash": "^4.17.21",
"loglevel": "^1.8.0",
"loglevel-plugin-remote": "^0.6.8",
- "maxun-core": "^0.0.7",
+ "maxun-core": "^0.0.8",
"minio": "^8.0.1",
"moment-timezone": "^0.5.45",
"node-cron": "^3.0.3",
@@ -66,6 +67,7 @@
"react-transition-group": "^4.4.2",
"sequelize": "^6.37.3",
"sequelize-typescript": "^2.1.6",
+ "sharp": "^0.33.5",
"socket.io": "^4.4.1",
"socket.io-client": "^4.4.1",
"styled-components": "^5.3.3",
@@ -97,6 +99,7 @@
"@types/cookie-parser": "^1.4.7",
"@types/express": "^4.17.13",
"@types/js-cookie": "^3.0.6",
+ "@types/lodash": "^4.17.14",
"@types/loglevel": "^1.6.3",
"@types/node": "22.7.9",
"@types/node-cron": "^3.0.11",
diff --git a/perf/performance.ts b/perf/performance.ts
new file mode 100644
index 00000000..c50ef850
--- /dev/null
+++ b/perf/performance.ts
@@ -0,0 +1,181 @@
+// Frontend Performance Monitoring
+export class FrontendPerformanceMonitor {
+ private metrics: {
+ fps: number[];
+ memoryUsage: MemoryInfo[];
+ renderTime: number[];
+ eventLatency: number[];
+ };
+ private lastFrameTime: number;
+ private frameCount: number;
+
+ constructor() {
+ this.metrics = {
+ fps: [],
+ memoryUsage: [],
+ renderTime: [],
+ eventLatency: [],
+ };
+ this.lastFrameTime = performance.now();
+ this.frameCount = 0;
+
+ // Start monitoring
+ this.startMonitoring();
+ }
+
+ private startMonitoring(): void {
+ // Monitor FPS
+ const measureFPS = () => {
+ const currentTime = performance.now();
+ const elapsed = currentTime - this.lastFrameTime;
+ this.frameCount++;
+
+ if (elapsed >= 1000) { // Calculate FPS every second
+ const fps = Math.round((this.frameCount * 1000) / elapsed);
+ this.metrics.fps.push(fps);
+ this.frameCount = 0;
+ this.lastFrameTime = currentTime;
+ }
+ requestAnimationFrame(measureFPS);
+ };
+ requestAnimationFrame(measureFPS);
+
+ // Monitor Memory Usage
+ if (window.performance && (performance as any).memory) {
+ setInterval(() => {
+ const memory = (performance as any).memory;
+ this.metrics.memoryUsage.push({
+ usedJSHeapSize: memory.usedJSHeapSize,
+ totalJSHeapSize: memory.totalJSHeapSize,
+ timestamp: Date.now()
+ });
+ }, 1000);
+ }
+ }
+
+ // Monitor Canvas Render Time
+ public measureRenderTime(renderFunction: () => void): void {
+ const startTime = performance.now();
+ renderFunction();
+ const endTime = performance.now();
+ this.metrics.renderTime.push(endTime - startTime);
+ }
+
+ // Monitor Event Latency
+ public measureEventLatency(event: MouseEvent | KeyboardEvent): void {
+ const latency = performance.now() - event.timeStamp;
+ this.metrics.eventLatency.push(latency);
+ }
+
+ // Get Performance Report
+ public getPerformanceReport(): PerformanceReport {
+ return {
+ averageFPS: this.calculateAverage(this.metrics.fps),
+ averageRenderTime: this.calculateAverage(this.metrics.renderTime),
+ averageEventLatency: this.calculateAverage(this.metrics.eventLatency),
+ memoryTrend: this.getMemoryTrend(),
+ lastMemoryUsage: this.metrics.memoryUsage[this.metrics.memoryUsage.length - 1]
+ };
+ }
+
+ private calculateAverage(array: number[]): number {
+ return array.length ? array.reduce((a, b) => a + b) / array.length : 0;
+ }
+
+ private getMemoryTrend(): MemoryTrend {
+ if (this.metrics.memoryUsage.length < 2) return 'stable';
+ const latest = this.metrics.memoryUsage[this.metrics.memoryUsage.length - 1];
+ const previous = this.metrics.memoryUsage[this.metrics.memoryUsage.length - 2];
+ const change = latest.usedJSHeapSize - previous.usedJSHeapSize;
+ if (change > 1000000) return 'increasing'; // 1MB threshold
+ if (change < -1000000) return 'decreasing';
+ return 'stable';
+ }
+}
+
+// Backend Performance Monitoring
+export class BackendPerformanceMonitor {
+ private metrics: {
+ screenshotTimes: number[];
+ emitTimes: number[];
+ memoryUsage: NodeJS.MemoryUsage[];
+ };
+
+ constructor() {
+ this.metrics = {
+ screenshotTimes: [],
+ emitTimes: [],
+ memoryUsage: []
+ };
+ this.startMonitoring();
+ }
+
+ private startMonitoring(): void {
+ // Monitor Memory Usage
+ setInterval(() => {
+ this.metrics.memoryUsage.push(process.memoryUsage());
+ }, 1000);
+ }
+
+ public async measureScreenshotPerformance(
+ makeScreenshot: () => Promise
+ ): Promise {
+ const startTime = process.hrtime();
+ await makeScreenshot();
+ const [seconds, nanoseconds] = process.hrtime(startTime);
+ this.metrics.screenshotTimes.push(seconds * 1000 + nanoseconds / 1000000);
+ }
+
+ public measureEmitPerformance(emitFunction: () => void): void {
+ const startTime = process.hrtime();
+ emitFunction();
+ const [seconds, nanoseconds] = process.hrtime(startTime);
+ this.metrics.emitTimes.push(seconds * 1000 + nanoseconds / 1000000);
+ }
+
+ public getPerformanceReport(): BackendPerformanceReport {
+ return {
+ averageScreenshotTime: this.calculateAverage(this.metrics.screenshotTimes),
+ averageEmitTime: this.calculateAverage(this.metrics.emitTimes),
+ currentMemoryUsage: this.metrics.memoryUsage[this.metrics.memoryUsage.length - 1],
+ memoryTrend: this.getMemoryTrend()
+ };
+ }
+
+ private calculateAverage(array: number[]): number {
+ return array.length ? array.reduce((a, b) => a + b) / array.length : 0;
+ }
+
+ private getMemoryTrend(): MemoryTrend {
+ if (this.metrics.memoryUsage.length < 2) return 'stable';
+ const latest = this.metrics.memoryUsage[this.metrics.memoryUsage.length - 1];
+ const previous = this.metrics.memoryUsage[this.metrics.memoryUsage.length - 2];
+ const change = latest.heapUsed - previous.heapUsed;
+ if (change > 1000000) return 'increasing';
+ if (change < -1000000) return 'decreasing';
+ return 'stable';
+ }
+}
+
+interface MemoryInfo {
+ usedJSHeapSize: number;
+ totalJSHeapSize: number;
+ timestamp: number;
+}
+
+type MemoryTrend = 'increasing' | 'decreasing' | 'stable';
+
+interface PerformanceReport {
+ averageFPS: number;
+ averageRenderTime: number;
+ averageEventLatency: number;
+ memoryTrend: MemoryTrend;
+ lastMemoryUsage: MemoryInfo;
+}
+
+interface BackendPerformanceReport {
+ averageScreenshotTime: number;
+ averageEmitTime: number;
+ currentMemoryUsage: NodeJS.MemoryUsage;
+ memoryTrend: MemoryTrend;
+}
\ No newline at end of file
diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts
index 2c45d146..8ff4f601 100644
--- a/server/src/browser-management/classes/RemoteBrowser.ts
+++ b/server/src/browser-management/classes/RemoteBrowser.ts
@@ -9,6 +9,8 @@ import { chromium } from 'playwright-extra';
import stealthPlugin from 'puppeteer-extra-plugin-stealth';
import { PlaywrightBlocker } from '@cliqz/adblocker-playwright';
import fetch from 'cross-fetch';
+import { throttle } from 'lodash';
+import sharp from 'sharp';
import logger from '../../logger';
import { InterpreterSettings, RemoteBrowserOptions } from "../../types";
@@ -16,8 +18,30 @@ import { WorkflowGenerator } from "../../workflow-management/classes/Generator";
import { WorkflowInterpreter } from "../../workflow-management/classes/Interpreter";
import { getDecryptedProxyConfig } from '../../routes/proxy';
import { getInjectableScript } from 'idcac-playwright';
+
chromium.use(stealthPlugin());
+/**
+ * Tunables for the periodic memory check in RemoteBrowser:
+ * - gcInterval: period of the check timer, in milliseconds.
+ * - maxHeapSize: denominator used to turn `heapUsed` into a usage ratio.
+ * - heapUsageThreshold: ratio above which a cleanup is triggered.
+ * NOTE(review): maxHeapSize is a fixed assumption, not the actual V8 heap limit — confirm.
+ */
+const MEMORY_CONFIG = {
+ gcInterval: 60000, // 1 minute
+ maxHeapSize: 2048 * 1024 * 1024, // 2GB
+ heapUsageThreshold: 0.85 // 85%
+};
+
+/**
+ * Tunables for the screencast pipeline in RemoteBrowser:
+ * - format: image format requested from `Page.startScreencast`.
+ * - maxWidth / maxHeight: bounding box frames are resized into before emission.
+ * - targetFPS: queued frames are re-emitted after a delay of 1000 / targetFPS ms.
+ * - compressionQuality: 0..1 factor, scaled to a 0..100 JPEG quality.
+ * - maxQueueSize: maximum number of frames buffered while one is being processed.
+ */
+const SCREENCAST_CONFIG: {
+ format: "jpeg" | "png";
+ maxWidth: number;
+ maxHeight: number;
+ targetFPS: number;
+ compressionQuality: number;
+ maxQueueSize: number;
+} = {
+ format: 'jpeg',
+ maxWidth: 900,
+ maxHeight: 400,
+ targetFPS: 30,
+ compressionQuality: 0.8,
+ maxQueueSize: 2
+};
/**
* This class represents a remote browser instance.
@@ -78,6 +102,11 @@ export class RemoteBrowser {
*/
public interpreter: WorkflowInterpreter;
+
+ private screenshotQueue: Buffer[] = [];
+ private isProcessingScreenshot = false;
+ private screencastInterval: NodeJS.Timeout | null = null
+
/**
* Initializes a new instances of the {@link Generator} and {@link WorkflowInterpreter} classes and
* assigns the socket instance everywhere.
@@ -90,6 +119,46 @@ export class RemoteBrowser {
this.generator = new WorkflowGenerator(socket);
}
+ /**
+  * Starts a repeating check, every `MEMORY_CONFIG.gcInterval` ms, that
+  * triggers a cleanup when heap usage crosses the configured threshold and
+  * trims the screenshot queue back to its maximum size.
+  * NOTE(review): the timer handle is not stored, so nothing visible here can
+  * stop the check once it has been started.
+  */
+ private initializeMemoryManagement(): void {
+   const runMemoryCheck = (): void => {
+     const { heapUsed } = process.memoryUsage();
+     const usageRatio = heapUsed / MEMORY_CONFIG.maxHeapSize;
+
+     if (usageRatio > MEMORY_CONFIG.heapUsageThreshold) {
+       logger.warn('High memory usage detected, triggering cleanup');
+       this.performMemoryCleanup();
+     }
+
+     // Keep only the most recent frames if the queue has outgrown its limit.
+     const queueLimit = SCREENCAST_CONFIG.maxQueueSize;
+     if (this.screenshotQueue.length > queueLimit) {
+       this.screenshotQueue = this.screenshotQueue.slice(-queueLimit);
+     }
+   };
+
+   setInterval(runMemoryCheck, MEMORY_CONFIG.gcInterval);
+ }
+
+ /**
+  * Frees memory held by the screencast pipeline: drops queued frames, runs
+  * the garbage collector when it is exposed, and replaces the CDP session
+  * with a fresh one before restarting the screencast.
+  * Errors while resetting the session are logged, not rethrown.
+  */
+ private async performMemoryCleanup(): Promise<void> {
+   this.screenshotQueue = [];
+   this.isProcessingScreenshot = false;
+
+   // global.gc only exists when node is started with --expose-gc.
+   if (global.gc) {
+     global.gc();
+   }
+
+   // Reset CDP session if needed
+   if (this.client) {
+     const previousClient = this.client;
+     try {
+       await this.stopScreencast();
+       this.client = null;
+
+       // Detach the old session so it is released rather than left attached
+       // to the page; a failure here must not prevent creating the new one.
+       try {
+         await previousClient.detach();
+       } catch (detachError) {
+         logger.warn('Failed to detach previous CDP session:', detachError);
+       }
+
+       if (this.currentPage) {
+         this.client = await this.currentPage.context().newCDPSession(this.currentPage);
+         await this.startScreencast();
+       }
+     } catch (error) {
+       logger.error('Error resetting CDP session:', error);
+     }
+   }
+ }
+
/**
* Normalizes URLs to prevent navigation loops while maintaining consistent format
*/
@@ -157,7 +226,7 @@ export class RemoteBrowser {
'Mozilla/5.0 (Windows NT 11.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.5938.62 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:118.0) Gecko/20100101 Firefox/118.0',
];
-
+
return userAgents[Math.floor(Math.random() * userAgents.length)];
}
@@ -178,7 +247,7 @@ export class RemoteBrowser {
"--disable-extensions",
"--no-sandbox",
"--disable-dev-shm-usage",
- ],
+ ],
}));
const proxyConfig = await getDecryptedProxyConfig(userId);
let proxyOptions: { server: string, username?: string, password?: string } = { server: '' };
@@ -251,11 +320,11 @@ export class RemoteBrowser {
this.client = await this.currentPage.context().newCDPSession(this.currentPage);
await blocker.disableBlockingInPage(this.currentPage);
console.log('Adblocker initialized');
- } catch (error: any) {
+ } catch (error: any) {
console.warn('Failed to initialize adblocker, continuing without it:', error.message);
// Still need to set up the CDP session even if blocker fails
this.client = await this.currentPage.context().newCDPSession(this.currentPage);
- }
+ }
};
/**
@@ -319,7 +388,7 @@ export class RemoteBrowser {
return;
}
this.client.on('Page.screencastFrame', ({ data: base64, sessionId }) => {
- this.emitScreenshot(base64)
+ this.emitScreenshot(Buffer.from(base64, 'base64'))
setTimeout(async () => {
try {
if (!this.client) {
@@ -339,16 +408,49 @@ export class RemoteBrowser {
* If an interpretation was running it will be stopped.
* @returns {Promise}
*/
- public switchOff = async (): Promise => {
- await this.interpreter.stopInterpretation();
- if (this.browser) {
- await this.stopScreencast();
- await this.browser.close();
- } else {
- logger.log('error', 'Browser wasn\'t initialized');
- logger.log('error', 'Switching off the browser failed');
+ public async switchOff(): Promise<void> {
+   // A failure to stop the interpreter must not prevent the browser from
+   // being closed, so it is handled separately from the teardown below.
+   try {
+     await this.interpreter.stopInterpretation();
+   } catch (error) {
+     logger.error('Error stopping interpretation during browser shutdown:', error);
+   }
+
+   if (this.screencastInterval) {
+     clearInterval(this.screencastInterval);
+     this.screencastInterval = null;
+   }
+
+   try {
+     if (this.client) {
+       await this.stopScreencast();
+     }
+
+     if (this.browser) {
+       await this.browser.close();
+     }
+   } catch (error) {
+     logger.error('Error during browser shutdown:', error);
+   } finally {
+     // Always release buffered frames, even when the teardown failed.
+     this.screenshotQueue = [];
+     //this.performanceMonitor.reset();
}
- };
+ }
+
+ /**
+  * Re-encodes a frame as a progressive JPEG that fits inside the configured
+  * screencast bounding box, never enlarging the image.
+  * @param screenshot the raw frame
+  * @returns the optimized frame, or the unmodified input if processing fails
+  */
+ private async optimizeScreenshot(screenshot: Buffer): Promise<Buffer> {
+   try {
+     return await sharp(screenshot)
+       .jpeg({
+         // compressionQuality is a 0..1 factor; sharp expects an integer 1..100.
+         quality: Math.round(SCREENCAST_CONFIG.compressionQuality * 100),
+         progressive: true
+       })
+       .resize({
+         width: SCREENCAST_CONFIG.maxWidth,
+         height: SCREENCAST_CONFIG.maxHeight,
+         fit: 'inside',
+         withoutEnlargement: true
+       })
+       .toBuffer();
+   } catch (error) {
+     // Best effort: fall back to the original frame rather than dropping it.
+     logger.error('Screenshot optimization failed:', error);
+     return screenshot;
+   }
+ }
/**
* Makes and emits a single screenshot to the client side.
@@ -358,7 +460,7 @@ export class RemoteBrowser {
try {
const screenshot = await this.currentPage?.screenshot();
if (screenshot) {
- this.emitScreenshot(screenshot.toString('base64'));
+ this.emitScreenshot(screenshot);
}
} catch (e) {
const { message } = e as Error;
@@ -490,37 +592,85 @@ export class RemoteBrowser {
* Should be called only once after the browser is fully initialized.
* @returns {Promise}
*/
- private startScreencast = async (): Promise => {
+ private async startScreencast(): Promise {
if (!this.client) {
- logger.log('warn', 'client is not initialized');
+ logger.warn('Client is not initialized');
return;
}
- await this.client.send('Page.startScreencast', { format: 'jpeg', quality: 75 });
- logger.log('info', `Browser started with screencasting a page.`);
- };
- /**
- * Unsubscribes the current page from the screencast session.
- * @returns {Promise}
- */
- private stopScreencast = async (): Promise => {
- if (!this.client) {
- logger.log('error', 'client is not initialized');
- logger.log('error', 'Screencast stop failed');
- } else {
- await this.client.send('Page.stopScreencast');
- logger.log('info', `Browser stopped with screencasting.`);
+ try {
+ await this.client.send('Page.startScreencast', {
+ format: SCREENCAST_CONFIG.format,
+ });
+
+ // Set up screencast frame handler
+ this.client.on('Page.screencastFrame', async ({ data, sessionId }) => {
+ try {
+ const buffer = Buffer.from(data, 'base64');
+ await this.emitScreenshot(buffer);
+ await this.client?.send('Page.screencastFrameAck', { sessionId });
+ } catch (error) {
+ logger.error('Screencast frame processing failed:', error);
+ }
+ });
+
+ logger.info('Screencast started successfully');
+ } catch (error) {
+ logger.error('Failed to start screencast:', error);
}
- };
+ }
+
+ /**
+  * Stops the CDP screencast and resets the frame queue state.
+  * Logs an error and returns when no CDP client exists; a failing CDP call
+  * is logged, not rethrown.
+  */
+ private async stopScreencast(): Promise<void> {
+   if (!this.client) {
+     logger.error('Client is not initialized');
+     return;
+   }
+
+   try {
+     await this.client.send('Page.stopScreencast');
+     // Frames still buffered belong to the stopped screencast; discard them.
+     this.screenshotQueue = [];
+     this.isProcessingScreenshot = false;
+     logger.info('Screencast stopped successfully');
+   } catch (error) {
+     logger.error('Failed to stop screencast:', error);
+   }
+ }
+
/**
* Helper for emitting the screenshot of browser's active page through websocket.
* @param payload the screenshot binary data
* @returns void
*/
- private emitScreenshot = (payload: any): void => {
- const dataWithMimeType = ('data:image/jpeg;base64,').concat(payload);
- this.socket.emit('screencast', dataWithMimeType);
- logger.log('debug', `Screenshot emitted`);
+ private emitScreenshot = async (payload: Buffer): Promise<void> => {
+   if (this.isProcessingScreenshot) {
+     // A frame is already being processed: buffer this one. When the buffer
+     // is full, evict the oldest frame so the newest state of the page is
+     // never the one that gets lost.
+     const capacity = SCREENCAST_CONFIG.maxQueueSize;
+     if (capacity > 0) {
+       if (this.screenshotQueue.length >= capacity) {
+         this.screenshotQueue.shift();
+       }
+       this.screenshotQueue.push(payload);
+     }
+     return;
+   }
+
+   this.isProcessingScreenshot = true;
+
+   try {
+     const optimizedScreenshot = await this.optimizeScreenshot(payload);
+     const base64Data = optimizedScreenshot.toString('base64');
+     const dataWithMimeType = `data:image/jpeg;base64,${base64Data}`;
+
+     this.socket.emit('screencast', dataWithMimeType);
+     logger.debug('Screenshot emitted');
+   } catch (error) {
+     logger.error('Screenshot emission failed:', error);
+   } finally {
+     this.isProcessingScreenshot = false;
+
+     // Drain one buffered frame, paced by the target frame rate.
+     const nextScreenshot = this.screenshotQueue.shift();
+     if (nextScreenshot) {
+       setTimeout(() => {
+         void this.emitScreenshot(nextScreenshot);
+       }, 1000 / SCREENCAST_CONFIG.targetFPS);
+     }
+   }
};
+
}
diff --git a/server/src/types/index.ts b/server/src/types/index.ts
index f2e327ef..75aac802 100644
--- a/server/src/types/index.ts
+++ b/server/src/types/index.ts
@@ -129,6 +129,17 @@ export interface BaseActionInfo {
hasOnlyText: boolean;
}
+
+/**
+ * Selector data for an element located inside an iframe; used by Selectors.iframeSelector.
+ * NOTE(review): `full` is presumably the complete selector path through the
+ * frame(s) — confirm against the code that builds it.
+ */
+interface IframeSelector {
+ full: string;
+ isIframe: boolean;
+}
+
+/**
+ * Selector data for an element located inside a shadow root; used by Selectors.shadowSelector.
+ * NOTE(review): `mode` presumably holds the ShadowRoot mode ('open' / 'closed') — confirm.
+ */
+interface ShadowSelector {
+ full: string;
+ mode: string;
+}
+
/**
* Holds all the possible css selectors that has been found for an element.
* @category Types
@@ -143,6 +154,8 @@ export interface Selectors {
hrefSelector: string|null;
accessibilitySelector: string|null;
formSelector: string|null;
+ iframeSelector: IframeSelector|null;
+ shadowSelector: ShadowSelector|null;
}
/**
@@ -156,7 +169,7 @@ export interface BaseAction extends BaseActionInfo{
associatedActions: ActionType[];
inputType: string | undefined;
value: string | undefined;
- selectors: { [key: string]: string | null };
+ selectors: Selectors;
timestamp: number;
isPassword: boolean;
/**
diff --git a/server/src/workflow-management/classes/Generator.ts b/server/src/workflow-management/classes/Generator.ts
index 609541de..d1bccbe4 100644
--- a/server/src/workflow-management/classes/Generator.ts
+++ b/server/src/workflow-management/classes/Generator.ts
@@ -730,15 +730,26 @@ export class WorkflowGenerator {
const displaySelector = await this.generateSelector(page, coordinates, ActionType.Click);
const elementInfo = await getElementInformation(page, coordinates, this.listSelector, this.getList);
if (rect) {
+ const highlighterData = {
+ rect,
+ selector: displaySelector,
+ elementInfo,
+ // Include shadow DOM specific information
+ shadowInfo: elementInfo?.isShadowRoot ? {
+ mode: elementInfo.shadowRootMode,
+ content: elementInfo.shadowRootContent
+ } : null
+ };
+
if (this.getList === true) {
if (this.listSelector !== '') {
const childSelectors = await getChildSelectors(page, this.listSelector || '');
- this.socket.emit('highlighter', { rect, selector: displaySelector, elementInfo, childSelectors })
+ this.socket.emit('highlighter', { ...highlighterData, childSelectors })
} else {
- this.socket.emit('highlighter', { rect, selector: displaySelector, elementInfo });
+ this.socket.emit('highlighter', { ...highlighterData });
}
} else {
- this.socket.emit('highlighter', { rect, selector: displaySelector, elementInfo });
+ this.socket.emit('highlighter', { ...highlighterData });
}
}
}
diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts
index c9390f77..8a9096ec 100644
--- a/server/src/workflow-management/selector.ts
+++ b/server/src/workflow-management/selector.ts
@@ -23,10 +23,88 @@ export const getElementInformation = async (
if (!getList || listSelector !== '') {
const elementInfo = await page.evaluate(
async ({ x, y }) => {
- const el = document.elementFromPoint(x, y) as HTMLElement;
+ // Resolves the innermost element under a viewport point, descending through
+ // same-origin iframes and open shadow roots. Returns null when nothing is
+ // under the point.
+ const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => {
+   // Start from the element under the point in the top-level document.
+   const element = document.elementFromPoint(x, y) as HTMLElement;
+   if (!element) return null;
+
+   // Descends through nested shadow roots. elementFromPoint() is relative to
+   // the viewport of the document owning `start`, so the caller must pass
+   // the point in that document's coordinate space.
+   const traverseShadowDOM = (start: HTMLElement, localX: number, localY: number): HTMLElement => {
+     let current = start;
+     let shadowRoot = current.shadowRoot;
+     let deepest = current;
+
+     while (shadowRoot) {
+       const shadowElement = shadowRoot.elementFromPoint(localX, localY) as HTMLElement;
+       if (!shadowElement || shadowElement === current) break;
+
+       deepest = shadowElement;
+       current = shadowElement;
+       shadowRoot = current.shadowRoot;
+     }
+
+     return deepest;
+   };
+
+   // Not an iframe: only shadow DOM needs to be unwrapped.
+   if (element.tagName !== 'IFRAME') {
+     return traverseShadowDOM(element, x, y);
+   }
+
+   let deepestElement = element;
+   let currentIframe = element as HTMLIFrameElement;
+   // The point, expressed in the coordinate space of the document that
+   // contains currentIframe. Offsets accumulate as frames are entered.
+   let localX = x;
+   let localY = y;
+
+   while (currentIframe) {
+     try {
+       // Convert coordinates to the iframe's local space
+       const iframeRect = currentIframe.getBoundingClientRect();
+       localX -= iframeRect.left;
+       localY -= iframeRect.top;
+
+       const iframeDocument = currentIframe.contentDocument || currentIframe.contentWindow?.document;
+       if (!iframeDocument) break;
+
+       const iframeElement = iframeDocument.elementFromPoint(localX, localY) as HTMLElement;
+       if (!iframeElement) break;
+
+       // Update deepest element and check for shadow DOM
+       deepestElement = traverseShadowDOM(iframeElement, localX, localY);
+
+       // Continue traversing if we found another iframe
+       if (iframeElement.tagName === 'IFRAME') {
+         currentIframe = iframeElement as HTMLIFrameElement;
+       } else {
+         break;
+       }
+     } catch (error) {
+       // Accessing a cross-origin frame's document throws; stop at the frame.
+       console.warn('Cannot access iframe content:', error);
+       break;
+     }
+   }
+
+   return deepestElement;
+ };
+
+ // Get the element and its iframe path
+ const el = getDeepestElementFromPoint(x, y);
+
if (el) {
+ // Handle potential anchor parent
const { parentElement } = el;
- const element = parentElement?.tagName === 'A' ? parentElement : el;
+ const targetElement = parentElement?.tagName === 'A' ? parentElement : el;
+
+ // Get containing context information
+ const ownerDocument = targetElement.ownerDocument;
+ const frameElement = ownerDocument?.defaultView?.frameElement as HTMLIFrameElement;
+ const isIframeContent = Boolean(frameElement);
+
+ // Get the containing shadow root if any
+ const containingShadowRoot = targetElement.getRootNode() as ShadowRoot;
+ const isShadowRoot = containingShadowRoot instanceof ShadowRoot;
+
let info: {
tagName: string;
hasOnlyText?: boolean;
@@ -36,51 +114,165 @@ export const getElementInformation = async (
attributes?: Record;
innerHTML?: string;
outerHTML?: string;
+ isIframeContent?: boolean;
+ iframeURL?: string;
+ iframeIndex?: number;
+ frameHierarchy?: string[];
+ isShadowRoot?: boolean;
+ shadowRootMode?: string;
+ shadowRootContent?: string;
} = {
- tagName: element?.tagName ?? '',
+ tagName: targetElement?.tagName ?? '',
+ isIframeContent,
+ isShadowRoot
};
- if (element) {
- info.attributes = Array.from(element.attributes).reduce(
+
+ if (isIframeContent) {
+ // Include iframe specific information
+ info.iframeURL = frameElement.src;
+
+ // Calculate the frame's position in the hierarchy
+ let currentFrame = frameElement;
+ const frameHierarchy: string[] = [];
+ let frameIndex = 0;
+
+ while (currentFrame) {
+ // Store the frame's identifier (src, id, or index)
+ frameHierarchy.unshift(
+ currentFrame.id ||
+ currentFrame.src ||
+ `iframe[${frameIndex}]`
+ );
+
+ // Move up to parent frame if it exists
+ const parentDoc = currentFrame.ownerDocument;
+ currentFrame = parentDoc?.defaultView?.frameElement as HTMLIFrameElement;
+ frameIndex++;
+ }
+
+ info.frameHierarchy = frameHierarchy;
+ info.iframeIndex = frameIndex - 1; // Adjust for 0-based index
+ }
+
+ if (isShadowRoot) {
+ // Include shadow root specific information
+ info.shadowRootMode = containingShadowRoot.mode;
+ info.shadowRootContent = containingShadowRoot.innerHTML;
+ }
+
+ // Collect element attributes and properties
+ if (targetElement) {
+ info.attributes = Array.from(targetElement.attributes).reduce(
(acc, attr) => {
acc[attr.name] = attr.value;
return acc;
},
{} as Record
);
+
+ // Gather tag-specific information about the target element.
+ if (targetElement.tagName === 'A') {
+   info.url = (targetElement as HTMLAnchorElement).href;
+   info.innerText = targetElement.textContent ?? '';
+ } else if (targetElement.tagName === 'IMG') {
+   info.imageUrl = (targetElement as HTMLImageElement).src;
+ } else if (targetElement?.tagName === 'SELECT') {
+   const selectElement = targetElement as HTMLSelectElement;
+   info.innerText = selectElement.options[selectElement.selectedIndex]?.text ?? '';
+   info.attributes = {
+     ...info.attributes,
+     selectedValue: selectElement.value,
+   };
+ } else if (
+   // Parenthesized so the type test applies to INPUT elements only;
+   // `a && b || c` would match any element whose `type` is 'date'.
+   targetElement?.tagName === 'INPUT' &&
+   ((targetElement as HTMLInputElement).type === 'time' || (targetElement as HTMLInputElement).type === 'date')
+ ) {
+   info.innerText = (targetElement as HTMLInputElement).value;
+ }
+ else {
+   info.hasOnlyText = targetElement.children.length === 0 &&
+     (targetElement.textContent !== null &&
+     targetElement.textContent.trim().length > 0);
+   info.innerText = targetElement.textContent ?? '';
+ }
+
+ info.innerHTML = targetElement.innerHTML;
+ info.outerHTML = targetElement.outerHTML;
}
- // Gather specific information based on the tag
- if (element?.tagName === 'A') {
- info.url = (element as HTMLAnchorElement).href;
- info.innerText = element.innerText ?? '';
- } else if (element?.tagName === 'IMG') {
- info.imageUrl = (element as HTMLImageElement).src;
- } else if (element?.tagName === 'SELECT') {
- const selectElement = element as HTMLSelectElement;
- info.innerText = selectElement.options[selectElement.selectedIndex]?.text ?? '';
- info.attributes = {
- ...info.attributes,
- selectedValue: selectElement.value,
- };
- } else if (element?.tagName === 'INPUT' && (element as HTMLInputElement).type === 'time' || (element as HTMLInputElement).type === 'date') {
- info.innerText = (element as HTMLInputElement).value;
- } else {
- info.hasOnlyText = element?.children?.length === 0 &&
- element?.innerText?.length > 0;
- info.innerText = element?.innerText ?? '';
- }
- info.innerHTML = element.innerHTML;
- info.outerHTML = element.outerHTML;
+
return info;
}
return null;
},
- { x: coordinates.x, y: coordinates.y },
+ { x: coordinates.x, y: coordinates.y }
);
return elementInfo;
} else {
const elementInfo = await page.evaluate(
async ({ x, y }) => {
- const originalEl = document.elementFromPoint(x, y) as HTMLElement;
+ // Enhanced helper function to get element from point including shadow DOM
+ // Resolves the innermost element under a viewport point, descending through
+ // same-origin iframes and open shadow roots. Returns null when nothing is
+ // under the point.
+ const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => {
+   // Start from the element under the point in the top-level document.
+   const element = document.elementFromPoint(x, y) as HTMLElement;
+   if (!element) return null;
+
+   // Descends through nested shadow roots. elementFromPoint() is relative to
+   // the viewport of the document owning `start`, so the caller must pass
+   // the point in that document's coordinate space.
+   const traverseShadowDOM = (start: HTMLElement, localX: number, localY: number): HTMLElement => {
+     let current = start;
+     let shadowRoot = current.shadowRoot;
+     let deepest = current;
+
+     while (shadowRoot) {
+       const shadowElement = shadowRoot.elementFromPoint(localX, localY) as HTMLElement;
+       if (!shadowElement || shadowElement === current) break;
+
+       deepest = shadowElement;
+       current = shadowElement;
+       shadowRoot = current.shadowRoot;
+     }
+
+     return deepest;
+   };
+
+   // Not an iframe: only shadow DOM needs to be unwrapped.
+   if (element.tagName !== 'IFRAME') {
+     return traverseShadowDOM(element, x, y);
+   }
+
+   let deepestElement = element;
+   let currentIframe = element as HTMLIFrameElement;
+   // The point, expressed in the coordinate space of the document that
+   // contains currentIframe. Offsets accumulate as frames are entered.
+   let localX = x;
+   let localY = y;
+
+   while (currentIframe) {
+     try {
+       // Convert coordinates to the iframe's local space
+       const iframeRect = currentIframe.getBoundingClientRect();
+       localX -= iframeRect.left;
+       localY -= iframeRect.top;
+
+       const iframeDocument = currentIframe.contentDocument || currentIframe.contentWindow?.document;
+       if (!iframeDocument) break;
+
+       const iframeElement = iframeDocument.elementFromPoint(localX, localY) as HTMLElement;
+       if (!iframeElement) break;
+
+       // Update deepest element and check for shadow DOM
+       deepestElement = traverseShadowDOM(iframeElement, localX, localY);
+
+       // Continue traversing if we found another iframe
+       if (iframeElement.tagName === 'IFRAME') {
+         currentIframe = iframeElement as HTMLIFrameElement;
+       } else {
+         break;
+       }
+     } catch (error) {
+       // Accessing a cross-origin frame's document throws; stop at the frame.
+       console.warn('Cannot access iframe content:', error);
+       break;
+     }
+   }
+
+   return deepestElement;
+ };
+
+ const originalEl = getDeepestElementFromPoint(x, y);
if (originalEl) {
let element = originalEl;
@@ -124,6 +316,13 @@ export const getElementInformation = async (
}
}
}
+
+ const ownerDocument = element.ownerDocument;
+ const frameElement = ownerDocument?.defaultView?.frameElement as HTMLIFrameElement;
+ const isIframeContent = Boolean(frameElement);
+
+ const containingShadowRoot = element.getRootNode() as ShadowRoot;
+ const isShadowRoot = containingShadowRoot instanceof ShadowRoot;
let info: {
tagName: string;
@@ -134,11 +333,54 @@ export const getElementInformation = async (
attributes?: Record;
innerHTML?: string;
outerHTML?: string;
+ isIframeContent?: boolean;
+ iframeURL?: string;
+ iframeIndex?: number;
+ frameHierarchy?: string[];
+ isShadowRoot?: boolean;
+ shadowRootMode?: string;
+ shadowRootContent?: string;
} = {
tagName: element?.tagName ?? '',
+ isIframeContent,
+ isShadowRoot
};
+ if (isIframeContent) {
+ // Include iframe specific information
+ info.iframeURL = frameElement.src;
+
+ // Calculate the frame's position in the hierarchy
+ let currentFrame = frameElement;
+ const frameHierarchy: string[] = [];
+ let frameIndex = 0;
+
+ while (currentFrame) {
+ // Store the frame's identifier (src, id, or index)
+ frameHierarchy.unshift(
+ currentFrame.id ||
+ currentFrame.src ||
+ `iframe[${frameIndex}]`
+ );
+
+ // Move up to parent frame if it exists
+ const parentDoc = currentFrame.ownerDocument;
+ currentFrame = parentDoc?.defaultView?.frameElement as HTMLIFrameElement;
+ frameIndex++;
+ }
+
+ info.frameHierarchy = frameHierarchy;
+ info.iframeIndex = frameIndex - 1; // Adjust for 0-based index
+ };
+
+ if (isShadowRoot) {
+ // Include shadow root specific information
+ info.shadowRootMode = containingShadowRoot.mode;
+ info.shadowRootContent = containingShadowRoot.innerHTML;
+ }
+
if (element) {
+ // Get attributes including those from shadow DOM context
info.attributes = Array.from(element.attributes).reduce(
(acc, attr) => {
acc[attr.name] = attr.value;
@@ -146,21 +388,25 @@ export const getElementInformation = async (
},
{} as Record
);
+
+ // Handle specific element types
+ if (element.tagName === 'A') {
+ info.url = (element as HTMLAnchorElement).href;
+ info.innerText = element.textContent ?? '';
+ } else if (element.tagName === 'IMG') {
+ info.imageUrl = (element as HTMLImageElement).src;
+ } else {
+ // Handle text content with proper null checking
+ info.hasOnlyText = element.children.length === 0 &&
+ (element.textContent !== null &&
+ element.textContent.trim().length > 0);
+ info.innerText = element.textContent ?? '';
+ }
+
+ info.innerHTML = element.innerHTML;
+ info.outerHTML = element.outerHTML;
}
-
- if (element?.tagName === 'A') {
- info.url = (element as HTMLAnchorElement).href;
- info.innerText = element.innerText ?? '';
- } else if (element?.tagName === 'IMG') {
- info.imageUrl = (element as HTMLImageElement).src;
- } else {
- info.hasOnlyText = element?.children?.length === 0 &&
- element?.innerText?.length > 0;
- info.innerText = element?.innerText ?? '';
- }
-
- info.innerHTML = element.innerHTML;
- info.outerHTML = element.outerHTML;
+
return info;
}
return null;
@@ -176,47 +422,206 @@ export const getElementInformation = async (
}
};
-/**
- * Returns a {@link Rectangle} object representing
- * the coordinates, width, height and corner points of the element.
- * If an element is not found, returns null.
- * @param page The page instance.
- * @param coordinates Coordinates of an element.
- * @category WorkflowManagement-Selectors
- * @returns {Promise}
- */
export const getRect = async (page: Page, coordinates: Coordinates, listSelector: string, getList: boolean) => {
try {
if (!getList || listSelector !== '') {
const rect = await page.evaluate(
async ({ x, y }) => {
- const el = document.elementFromPoint(x, y) as HTMLElement;
+ // Enhanced helper function to get element from point including iframes
+ // Resolves the innermost element under a viewport point, descending through
+ // same-origin iframes and open shadow roots. Returns null when nothing is
+ // under the point.
+ const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => {
+   // Start from the element under the point in the top-level document.
+   const element = document.elementFromPoint(x, y) as HTMLElement;
+   if (!element) return null;
+
+   // Descends through nested shadow roots. elementFromPoint() is relative to
+   // the viewport of the document owning `start`, so the caller must pass
+   // the point in that document's coordinate space.
+   const traverseShadowDOM = (start: HTMLElement, localX: number, localY: number): HTMLElement => {
+     let current = start;
+     let shadowRoot = current.shadowRoot;
+     let deepest = current;
+
+     while (shadowRoot) {
+       const shadowElement = shadowRoot.elementFromPoint(localX, localY) as HTMLElement;
+       if (!shadowElement || shadowElement === current) break;
+
+       deepest = shadowElement;
+       current = shadowElement;
+       shadowRoot = current.shadowRoot;
+     }
+
+     return deepest;
+   };
+
+   // Not an iframe: only shadow DOM needs to be unwrapped.
+   if (element.tagName !== 'IFRAME') {
+     return traverseShadowDOM(element, x, y);
+   }
+
+   let deepestElement = element;
+   let currentIframe = element as HTMLIFrameElement;
+   // The point, expressed in the coordinate space of the document that
+   // contains currentIframe. Offsets accumulate as frames are entered.
+   let localX = x;
+   let localY = y;
+
+   while (currentIframe) {
+     try {
+       // Convert coordinates to the iframe's local space
+       const iframeRect = currentIframe.getBoundingClientRect();
+       localX -= iframeRect.left;
+       localY -= iframeRect.top;
+
+       const iframeDocument = currentIframe.contentDocument || currentIframe.contentWindow?.document;
+       if (!iframeDocument) break;
+
+       const iframeElement = iframeDocument.elementFromPoint(localX, localY) as HTMLElement;
+       if (!iframeElement) break;
+
+       // Update deepest element and check for shadow DOM
+       deepestElement = traverseShadowDOM(iframeElement, localX, localY);
+
+       // Continue traversing if we found another iframe
+       if (iframeElement.tagName === 'IFRAME') {
+         currentIframe = iframeElement as HTMLIFrameElement;
+       } else {
+         break;
+       }
+     } catch (error) {
+       // Accessing a cross-origin frame's document throws; stop at the frame.
+       console.warn('Cannot access iframe content:', error);
+       break;
+     }
+   }
+
+   return deepestElement;
+ };
+
+ const el = getDeepestElementFromPoint(x, y);
if (el) {
const { parentElement } = el;
- // Match the logic in recorder.ts for link clicks
const element = parentElement?.tagName === 'A' ? parentElement : el;
const rectangle = element?.getBoundingClientRect();
if (rectangle) {
- return {
- x: rectangle.x,
- y: rectangle.y,
- width: rectangle.width,
- height: rectangle.height,
- top: rectangle.top,
- right: rectangle.right,
- bottom: rectangle.bottom,
- left: rectangle.left,
- };
+ const createRectObject = (rect: DOMRect) => ({
+ x: rect.x,
+ y: rect.y,
+ width: rect.width,
+ height: rect.height,
+ top: rect.top,
+ right: rect.right,
+ bottom: rect.bottom,
+ left: rect.left,
+ toJSON() {
+ return {
+ x: this.x,
+ y: this.y,
+ width: this.width,
+ height: this.height,
+ top: this.top,
+ right: this.right,
+ bottom: this.bottom,
+ left: this.left
+ };
+ }
+ });
+
+ // For elements inside iframes, adjust coordinates relative to the top window
+ let adjustedRect = createRectObject(rectangle);
+ let currentWindow = element.ownerDocument.defaultView;
+
+ while (currentWindow !== window.top) {
+ const frameElement = currentWindow?.frameElement as HTMLIFrameElement;
+ if (!frameElement) break;
+
+ const frameRect = frameElement.getBoundingClientRect();
+ adjustedRect = createRectObject({
+ x: adjustedRect.x + frameRect.x,
+ y: adjustedRect.y + frameRect.y,
+ width: adjustedRect.width,
+ height: adjustedRect.height,
+ top: adjustedRect.top + frameRect.top,
+ right: adjustedRect.right + frameRect.left,
+ bottom: adjustedRect.bottom + frameRect.top,
+ left: adjustedRect.left + frameRect.left,
+ } as DOMRect);
+
+ currentWindow = frameElement.ownerDocument.defaultView;
+ }
+
+ return adjustedRect;
}
}
+ return null;
},
- { x: coordinates.x, y: coordinates.y },
+ { x: coordinates.x, y: coordinates.y }
);
return rect;
} else {
const rect = await page.evaluate(
async ({ x, y }) => {
- const originalEl = document.elementFromPoint(x, y) as HTMLElement;
+ // Resolves the innermost element under a viewport point, descending through
+ // same-origin iframes and open shadow roots. Returns null when nothing is
+ // under the point.
+ const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => {
+   // Start from the element under the point in the top-level document.
+   const element = document.elementFromPoint(x, y) as HTMLElement;
+   if (!element) return null;
+
+   // Descends through nested shadow roots. elementFromPoint() is relative to
+   // the viewport of the document owning `start`, so the caller must pass
+   // the point in that document's coordinate space.
+   const traverseShadowDOM = (start: HTMLElement, localX: number, localY: number): HTMLElement => {
+     let current = start;
+     let shadowRoot = current.shadowRoot;
+     let deepest = current;
+
+     while (shadowRoot) {
+       const shadowElement = shadowRoot.elementFromPoint(localX, localY) as HTMLElement;
+       if (!shadowElement || shadowElement === current) break;
+
+       deepest = shadowElement;
+       current = shadowElement;
+       shadowRoot = current.shadowRoot;
+     }
+
+     return deepest;
+   };
+
+   // Not an iframe: only shadow DOM needs to be unwrapped.
+   if (element.tagName !== 'IFRAME') {
+     return traverseShadowDOM(element, x, y);
+   }
+
+   let deepestElement = element;
+   let currentIframe = element as HTMLIFrameElement;
+   // The point, expressed in the coordinate space of the document that
+   // contains currentIframe. Offsets accumulate as frames are entered.
+   let localX = x;
+   let localY = y;
+
+   while (currentIframe) {
+     try {
+       // Convert coordinates to the iframe's local space
+       const iframeRect = currentIframe.getBoundingClientRect();
+       localX -= iframeRect.left;
+       localY -= iframeRect.top;
+
+       const iframeDocument = currentIframe.contentDocument || currentIframe.contentWindow?.document;
+       if (!iframeDocument) break;
+
+       const iframeElement = iframeDocument.elementFromPoint(localX, localY) as HTMLElement;
+       if (!iframeElement) break;
+
+       // Update deepest element and check for shadow DOM
+       deepestElement = traverseShadowDOM(iframeElement, localX, localY);
+
+       // Continue traversing if we found another iframe
+       if (iframeElement.tagName === 'IFRAME') {
+         currentIframe = iframeElement as HTMLIFrameElement;
+       } else {
+         break;
+       }
+     } catch (error) {
+       // Accessing a cross-origin frame's document throws; stop at the frame.
+       console.warn('Cannot access iframe content:', error);
+       break;
+     }
+   }
+
+   return deepestElement;
+ };
+
+ const originalEl = getDeepestElementFromPoint(x, y);
if (originalEl) {
let element = originalEl;
@@ -262,34 +667,69 @@ export const getRect = async (page: Page, coordinates: Coordinates, listSelector
}
const rectangle = element?.getBoundingClientRect();
-
if (rectangle) {
- return {
- x: rectangle.x,
- y: rectangle.y,
- width: rectangle.width,
- height: rectangle.height,
- top: rectangle.top,
- right: rectangle.right,
- bottom: rectangle.bottom,
- left: rectangle.left,
- };
+ const createRectObject = (rect: DOMRect) => ({
+ x: rect.x,
+ y: rect.y,
+ width: rect.width,
+ height: rect.height,
+ top: rect.top,
+ right: rect.right,
+ bottom: rect.bottom,
+ left: rect.left,
+ toJSON() {
+ return {
+ x: this.x,
+ y: this.y,
+ width: this.width,
+ height: this.height,
+ top: this.top,
+ right: this.right,
+ bottom: this.bottom,
+ left: this.left
+ };
+ }
+ });
+
+ // Same coordinate adjustment for iframe elements as above
+ let adjustedRect = createRectObject(rectangle);
+ let currentWindow = element.ownerDocument.defaultView;
+
+ while (currentWindow !== window.top) {
+ const frameElement = currentWindow?.frameElement as HTMLIFrameElement;
+ if (!frameElement) break;
+
+ const frameRect = frameElement.getBoundingClientRect();
+ adjustedRect = createRectObject({
+ x: adjustedRect.x + frameRect.x,
+ y: adjustedRect.y + frameRect.y,
+ width: adjustedRect.width,
+ height: adjustedRect.height,
+ top: adjustedRect.top + frameRect.top,
+ right: adjustedRect.right + frameRect.left,
+ bottom: adjustedRect.bottom + frameRect.top,
+ left: adjustedRect.left + frameRect.left,
+ } as DOMRect);
+
+ currentWindow = frameElement.ownerDocument.defaultView;
+ }
+
+ return adjustedRect;
}
}
return null;
},
- { x: coordinates.x, y: coordinates.y },
+ { x: coordinates.x, y: coordinates.y }
);
return rect;
}
} catch (error) {
const { message, stack } = error as Error;
- logger.log('error', `Error while retrieving selector: ${message}`);
- logger.log('error', `Stack: ${stack}`);
+ console.error('Error while retrieving selector:', message);
+ console.error('Stack:', stack);
}
};
-
/**
* Returns the best and unique css {@link Selectors} for the element on the page.
* Internally uses a finder function from https://github.com/antonmedv/finder/blob/master/finder.ts
@@ -759,6 +1199,206 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => {
}
return output;
}
+
+ /**
+  * Returns the innermost element under the viewport point (x, y), descending
+  * through open shadow roots and same-origin iframes (at most 4 levels each).
+  * Yields null when no element is hit; an inaccessible iframe ends the descent
+  * and is itself returned.
+  */
+ const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => {
+   const MAX_SHADOW_DEPTH = 4;
+   const MAX_IFRAME_DEPTH = 4;
+
+   // Follows nested shadow roots starting at `element`. (px, py) must be given
+   // in the viewport coordinates of the document that owns `element`.
+   const traverseShadowDOM = (element: HTMLElement, px: number, py: number): HTMLElement => {
+     let deepest = element;
+
+     for (let depth = 0; depth < MAX_SHADOW_DEPTH; depth++) {
+       const shadowRoot = deepest.shadowRoot;
+       if (!shadowRoot) break;
+
+       const shadowElement = shadowRoot.elementFromPoint(px, py) as HTMLElement;
+       if (!shadowElement || shadowElement === deepest) break;
+
+       deepest = shadowElement;
+     }
+
+     return deepest;
+   };
+
+   const element = document.elementFromPoint(x, y) as HTMLElement;
+   if (!element) return null;
+
+   // The hit element may itself host a shadow tree; resolve that first.
+   let deepestElement = traverseShadowDOM(element, x, y);
+
+   // The point expressed in the coordinate space of the document currently
+   // being searched; starts out as the top-level viewport coordinates.
+   let localX = x;
+   let localY = y;
+
+   for (
+     let depth = 0;
+     depth < MAX_IFRAME_DEPTH && deepestElement.tagName === 'IFRAME';
+     depth++
+   ) {
+     const currentIframe = deepestElement as HTMLIFrameElement;
+
+     try {
+       // getBoundingClientRect() is relative to the viewport of the document
+       // that owns the iframe, so each nesting level must subtract its offset
+       // from the previous level's local point, not from the original (x, y).
+       const iframeRect = currentIframe.getBoundingClientRect();
+       const iframeX = localX - iframeRect.left;
+       const iframeY = localY - iframeRect.top;
+
+       const iframeDoc = currentIframe.contentDocument || currentIframe.contentWindow?.document;
+       if (!iframeDoc) break;
+
+       const iframeElement = iframeDoc.elementFromPoint(iframeX, iframeY) as HTMLElement;
+       if (!iframeElement) break;
+
+       // Shadow roots inside the frame are hit-tested with frame-local coordinates.
+       deepestElement = traverseShadowDOM(iframeElement, iframeX, iframeY);
+       localX = iframeX;
+       localY = iframeY;
+     } catch (error) {
+       // Cross-origin frames throw on document access; stop at the frame.
+       console.warn('Cannot access iframe content:', error);
+       break;
+     }
+   }
+
+   return deepestElement;
+ };
+
+ /**
+  * Builds a selector for an element that lives inside one or more iframes.
+  * Every enclosing frame is described relative to the body of the document
+  * that contains it, followed by the element's own selector inside the
+  * innermost frame; all pieces are chained with the ' :>> ' delimiter.
+  * Returns null when the element is not inside a frame or when selector
+  * generation throws.
+  */
+ const genSelectorForIframe = (element: HTMLElement) => {
+   const FRAME_DELIMITER = ' :>> ';
+   const MAX_DEPTH = 4;
+
+   // Collects the enclosing frames, outermost first, by repeatedly asking the
+   // window that owns the current node for its frame element.
+   const collectFrames = (start: HTMLElement) => {
+     const found = [];
+     let node = start;
+
+     for (let level = 0; node && level < MAX_DEPTH; level++) {
+       const doc = node.ownerDocument;
+       const hostFrame = doc?.defaultView?.frameElement as HTMLIFrameElement;
+
+       // No enclosing frame element is reported: stop climbing.
+       if (!hostFrame) break;
+
+       found.unshift({ frame: hostFrame, document: doc, element: node });
+
+       // Continue from the frame element, which lives in the parent document.
+       node = hostFrame;
+     }
+
+     return found;
+   };
+
+   // An empty path means no enclosing frame was found for the element.
+   const frames = collectFrames(element);
+   if (frames.length === 0) return null;
+
+   try {
+     // One selector per frame: the outermost is rooted at the top document's
+     // body, every other one at the body of the frame document enclosing it.
+     const frameSelectors: string[] = frames.map((entry, position) => {
+       const scope = position === 0
+         ? document.body
+         : (frames[position - 1].document.body as Element);
+
+       return finder(entry.frame, { root: scope });
+     });
+
+     // The target itself is resolved inside the innermost frame's document.
+     const innermost = frames[frames.length - 1];
+     const targetSelector: string = finder(element, {
+       root: innermost.document.body as Element
+     });
+
+     return {
+       fullSelector: frameSelectors.concat(targetSelector).join(FRAME_DELIMITER),
+       isFrameContent: true
+     };
+   } catch (e) {
+     // Selector generation is best-effort; report and signal "no selector".
+     console.warn('Error generating iframe selector:', e);
+     return null;
+   }
+ };
+
+ /**
+  * Builds a selector for an element that lives inside one or more shadow
+  * trees. Every shadow host on the way down is described relative to the
+  * root enclosing it, followed by the element's own selector inside the
+  * innermost shadow root; all pieces are chained with the ' >> ' delimiter.
+  * Also reports the mode of the innermost shadow root. Returns null when the
+  * element is not in a shadow tree or when selector generation throws.
+  */
+ const genSelectorForShadowDOM = (element: HTMLElement) => {
+   const SHADOW_DELIMITER = ' >> ';
+   const MAX_DEPTH = 4;
+
+   // Collects the enclosing shadow roots, outermost first, hopping from each
+   // node to the host of the shadow root that contains it.
+   const collectShadowRoots = (start: HTMLElement) => {
+     const found = [];
+     let node = start;
+
+     for (let level = 0; node && level < MAX_DEPTH; level++) {
+       const container = node.getRootNode();
+       if (!(container instanceof ShadowRoot)) break;
+
+       const host = container.host as HTMLElement;
+       found.unshift({ host, root: container, element: node });
+       node = host;
+     }
+
+     return found;
+   };
+
+   const shadowRoots = collectShadowRoots(element);
+   if (shadowRoots.length === 0) return null;
+
+   try {
+     // One selector per host: the outermost is rooted at the document body,
+     // every other one at the shadow root that encloses it.
+     const hostSelectors: string[] = shadowRoots.map((entry, position) => {
+       const scope = position === 0
+         ? document.body
+         : (shadowRoots[position - 1].root as unknown as Element);
+
+       return finder(entry.host, { root: scope });
+     });
+
+     // The target itself is resolved inside the innermost shadow root.
+     const innermost = shadowRoots[shadowRoots.length - 1];
+     const targetSelector: string = finder(element, {
+       root: innermost.root as unknown as Element
+     });
+
+     return {
+       fullSelector: hostSelectors.concat(targetSelector).join(SHADOW_DELIMITER),
+       mode: innermost.root.mode
+     };
+   } catch (e) {
+     console.warn('Error generating shadow DOM selector:', e);
+     return null;
+   }
+ };
const genSelectors = (element: HTMLElement | null) => {
if (element == null) {
@@ -779,6 +1419,10 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => {
} catch (e) {
}
+
+ const iframeSelector = genSelectorForIframe(element);
+ const shadowSelector = genSelectorForShadowDOM(element);
+
const hrefSelector = genSelectorForAttributes(element, ['href']);
const formSelector = genSelectorForAttributes(element, [
'name',
@@ -825,9 +1469,19 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => {
hrefSelector,
accessibilitySelector,
formSelector,
+ iframeSelector: iframeSelector ? {
+ full: iframeSelector.fullSelector,
+ isIframe: iframeSelector.isFrameContent,
+ } : null,
+ shadowSelector: shadowSelector ? {
+ full: shadowSelector.fullSelector,
+ mode: shadowSelector.mode
+ } : null
};
}
+
+
function genAttributeSet(element: HTMLElement, attributes: string[]) {
return new Set(
attributes.filter((attr) => {
@@ -867,7 +1521,8 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => {
return char.length === 1 && char.match(/[0-9]/);
}
- const hoveredElement = document.elementFromPoint(x, y) as HTMLElement;
+ const hoveredElement = getDeepestElementFromPoint(x, y) as HTMLElement;
+
if (
hoveredElement != null &&
!hoveredElement.closest('#overlay-controls') != null
@@ -902,9 +1557,83 @@ interface SelectorResult {
*/
export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates, listSelector: string): Promise => {
+ interface DOMContext {
+ type: 'iframe' | 'shadow';
+ element: HTMLElement;
+ container: HTMLIFrameElement | ShadowRoot;
+ host?: HTMLElement;
+ document?: Document;
+ }
+
try {
if (!listSelector) {
const selectors = await page.evaluate(({ x, y }: { x: number, y: number }) => {
+ /**
+  * Returns the innermost element under the viewport point (x, y), descending
+  * through open shadow roots and same-origin iframes (at most 4 frame levels).
+  * Yields null when no element is hit; an inaccessible iframe is returned as-is.
+  */
+ const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => {
+   const MAX_DEPTH = 4;
+
+   // Follows nested shadow roots starting at `element`. (px, py) must be given
+   // in the viewport coordinates of the document that owns `element`.
+   const traverseShadowDOM = (element: HTMLElement, px: number, py: number): HTMLElement => {
+     let deepest = element;
+     let shadowRoot = deepest.shadowRoot;
+
+     while (shadowRoot) {
+       const shadowElement = shadowRoot.elementFromPoint(px, py) as HTMLElement;
+       if (!shadowElement || shadowElement === deepest) break;
+
+       deepest = shadowElement;
+       shadowRoot = deepest.shadowRoot;
+     }
+
+     return deepest;
+   };
+
+   const element = document.elementFromPoint(x, y) as HTMLElement;
+   if (!element) return null;
+
+   // Resolve shadow content first so an iframe nested in a shadow tree is
+   // still descended into (an iframe has no shadow root, so it passes through).
+   let deepestElement = traverseShadowDOM(element, x, y);
+
+   // The point expressed in the coordinate space of the document being searched.
+   let localX = x;
+   let localY = y;
+
+   for (let depth = 0; depth < MAX_DEPTH && deepestElement.tagName === 'IFRAME'; depth++) {
+     const currentIframe = deepestElement as HTMLIFrameElement;
+     try {
+       // getBoundingClientRect() is relative to the viewport of the document
+       // that owns the iframe, so each nesting level must subtract its offset
+       // from the previous level's local point, not from the original (x, y).
+       const iframeRect = currentIframe.getBoundingClientRect();
+       const iframeX = localX - iframeRect.left;
+       const iframeY = localY - iframeRect.top;
+
+       const iframeDoc = currentIframe.contentDocument || currentIframe.contentWindow?.document;
+       if (!iframeDoc) break;
+
+       const iframeElement = iframeDoc.elementFromPoint(iframeX, iframeY) as HTMLElement;
+       if (!iframeElement) break;
+
+       // Shadow roots inside the frame are hit-tested with frame-local coordinates.
+       deepestElement = traverseShadowDOM(iframeElement, iframeX, iframeY);
+       localX = iframeX;
+       localY = iframeY;
+     } catch (error) {
+       console.warn('Cannot access iframe content:', error);
+       break;
+     }
+   }
+
+   return deepestElement;
+ };
+
+ // Basic selector generation
function getNonUniqueSelector(element: HTMLElement): string {
let selector = element.tagName.toLowerCase();
@@ -928,22 +1657,95 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates
return selector;
}
- function getSelectorPath(element: HTMLElement | null): string {
- const path: string[] = [];
- let depth = 0;
- const maxDepth = 2;
- while (element && element !== document.body && depth < maxDepth) {
- const selector = getNonUniqueSelector(element);
+ /**
+  * Lists the shadow-root and iframe boundaries enclosing `element`,
+  * outermost first, climbing at most four boundaries. A shadow boundary is
+  * checked before an iframe boundary at every step.
+  */
+ function getContextPath(element: HTMLElement): DOMContext[] {
+   const MAX_DEPTH = 4;
+   const boundaries: DOMContext[] = [];
+
+   // Describes the nearest boundary around `node`, or null when there is none.
+   const enclosingBoundary = (node: HTMLElement): DOMContext | null => {
+     const root = node.getRootNode();
+     if (root instanceof ShadowRoot) {
+       return {
+         type: 'shadow',
+         element: node,
+         container: root,
+         host: root.host as HTMLElement
+       };
+     }
+
+     const doc = node.ownerDocument;
+     const frame = doc?.defaultView?.frameElement as HTMLIFrameElement;
+     return frame
+       ? { type: 'iframe', element: node, container: frame, document: doc }
+       : null;
+   };
+
+   let node = element;
+   for (let level = 0; node && level < MAX_DEPTH; level++) {
+     const boundary = enclosingBoundary(node);
+     if (!boundary) break;
+     boundaries.unshift(boundary);
+     // Climb to the node that carries the boundary: the shadow host or the
+     // iframe element in the parent document.
+     node = boundary.type === 'shadow'
+       ? (boundary.host as HTMLElement)
+       : (boundary.container as HTMLIFrameElement);
+   }
+
+   return boundaries;
+ }
+
+ function getSelectorPath(element: HTMLElement | null): string {
+ if (!element) return '';
+
+ // Get the complete context path
+ const contextPath = getContextPath(element);
+ if (contextPath.length > 0) {
+ const selectorParts: string[] = [];
+
+ contextPath.forEach((context, index) => {
+ const containerSelector = getNonUniqueSelector(
+ context.type === 'shadow' ? context.host! : context.container as HTMLElement
+ );
+
+ if (index === contextPath.length - 1) {
+ const elementSelector = getNonUniqueSelector(element);
+ const delimiter = context.type === 'shadow' ? ' >> ' : ' :>> ';
+ selectorParts.push(`${containerSelector}${delimiter}${elementSelector}`);
+ } else {
+ selectorParts.push(containerSelector);
+ }
+ });
+
+ return selectorParts.join(contextPath[0].type === 'shadow' ? ' >> ' : ' :>> ');
+ }
+
+ // Regular DOM path generation
+ const path: string[] = [];
+ let currentElement = element;
+ const MAX_DEPTH = 2;
+ let depth = 0;
+
+ while (currentElement && currentElement !== document.body && depth < MAX_DEPTH) {
+ const selector = getNonUniqueSelector(currentElement);
path.unshift(selector);
- element = element.parentElement;
+
+ if (!currentElement.parentElement) break;
+ currentElement = currentElement.parentElement;
depth++;
}
return path.join(' > ');
}
- const originalEl = document.elementFromPoint(x, y) as HTMLElement;
+ // Main logic to get element and generate selector
+ const originalEl = getDeepestElementFromPoint(x, y);
if (!originalEl) return null;
let element = originalEl;
@@ -989,16 +1791,90 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates
}
}
}
- // }
const generalSelector = getSelectorPath(element);
- return {
- generalSelector,
- };
+ return { generalSelector };
}, coordinates);
+
return selectors || { generalSelector: '' };
} else {
+ // When we have a list selector, we need special handling while maintaining shadow DOM support
const selectors = await page.evaluate(({ x, y }: { x: number, y: number }) => {
+ /**
+  * Returns the innermost element under the viewport point (x, y), descending
+  * through open shadow roots and same-origin iframes (at most 4 levels each).
+  * Yields null when no element is hit; an inaccessible iframe ends the descent
+  * and is itself returned.
+  */
+ const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => {
+   const MAX_SHADOW_DEPTH = 4;
+   const MAX_IFRAME_DEPTH = 4;
+
+   // Follows nested shadow roots starting at `element`. (px, py) must be given
+   // in the viewport coordinates of the document that owns `element`.
+   const traverseShadowDOM = (element: HTMLElement, px: number, py: number): HTMLElement => {
+     let deepest = element;
+
+     for (let depth = 0; depth < MAX_SHADOW_DEPTH; depth++) {
+       const shadowRoot = deepest.shadowRoot;
+       if (!shadowRoot) break;
+
+       const shadowElement = shadowRoot.elementFromPoint(px, py) as HTMLElement;
+       if (!shadowElement || shadowElement === deepest) break;
+
+       deepest = shadowElement;
+     }
+
+     return deepest;
+   };
+
+   const element = document.elementFromPoint(x, y) as HTMLElement;
+   if (!element) return null;
+
+   // The hit element may itself host a shadow tree; resolve that first.
+   let deepestElement = traverseShadowDOM(element, x, y);
+
+   // The point expressed in the coordinate space of the document currently
+   // being searched; starts out as the top-level viewport coordinates.
+   let localX = x;
+   let localY = y;
+
+   for (
+     let depth = 0;
+     depth < MAX_IFRAME_DEPTH && deepestElement.tagName === 'IFRAME';
+     depth++
+   ) {
+     const currentIframe = deepestElement as HTMLIFrameElement;
+
+     try {
+       // getBoundingClientRect() is relative to the viewport of the document
+       // that owns the iframe, so each nesting level must subtract its offset
+       // from the previous level's local point, not from the original (x, y).
+       const iframeRect = currentIframe.getBoundingClientRect();
+       const iframeX = localX - iframeRect.left;
+       const iframeY = localY - iframeRect.top;
+
+       const iframeDoc = currentIframe.contentDocument || currentIframe.contentWindow?.document;
+       if (!iframeDoc) break;
+
+       const iframeElement = iframeDoc.elementFromPoint(iframeX, iframeY) as HTMLElement;
+       if (!iframeElement) break;
+
+       // Shadow roots inside the frame are hit-tested with frame-local coordinates.
+       deepestElement = traverseShadowDOM(iframeElement, iframeX, iframeY);
+       localX = iframeX;
+       localY = iframeY;
+     } catch (error) {
+       // Cross-origin frames throw on document access; stop at the frame.
+       console.warn('Cannot access iframe content:', error);
+       break;
+     }
+   }
+
+   return deepestElement;
+ };
+
+ // Generate basic selector from element's tag and classes
function getNonUniqueSelector(element: HTMLElement): string {
let selector = element.tagName.toLowerCase();
@@ -1009,9 +1885,9 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates
}
if (element.className) {
- const classes = element.className.split(/\s+/).filter((cls: string) => Boolean(cls));
+ const classes = element.className.split(/\s+/).filter(Boolean);
if (classes.length > 0) {
- const validClasses = classes.filter((cls: string) => !cls.startsWith('!') && !cls.includes(':'));
+ const validClasses = classes.filter(cls => !cls.startsWith('!') && !cls.includes(':'));
if (validClasses.length > 0) {
selector += '.' + validClasses.map(cls => CSS.escape(cls)).join('.');
}
@@ -1021,34 +1897,104 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates
return selector;
}
- function getSelectorPath(element: HTMLElement | null): string {
- const path: string[] = [];
+ // Get complete context path (both iframe and shadow DOM)
+ function getContextPath(element: HTMLElement): DOMContext[] {
+ const path: DOMContext[] = [];
+ let current = element;
let depth = 0;
- const maxDepth = 2;
+ const MAX_DEPTH = 4;
+
+ while (current && depth < MAX_DEPTH) {
+ // Check for shadow DOM
+ const rootNode = current.getRootNode();
+ if (rootNode instanceof ShadowRoot) {
+ path.unshift({
+ type: 'shadow',
+ element: current,
+ container: rootNode,
+ host: rootNode.host as HTMLElement
+ });
+ current = rootNode.host as HTMLElement;
+ depth++;
+ continue;
+ }
- while (element && element !== document.body && depth < maxDepth) {
- const selector = getNonUniqueSelector(element);
+ // Check for iframe
+ const ownerDocument = current.ownerDocument;
+ const frameElement = ownerDocument?.defaultView?.frameElement as HTMLIFrameElement;
+
+ if (frameElement) {
+ path.unshift({
+ type: 'iframe',
+ element: current,
+ container: frameElement,
+ document: ownerDocument
+ });
+ current = frameElement;
+ depth++;
+ continue;
+ }
+
+ break;
+ }
+
+ return path;
+ }
+
+ function getSelectorPath(element: HTMLElement | null): string {
+ if (!element) return '';
+
+ // Get the complete context path
+ const contextPath = getContextPath(element);
+ if (contextPath.length > 0) {
+ const selectorParts: string[] = [];
+
+ contextPath.forEach((context, index) => {
+ const containerSelector = getNonUniqueSelector(
+ context.type === 'shadow' ? context.host! : context.container as HTMLElement
+ );
+
+ if (index === contextPath.length - 1) {
+ const elementSelector = getNonUniqueSelector(element);
+ const delimiter = context.type === 'shadow' ? ' >> ' : ' :>> ';
+ selectorParts.push(`${containerSelector}${delimiter}${elementSelector}`);
+ } else {
+ selectorParts.push(containerSelector);
+ }
+ });
+
+ return selectorParts.join(contextPath[0].type === 'shadow' ? ' >> ' : ' :>> ');
+ }
+
+ // Regular DOM path generation
+ const path: string[] = [];
+ let currentElement = element;
+ const MAX_DEPTH = 2;
+ let depth = 0;
+
+ while (currentElement && currentElement !== document.body && depth < MAX_DEPTH) {
+ const selector = getNonUniqueSelector(currentElement);
path.unshift(selector);
- element = element.parentElement;
+
+ if (!currentElement.parentElement) break;
+ currentElement = currentElement.parentElement;
depth++;
}
return path.join(' > ');
}
- const originalEl = document.elementFromPoint(x, y) as HTMLElement;
- if (!originalEl) return null;
+ const originalEl = getDeepestElementFromPoint(x, y);
+ if (!originalEl) return { generalSelector: '' };
let element = originalEl;
const generalSelector = getSelectorPath(element);
- return {
- generalSelector,
- };
- }, coordinates);
- return selectors || { generalSelector: '' };
- }
+ return { generalSelector };
+ }, coordinates);
+ return selectors || { generalSelector: '' };
+ }
} catch (error) {
console.error('Error in getNonUniqueSelectors:', error);
return { generalSelector: '' };
@@ -1083,42 +2029,158 @@ export const getChildSelectors = async (page: Page, parentSelector: string): Pro
}
// Function to generate selector path from an element to its parent
- function getSelectorPath(element: HTMLElement | null): string {
+ function getSelectorPath(element: HTMLElement): string {
if (!element || !element.parentElement) return '';
- const parentSelector = getNonUniqueSelector(element.parentElement);
const elementSelector = getNonUniqueSelector(element);
+
+ // Check for shadow DOM context
+ const rootNode = element.getRootNode();
+ if (rootNode instanceof ShadowRoot) {
+ const hostSelector = getNonUniqueSelector(rootNode.host as HTMLElement);
+ return `${hostSelector} >> ${elementSelector}`;
+ }
+ // Check for iframe context
+ const ownerDocument = element.ownerDocument;
+ const frameElement = ownerDocument?.defaultView?.frameElement as HTMLIFrameElement;
+ if (frameElement) {
+ const frameSelector = getNonUniqueSelector(frameElement);
+ return `${frameSelector} :>> ${elementSelector}`;
+ }
+
+ // Regular DOM context
+ const parentSelector = getNonUniqueSelector(element.parentElement);
return `${parentSelector} > ${elementSelector}`;
}
- // Function to recursively get all descendant selectors
+
+ // Function to get all children from special contexts
+ /**
+  * Gathers every element found in the shadow root of `element` (if any) and
+  * in the documents of all iframes that are descendants of `element`.
+  * An iframe without a reachable document is skipped; access errors are logged.
+  */
+ function getSpecialContextChildren(element: HTMLElement): HTMLElement[] {
+   const allElementsOf = (scope: ShadowRoot | Document): HTMLElement[] =>
+     Array.from(scope.querySelectorAll('*')) as HTMLElement[];
+
+   const collected: HTMLElement[] = [];
+
+   const ownShadow = element.shadowRoot;
+   if (ownShadow) collected.push(...allElementsOf(ownShadow));
+
+   // Descendant iframes: pull in the elements of each accessible frame document.
+   const frames = Array.from(element.querySelectorAll('iframe')) as HTMLIFrameElement[];
+   frames.forEach((frame) => {
+     try {
+       const frameDoc = frame.contentDocument || frame.contentWindow?.document;
+       if (frameDoc) collected.push(...allElementsOf(frameDoc));
+     } catch (error) {
+       console.warn('Cannot access iframe content:', error);
+     }
+   });
+
+   return collected;
+ }
+
+ // Function to recursively get all descendant selectors including shadow DOM and iframes
function getAllDescendantSelectors(element: HTMLElement): string[] {
let selectors: string[] = [];
+
+ // Handle regular DOM children
const children = Array.from(element.children) as HTMLElement[];
-
for (const child of children) {
const childPath = getSelectorPath(child);
if (childPath) {
- selectors.push(childPath); // Add direct child path
- selectors = selectors.concat(getAllDescendantSelectors(child)); // Recursively process descendants
+ selectors.push(childPath);
+
+ // Process regular descendants
+ selectors = selectors.concat(getAllDescendantSelectors(child));
+
+ // Process special context children (shadow DOM and iframes)
+ const specialChildren = getSpecialContextChildren(child);
+ for (const specialChild of specialChildren) {
+ const specialPath = getSelectorPath(specialChild);
+ if (specialPath) {
+ selectors.push(specialPath);
+ selectors = selectors.concat(getAllDescendantSelectors(specialChild));
+ }
+ }
+ }
+ }
+
+ // Handle direct special context children
+ const specialChildren = getSpecialContextChildren(element);
+ for (const specialChild of specialChildren) {
+ const specialPath = getSelectorPath(specialChild);
+ if (specialPath) {
+ selectors.push(specialPath);
+ selectors = selectors.concat(getAllDescendantSelectors(specialChild));
}
}
return selectors;
}
- // Find all occurrences of the parent selector in the DOM
- const parentElements = Array.from(document.querySelectorAll(parentSelector)) as HTMLElement[];
+ // Handle both shadow DOM and iframe parent selectors
+ let parentElements: HTMLElement[] = [];
+
+ // Check for special context traversal in parent selector
+ if (parentSelector.includes('>>') || parentSelector.includes(':>>')) {
+ // Split by both types of delimiters
+ const selectorParts = parentSelector.split(/(?:>>|:>>)/).map(part => part.trim());
+
+ // Start with initial elements
+ parentElements = Array.from(document.querySelectorAll(selectorParts[0])) as HTMLElement[];
+
+ // Traverse through parts
+ for (let i = 1; i < selectorParts.length; i++) {
+ const newParentElements: HTMLElement[] = [];
+
+ for (const element of parentElements) {
+ // Check for shadow DOM
+ if (element.shadowRoot) {
+ const shadowChildren = Array.from(
+ element.shadowRoot.querySelectorAll(selectorParts[i])
+ ) as HTMLElement[];
+ newParentElements.push(...shadowChildren);
+ }
+
+ // Check for iframe
+ if (element.tagName === 'IFRAME') {
+ try {
+ const iframeDoc = (element as HTMLIFrameElement).contentDocument ||
+ (element as HTMLIFrameElement).contentWindow?.document;
+ if (iframeDoc) {
+ const iframeChildren = Array.from(
+ iframeDoc.querySelectorAll(selectorParts[i])
+ ) as HTMLElement[];
+ newParentElements.push(...iframeChildren);
+ }
+ } catch (error) {
+ console.warn('Cannot access iframe content during traversal:', error);
+ continue;
+ }
+ }
+ }
+
+ parentElements = newParentElements;
+ }
+ } else {
+ // Regular DOM selector
+ parentElements = Array.from(document.querySelectorAll(parentSelector)) as HTMLElement[];
+ }
+
const allChildSelectors = new Set(); // Use a set to ensure uniqueness
// Process each parent element and its descendants
parentElements.forEach((parentElement) => {
const descendantSelectors = getAllDescendantSelectors(parentElement);
- descendantSelectors.forEach((selector) => allChildSelectors.add(selector)); // Add selectors to the set
+ descendantSelectors.forEach((selector) => allChildSelectors.add(selector));
});
- return Array.from(allChildSelectors); // Convert the set back to an array
+ return Array.from(allChildSelectors);
}, parentSelector);
return childSelectors || [];
diff --git a/server/src/workflow-management/utils.ts b/server/src/workflow-management/utils.ts
index b3dadd60..0804aa78 100644
--- a/server/src/workflow-management/utils.ts
+++ b/server/src/workflow-management/utils.ts
@@ -12,6 +12,16 @@ export const getBestSelectorForAction = (action: Action) => {
case ActionType.Hover:
case ActionType.DragAndDrop: {
const selectors = action.selectors;
+
+
+ if (selectors?.iframeSelector?.full) {
+ return selectors.iframeSelector.full;
+ }
+
+ if (selectors?.shadowSelector?.full) {
+ return selectors.shadowSelector.full;
+ }
+
// less than 25 characters, and element only has text inside
const textSelector =
selectors?.text?.length != null &&
@@ -75,6 +85,11 @@ export const getBestSelectorForAction = (action: Action) => {
case ActionType.Input:
case ActionType.Keydown: {
const selectors = action.selectors;
+
+ if (selectors?.shadowSelector?.full) {
+ return selectors.shadowSelector.full;
+ }
+
return (
selectors.testIdSelector ??
selectors?.id ??
diff --git a/src/App.tsx b/src/App.tsx
index 02dff134..cdee8d40 100644
--- a/src/App.tsx
+++ b/src/App.tsx
@@ -4,6 +4,7 @@ import { ThemeProvider, createTheme } from "@mui/material/styles";
import { GlobalInfoProvider } from "./context/globalInfo";
import { PageWrapper } from "./pages/PageWrappper";
import i18n from "./i18n";
+import ThemeModeProvider from './context/theme-provider';
const theme = createTheme({
@@ -85,15 +86,23 @@ const theme = createTheme({
function App() {
return (
-
+
+
+
+ } />
+
+
+
+
+ //
-
-
- } />
-
-
+ //
+ //
+ // } />
+ //
+ //
-
+ //
);
}
diff --git a/src/api/storage.ts b/src/api/storage.ts
index 18c793c0..201a38cc 100644
--- a/src/api/storage.ts
+++ b/src/api/storage.ts
@@ -1,7 +1,7 @@
import { default as axios } from "axios";
import { WorkflowFile } from "maxun-core";
-import { RunSettings } from "../components/molecules/RunSettings";
-import { ScheduleSettings } from "../components/molecules/ScheduleSettings";
+import { RunSettings } from "../components/run/RunSettings";
+import { ScheduleSettings } from "../components/robot/ScheduleSettings";
import { CreateRunResponse, ScheduleRunResponse } from "../pages/MainPage";
import { apiUrl } from "../apiConfig";
diff --git a/src/components/molecules/ActionDescriptionBox.tsx b/src/components/action/ActionDescriptionBox.tsx
similarity index 74%
rename from src/components/molecules/ActionDescriptionBox.tsx
rename to src/components/action/ActionDescriptionBox.tsx
index 190c5838..97c979ec 100644
--- a/src/components/molecules/ActionDescriptionBox.tsx
+++ b/src/components/action/ActionDescriptionBox.tsx
@@ -5,19 +5,24 @@ import { useActionContext } from '../../context/browserActions';
import MaxunLogo from "../../assets/maxunlogo.png";
import { useTranslation } from 'react-i18next';
-const CustomBoxContainer = styled.div`
+interface CustomBoxContainerProps {
+ isDarkMode: boolean;
+}
+
+const CustomBoxContainer = styled.div`
position: relative;
min-width: 250px;
width: auto;
min-height: 100px;
height: auto;
- // border: 2px solid #ff00c3;
border-radius: 5px;
- background-color: white;
+ background-color: ${({ isDarkMode }) => (isDarkMode ? '#313438' : 'white')};
+ color: ${({ isDarkMode }) => (isDarkMode ? 'white' : 'black')};
margin: 80px 13px 25px 13px;
+ box-shadow: 0px 4px 10px rgba(0, 0, 0, 0.1);
`;
-const Triangle = styled.div`
+const Triangle = styled.div`
position: absolute;
top: -15px;
left: 50%;
@@ -26,7 +31,7 @@ const Triangle = styled.div`
height: 0;
border-left: 20px solid transparent;
border-right: 20px solid transparent;
- border-bottom: 20px solid white;
+ border-bottom: 20px solid ${({ isDarkMode }) => (isDarkMode ? '#313438' : 'white')};
`;
const Logo = styled.img`
@@ -44,7 +49,8 @@ const Content = styled.div`
text-align: left;
`;
-const ActionDescriptionBox = () => {
+
+const ActionDescriptionBox = ({ isDarkMode }: { isDarkMode: boolean }) => {
const { t } = useTranslation();
const { getText, getScreenshot, getList, captureStage } = useActionContext() as {
getText: boolean;
@@ -93,9 +99,19 @@ const ActionDescriptionBox = () => {
}
- label={{text}}
+ label={
+
+ {text}
+
+ }
/>
))}
@@ -112,9 +128,9 @@ const ActionDescriptionBox = () => {
};
return (
-
-
-
+
+
+
{renderActionDescription()}
diff --git a/src/components/molecules/ActionSettings.tsx b/src/components/action/ActionSettings.tsx
similarity index 70%
rename from src/components/molecules/ActionSettings.tsx
rename to src/components/action/ActionSettings.tsx
index 79e120b8..a0a3aa59 100644
--- a/src/components/molecules/ActionSettings.tsx
+++ b/src/components/action/ActionSettings.tsx
@@ -1,16 +1,15 @@
import React, { useRef } from 'react';
import styled from "styled-components";
import { Button } from "@mui/material";
-//import { ActionDescription } from "../organisms/RightSidePanel";
import * as Settings from "./action-settings";
import { useSocketStore } from "../../context/socket";
interface ActionSettingsProps {
action: string;
+ darkMode?: boolean;
}
-export const ActionSettings = ({ action }: ActionSettingsProps) => {
-
+export const ActionSettings = ({ action, darkMode = false }: ActionSettingsProps) => {
const settingsRef = useRef<{ getSettings: () => object }>(null);
const { socket } = useSocketStore();
@@ -20,30 +19,27 @@ export const ActionSettings = ({ action }: ActionSettingsProps) => {
return ;
case 'scroll':
return ;
- case 'scrape':
- return ;
+ case 'scrape':
+ return ;
case 'scrapeSchema':
return ;
default:
return null;
}
- }
+ };
const handleSubmit = (event: React.SyntheticEvent) => {
event.preventDefault();
- //get the data from settings
const settings = settingsRef.current?.getSettings();
- //Send notification to the server and generate the pair
socket?.emit(`action`, {
action,
settings
});
- }
+ };
return (