Merge branch 'develop' into listui-fix

This commit is contained in:
Rohit
2025-01-08 11:49:26 +05:30
committed by GitHub
29 changed files with 2784 additions and 682 deletions

View File

@@ -15,7 +15,7 @@ Maxun lets you train a robot in 2 minutes and scrape the web on auto-pilot. Web
<p align="center">
<a href="https://maxun-website.vercel.app/?ref=ghread"><b>Website</b></a> |
<a href="https://www.maxun.dev/?ref=ghread"><b>Website</b></a> |
<a href="https://discord.gg/5GbPjBUkws"><b>Discord</b></a> |
<a href="https://x.com/maxun_io?ref=ghread"><b>Twitter</b></a> |
<a href="https://docs.google.com/forms/d/e/1FAIpQLSdbD2uhqC4sbg4eLZ9qrFbyrfkXZ2XsI6dQ0USRCQNZNn5pzg/viewform"><b>Join Maxun Cloud</b></a> |

View File

@@ -43,7 +43,7 @@ services:
#build:
#context: .
#dockerfile: server/Dockerfile
image: getmaxun/maxun-backend:v0.0.9
image: getmaxun/maxun-backend:v0.0.10
ports:
- "${BACKEND_PORT:-8080}:${BACKEND_PORT:-8080}"
env_file: .env
@@ -70,7 +70,7 @@ services:
#build:
#context: .
#dockerfile: Dockerfile
image: getmaxun/maxun-frontend:v0.0.5
image: getmaxun/maxun-frontend:v0.0.6
ports:
- "${FRONTEND_PORT:-5173}:${FRONTEND_PORT:-5173}"
env_file: .env

View File

@@ -1,6 +1,6 @@
{
"name": "maxun-core",
"version": "0.0.7",
"version": "0.0.8",
"description": "Core package for Maxun, responsible for data extraction",
"main": "build/index.js",
"typings": "build/index.d.ts",

View File

@@ -188,69 +188,201 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
* @param {Object.<string, {selector: string, tag: string}>} lists The named lists of HTML elements.
* @returns {Array.<Object.<string, string>>}
*/
window.scrapeSchema = function (lists) {
window.scrapeSchema = function(lists) {
// Utility functions remain the same
function omap(object, f, kf = (x) => x) {
return Object.fromEntries(
Object.entries(object)
.map(([k, v]) => [kf(k), f(v)]),
.map(([k, v]) => [kf(k), f(v)]),
);
}
function ofilter(object, f) {
return Object.fromEntries(
Object.entries(object)
.filter(([k, v]) => f(k, v)),
.filter(([k, v]) => f(k, v)),
);
}
function getSeedKey(listObj) {
const maxLength = Math.max(...Object.values(omap(listObj, (x) => document.querySelectorAll(x.selector).length)));
return Object.keys(ofilter(listObj, (_, v) => document.querySelectorAll(v.selector).length === maxLength))[0];
function findAllElements(config) {
// Regular DOM query if no special delimiters
if (!config.selector.includes('>>') && !config.selector.includes(':>>')) {
return Array.from(document.querySelectorAll(config.selector));
}
// First handle iframe traversal if present
if (config.selector.includes(':>>')) {
const parts = config.selector.split(':>>').map(s => s.trim());
let currentElements = [document];
// Traverse through each part of the selector
for (let i = 0; i < parts.length; i++) {
const part = parts[i];
const nextElements = [];
const isLast = i === parts.length - 1;
for (const element of currentElements) {
try {
// For document or iframe document
const doc = element.contentDocument || element || element.contentWindow?.document;
if (!doc) continue;
// Query elements in current context
const found = Array.from(doc.querySelectorAll(part));
if (isLast) {
// If it's the last part, keep all matching elements
nextElements.push(...found);
} else {
// If not last, only keep iframes for next iteration
const iframes = found.filter(el => el.tagName === 'IFRAME');
nextElements.push(...iframes);
}
} catch (error) {
console.warn('Cannot access iframe content:', error, {
part,
element,
index: i
});
}
}
if (nextElements.length === 0) {
console.warn('No elements found for part:', part, 'at depth:', i);
return [];
}
currentElements = nextElements;
}
return currentElements;
}
// Handle shadow DOM traversal
if (config.selector.includes('>>')) {
const parts = config.selector.split('>>').map(s => s.trim());
let currentElements = [document];
for (const part of parts) {
const nextElements = [];
for (const element of currentElements) {
// Try regular DOM first
const found = Array.from(element.querySelectorAll(part));
// Then check shadow roots
for (const foundEl of found) {
if (foundEl.shadowRoot) {
nextElements.push(foundEl.shadowRoot);
} else {
nextElements.push(foundEl);
}
}
}
currentElements = nextElements;
}
return currentElements.filter(el => !(el instanceof ShadowRoot));
}
return [];
}
// Modified to handle iframe context for URL resolution
function getElementValue(element, attribute) {
if (!element) return null;
// Get the base URL for resolving relative URLs
const baseURL = element.ownerDocument?.location?.href || window.location.origin;
switch (attribute) {
case 'href': {
const relativeHref = element.getAttribute('href');
return relativeHref ? new URL(relativeHref, baseURL).href : null;
}
case 'src': {
const relativeSrc = element.getAttribute('src');
return relativeSrc ? new URL(relativeSrc, baseURL).href : null;
}
case 'innerText':
return element.innerText?.trim();
case 'textContent':
return element.textContent?.trim();
default:
return element.getAttribute(attribute) || element.innerText?.trim();
}
}
// Rest of the functions remain largely the same
function getSeedKey(listObj) {
const maxLength = Math.max(...Object.values(
omap(listObj, (x) => findAllElements(x).length)
));
return Object.keys(
ofilter(listObj, (_, v) => findAllElements(v).length === maxLength)
)[0];
}
// Find minimal bounding elements
function getMBEs(elements) {
return elements.map((element) => {
let candidate = element;
const isUniqueChild = (e) => elements
.filter((elem) => e.parentNode?.contains(elem))
.filter((elem) => {
// Handle both iframe and shadow DOM boundaries
const sameContext = elem.getRootNode() === e.getRootNode() &&
elem.ownerDocument === e.ownerDocument;
return sameContext && e.parentNode?.contains(elem);
})
.length === 1;
while (candidate && isUniqueChild(candidate)) {
candidate = candidate.parentNode;
}
return candidate;
});
}
const seedName = getSeedKey(lists);
const seedElements = Array.from(document.querySelectorAll(lists[seedName].selector));
const seedElements = findAllElements(lists[seedName]);
const MBEs = getMBEs(seedElements);
return MBEs.map((mbe) => omap(
lists,
({ selector, attribute }, key) => {
const elem = Array.from(document.querySelectorAll(selector)).find((elem) => mbe.contains(elem));
if (!elem) return undefined;
switch (attribute) {
case 'href':
const relativeHref = elem.getAttribute('href');
return relativeHref ? new URL(relativeHref, window.location.origin).href : null;
case 'src':
const relativeSrc = elem.getAttribute('src');
return relativeSrc ? new URL(relativeSrc, window.location.origin).href : null;
case 'innerText':
return elem.innerText;
case 'textContent':
return elem.textContent;
default:
return elem.innerText;
}
},
(key) => key // Use the original key in the output
const mbeResults = MBEs.map((mbe) => omap(
lists,
(config) => {
const elem = findAllElements(config)
.find((elem) => mbe.contains(elem));
return elem ? getElementValue(elem, config.attribute) : undefined;
},
(key) => key
)) || [];
}
// If MBE approach didn't find all elements, try independent scraping
if (mbeResults.some(result => Object.values(result).some(v => v === undefined))) {
// Fall back to independent scraping
const results = [];
const foundElements = new Map();
// Find all elements for each selector
Object.entries(lists).forEach(([key, config]) => {
const elements = findAllElements(config);
foundElements.set(key, elements);
});
// Create result objects for each found element
foundElements.forEach((elements, key) => {
elements.forEach((element, index) => {
if (!results[index]) {
results[index] = {};
}
results[index][key] = getElementValue(element, lists[key].attribute);
});
});
return results.filter(result => Object.keys(result).length > 0);
}
return mbeResults;
};
/**
* Scrapes multiple lists of similar items based on a template item.
@@ -262,108 +394,275 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
* @returns {Array.<Array.<Object>>} Array of arrays of scraped items, one sub-array per list
*/
window.scrapeList = async function ({ listSelector, fields, limit = 10 }) {
// Helper function to extract values from elements
// Enhanced query function to handle both iframe and shadow DOM
const queryElement = (rootElement, selector) => {
if (!selector.includes('>>') && !selector.includes(':>>')) {
return rootElement.querySelector(selector);
}
const parts = selector.split(/(?:>>|:>>)/).map(part => part.trim());
let currentElement = rootElement;
for (let i = 0; i < parts.length; i++) {
if (!currentElement) return null;
// Handle iframe traversal
if (currentElement.tagName === 'IFRAME') {
try {
const iframeDoc = currentElement.contentDocument || currentElement.contentWindow.document;
currentElement = iframeDoc.querySelector(parts[i]);
continue;
} catch (e) {
console.warn('Cannot access iframe content:', e);
return null;
}
}
// Try regular DOM first
let nextElement = currentElement.querySelector(parts[i]);
// Try shadow DOM if not found
if (!nextElement && currentElement.shadowRoot) {
nextElement = currentElement.shadowRoot.querySelector(parts[i]);
}
// Check children's shadow roots if still not found
if (!nextElement) {
const children = Array.from(currentElement.children || []);
for (const child of children) {
if (child.shadowRoot) {
nextElement = child.shadowRoot.querySelector(parts[i]);
if (nextElement) break;
}
}
}
currentElement = nextElement;
}
return currentElement;
};
// Enhanced query all function for both contexts
const queryElementAll = (rootElement, selector) => {
if (!selector.includes('>>') && !selector.includes(':>>')) {
return rootElement.querySelectorAll(selector);
}
const parts = selector.split(/(?:>>|:>>)/).map(part => part.trim());
let currentElements = [rootElement];
for (const part of parts) {
const nextElements = [];
for (const element of currentElements) {
// Handle iframe traversal
if (element.tagName === 'IFRAME') {
try {
const iframeDoc = element.contentDocument || element.contentWindow.document;
nextElements.push(...iframeDoc.querySelectorAll(part));
} catch (e) {
console.warn('Cannot access iframe content:', e);
continue;
}
} else {
// Regular DOM elements
if (element.querySelectorAll) {
nextElements.push(...element.querySelectorAll(part));
}
// Shadow DOM elements
if (element.shadowRoot) {
nextElements.push(...element.shadowRoot.querySelectorAll(part));
}
// Check children's shadow roots
const children = Array.from(element.children || []);
for (const child of children) {
if (child.shadowRoot) {
nextElements.push(...child.shadowRoot.querySelectorAll(part));
}
}
}
}
currentElements = nextElements;
}
return currentElements;
};
// Enhanced value extraction with context awareness
function extractValue(element, attribute) {
if (!element) return null;
if (attribute === 'innerText') {
return element.innerText.trim();
} else if (attribute === 'innerHTML') {
return element.innerHTML.trim();
} else if (attribute === 'src' || attribute === 'href') {
const attrValue = element.getAttribute(attribute);
return attrValue ? new URL(attrValue, window.location.origin).href : null;
}
return element.getAttribute(attribute);
if (!element) return null;
// Get context-aware base URL
const baseURL = element.ownerDocument?.location?.href || window.location.origin;
// Check shadow root first
if (element.shadowRoot) {
const shadowContent = element.shadowRoot.textContent;
if (shadowContent?.trim()) {
return shadowContent.trim();
}
}
if (attribute === 'innerText') {
return element.innerText.trim();
} else if (attribute === 'innerHTML') {
return element.innerHTML.trim();
} else if (attribute === 'src' || attribute === 'href') {
const attrValue = element.getAttribute(attribute);
return attrValue ? new URL(attrValue, baseURL).href : null;
}
return element.getAttribute(attribute);
}
// Helper function to find table ancestors
// Enhanced table ancestor finding with context support
function findTableAncestor(element) {
let currentElement = element;
const MAX_DEPTH = 5;
let depth = 0;
while (currentElement && depth < MAX_DEPTH) {
if (currentElement.tagName === 'TD') {
return { type: 'TD', element: currentElement };
} else if (currentElement.tagName === 'TR') {
return { type: 'TR', element: currentElement };
}
currentElement = currentElement.parentElement;
depth++;
}
return null;
let currentElement = element;
const MAX_DEPTH = 5;
let depth = 0;
while (currentElement && depth < MAX_DEPTH) {
// Handle shadow DOM
if (currentElement.getRootNode() instanceof ShadowRoot) {
currentElement = currentElement.getRootNode().host;
continue;
}
if (currentElement.tagName === 'TD') {
return { type: 'TD', element: currentElement };
} else if (currentElement.tagName === 'TR') {
return { type: 'TR', element: currentElement };
}
// Handle iframe crossing
if (currentElement.tagName === 'IFRAME') {
try {
currentElement = currentElement.contentDocument.body;
} catch (e) {
return null;
}
} else {
currentElement = currentElement.parentElement;
}
depth++;
}
return null;
}
// Helper function to get cell index
function getCellIndex(td) {
let index = 0;
let sibling = td;
while (sibling = sibling.previousElementSibling) {
index++;
}
return index;
if (td.getRootNode() instanceof ShadowRoot) {
const shadowRoot = td.getRootNode();
const allCells = Array.from(shadowRoot.querySelectorAll('td'));
return allCells.indexOf(td);
}
let index = 0;
let sibling = td;
while (sibling = sibling.previousElementSibling) {
index++;
}
return index;
}
// Helper function to check for TH elements
function hasThElement(row, tableFields) {
for (const [label, { selector }] of Object.entries(tableFields)) {
const element = row.querySelector(selector);
if (element) {
let current = element;
while (current && current !== row) {
if (current.tagName === 'TH') {
return true;
}
current = current.parentElement;
}
}
}
return false;
for (const [_, { selector }] of Object.entries(tableFields)) {
const element = queryElement(row, selector);
if (element) {
let current = element;
while (current && current !== row) {
if (current.getRootNode() instanceof ShadowRoot) {
current = current.getRootNode().host;
continue;
}
if (current.tagName === 'TH') return true;
if (current.tagName === 'IFRAME') {
try {
current = current.contentDocument.body;
} catch (e) {
break;
}
} else {
current = current.parentElement;
}
}
}
}
return false;
}
// Helper function to filter rows
function filterRowsBasedOnTag(rows, tableFields) {
for (const row of rows) {
if (hasThElement(row, tableFields)) {
return rows;
}
}
return rows.filter(row => row.getElementsByTagName('TH').length === 0);
// Include shadow DOM in TH search
return rows.filter(row => {
const directTH = row.getElementsByTagName('TH').length === 0;
const shadowTH = row.shadowRoot ?
row.shadowRoot.querySelector('th') === null : true;
return directTH && shadowTH;
});
}
// Class similarity comparison functions
function calculateClassSimilarity(classList1, classList2) {
const set1 = new Set(classList1);
const set2 = new Set(classList2);
// Calculate intersection
const intersection = new Set([...set1].filter(x => set2.has(x)));
// Calculate union
const union = new Set([...set1, ...set2]);
// Return Jaccard similarity coefficient
return intersection.size / union.size;
}
const set1 = new Set(classList1);
const set2 = new Set(classList2);
const intersection = new Set([...set1].filter(x => set2.has(x)));
const union = new Set([...set1, ...set2]);
return intersection.size / union.size;
}
// New helper function to find elements with similar classes
// Enhanced similar elements finding with context support
function findSimilarElements(baseElement, similarityThreshold = 0.7) {
const baseClasses = Array.from(baseElement.classList);
if (baseClasses.length === 0) return [];
const allElements = [];
const potentialElements = document.getElementsByTagName(baseElement.tagName);
// Get elements from main document
allElements.push(...document.getElementsByTagName(baseElement.tagName));
return Array.from(potentialElements).filter(element => {
if (element === baseElement) return false;
const similarity = calculateClassSimilarity(
baseClasses,
Array.from(element.classList)
);
return similarity >= similarityThreshold;
// Get elements from shadow DOM
if (baseElement.getRootNode() instanceof ShadowRoot) {
const shadowHost = baseElement.getRootNode().host;
allElements.push(...shadowHost.getElementsByTagName(baseElement.tagName));
}
// Get elements from iframes
const iframes = document.getElementsByTagName('iframe');
for (const iframe of iframes) {
try {
const iframeDoc = iframe.contentDocument || iframe.contentWindow.document;
allElements.push(...iframeDoc.getElementsByTagName(baseElement.tagName));
} catch (e) {
console.warn('Cannot access iframe content:', e);
}
}
return allElements.filter(element => {
if (element === baseElement) return false;
const similarity = calculateClassSimilarity(
baseClasses,
Array.from(element.classList)
);
return similarity >= similarityThreshold;
});
}
let containers = Array.from(document.querySelectorAll(listSelector));
// Main scraping logic with context support
let containers = queryElementAll(document, listSelector);
containers = Array.from(containers);
if (containers.length === 0) return [];
if (limit > 1 && containers.length === 1) {
@@ -374,115 +673,157 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
const newContainers = similarContainers.filter(container =>
!container.matches(listSelector)
);
containers = [...containers, ...newContainers];
}
}
// Initialize arrays to store field classifications for each container
const containerFields = containers.map(() => ({
tableFields: {},
nonTableFields: {}
tableFields: {},
nonTableFields: {}
}));
// Analyze field types for each container
// Classify fields
containers.forEach((container, containerIndex) => {
for (const [label, field] of Object.entries(fields)) {
const sampleElement = container.querySelector(field.selector);
if (sampleElement) {
const ancestor = findTableAncestor(sampleElement);
if (ancestor) {
containerFields[containerIndex].tableFields[label] = {
...field,
tableContext: ancestor.type,
cellIndex: ancestor.type === 'TD' ? getCellIndex(ancestor.element) : -1
};
} else {
containerFields[containerIndex].nonTableFields[label] = field;
}
for (const [label, field] of Object.entries(fields)) {
const sampleElement = queryElement(container, field.selector);
if (sampleElement) {
const ancestor = findTableAncestor(sampleElement);
if (ancestor) {
containerFields[containerIndex].tableFields[label] = {
...field,
tableContext: ancestor.type,
cellIndex: ancestor.type === 'TD' ? getCellIndex(ancestor.element) : -1
};
} else {
containerFields[containerIndex].nonTableFields[label] = field;
}
} else {
containerFields[containerIndex].nonTableFields[label] = field;
}
}
});
const tableData = [];
const nonTableData = [];
// Process table fields across all containers
// Process table data with both iframe and shadow DOM support
for (let containerIndex = 0; containerIndex < containers.length; containerIndex++) {
const container = containers[containerIndex];
const { tableFields } = containerFields[containerIndex];
if (Object.keys(tableFields).length > 0) {
const firstField = Object.values(tableFields)[0];
const firstElement = container.querySelector(firstField.selector);
let tableContext = firstElement;
while (tableContext && tableContext.tagName !== 'TABLE' && tableContext !== container) {
tableContext = tableContext.parentElement;
}
if (tableContext) {
const rows = Array.from(tableContext.getElementsByTagName('TR'));
const processedRows = filterRowsBasedOnTag(rows, tableFields);
for (let rowIndex = 0; rowIndex < Math.min(processedRows.length, limit); rowIndex++) {
const record = {};
const currentRow = processedRows[rowIndex];
for (const [label, { selector, attribute, cellIndex }] of Object.entries(tableFields)) {
let element = null;
if (cellIndex >= 0) {
const td = currentRow.children[cellIndex];
if (td) {
element = td.querySelector(selector);
if (!element && selector.split(">").pop().includes('td:nth-child')) {
element = td;
const firstField = Object.values(tableFields)[0];
const firstElement = queryElement(container, firstField.selector);
let tableContext = firstElement;
// Find table context including both iframe and shadow DOM
while (tableContext && tableContext.tagName !== 'TABLE' && tableContext !== container) {
if (tableContext.getRootNode() instanceof ShadowRoot) {
tableContext = tableContext.getRootNode().host;
continue;
}
if (tableContext.tagName === 'IFRAME') {
try {
tableContext = tableContext.contentDocument.body;
} catch (e) {
break;
}
} else {
tableContext = tableContext.parentElement;
}
}
if (!element) {
const tagOnlySelector = selector.split('.')[0];
element = td.querySelector(tagOnlySelector);
if (tableContext) {
// Get rows from all contexts
const rows = [];
// Get rows from regular DOM
rows.push(...tableContext.getElementsByTagName('TR'));
// Get rows from shadow DOM
if (tableContext.shadowRoot) {
rows.push(...tableContext.shadowRoot.getElementsByTagName('TR'));
}
// Get rows from iframes
if (tableContext.tagName === 'IFRAME') {
try {
const iframeDoc = tableContext.contentDocument || tableContext.contentWindow.document;
rows.push(...iframeDoc.getElementsByTagName('TR'));
} catch (e) {
console.warn('Cannot access iframe rows:', e);
}
}
const processedRows = filterRowsBasedOnTag(rows, tableFields);
for (let rowIndex = 0; rowIndex < Math.min(processedRows.length, limit); rowIndex++) {
const record = {};
const currentRow = processedRows[rowIndex];
if (!element) {
let currentElement = td;
while (currentElement && currentElement.children.length > 0) {
let foundContentChild = false;
for (const child of currentElement.children) {
if (extractValue(child, attribute)) {
currentElement = child;
foundContentChild = true;
break;
for (const [label, { selector, attribute, cellIndex }] of Object.entries(tableFields)) {
let element = null;
if (cellIndex >= 0) {
// Get TD element considering both contexts
let td = currentRow.children[cellIndex];
// Check shadow DOM for td
if (!td && currentRow.shadowRoot) {
const shadowCells = currentRow.shadowRoot.children;
if (shadowCells && shadowCells.length > cellIndex) {
td = shadowCells[cellIndex];
}
}
if (!foundContentChild) break;
}
element = currentElement;
}
}
} else {
element = currentRow.querySelector(selector);
}
if (element) {
record[label] = extractValue(element, attribute);
}
}
if (td) {
element = queryElement(td, selector);
if (!element && selector.split(/(?:>>|:>>)/).pop().includes('td:nth-child')) {
element = td;
}
if (Object.keys(record).length > 0) {
tableData.push(record);
}
if (!element) {
const tagOnlySelector = selector.split('.')[0];
element = queryElement(td, tagOnlySelector);
}
if (!element) {
let currentElement = td;
while (currentElement && currentElement.children.length > 0) {
let foundContentChild = false;
for (const child of currentElement.children) {
if (extractValue(child, attribute)) {
currentElement = child;
foundContentChild = true;
break;
}
}
if (!foundContentChild) break;
}
element = currentElement;
}
}
} else {
element = queryElement(currentRow, selector);
}
if (element) {
record[label] = extractValue(element, attribute);
}
}
if (Object.keys(record).length > 0) {
tableData.push(record);
}
}
}
}
}
}
// Process non-table fields across all containers
// Process non-table data with both contexts support
for (let containerIndex = 0; containerIndex < containers.length; containerIndex++) {
if (nonTableData.length >= limit) break;
@@ -490,26 +831,28 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
const { nonTableFields } = containerFields[containerIndex];
if (Object.keys(nonTableFields).length > 0) {
const record = {};
const record = {};
for (const [label, { selector, attribute }] of Object.entries(nonTableFields)) {
const element = container.querySelector(selector);
if (element) {
record[label] = extractValue(element, attribute);
}
}
for (const [label, { selector, attribute }] of Object.entries(nonTableFields)) {
// Get the last part of the selector after any context delimiter
const relativeSelector = selector.split(/(?:>>|:>>)/).slice(-1)[0];
const element = queryElement(container, relativeSelector);
if (Object.keys(record).length > 0) {
nonTableData.push(record);
}
}
if (element) {
record[label] = extractValue(element, attribute);
}
}
if (Object.keys(record).length > 0) {
nonTableData.push(record);
}
}
}
// Merge and limit the results
const scrapedData = [...tableData, ...nonTableData];
return scrapedData;
};
};
/**
* Gets all children of the elements matching the listSelector,

View File

@@ -403,7 +403,7 @@ export default class Interpreter extends EventEmitter {
await this.options.serializableCallback(scrapeResults);
},
scrapeSchema: async (schema: Record<string, { selector: string; tag: string, attribute: string; }>) => {
scrapeSchema: async (schema: Record<string, { selector: string; tag: string, attribute: string; shadow: string}>) => {
await this.ensureScriptsLoaded(page);
const scrapeResult = await page.evaluate((schemaObj) => window.scrapeSchema(schemaObj), schema);
@@ -663,11 +663,42 @@ export default class Interpreter extends EventEmitter {
if (isApplicable) {
return actionId;
}
}
}
/**
 * Strips every selector containing the `>>` delimiter from each step's
 * `where.selectors` list. Because `:>>` also contains `>>`, this removes the
 * same selectors as `removeSpecialSelectors`.
 *
 * The workflow is modified in place, visiting steps from last to first.
 *
 * @param workflow Workflow whose steps are filtered.
 * @returns The same workflow instance that was passed in.
 */
private removeShadowSelectors(workflow: Workflow) {
  let index = workflow.length;
  while (index-- > 0) {
    const { where } = workflow[index];

    // Steps without a selector list are left untouched.
    if (!where || !Array.isArray(where.selectors)) continue;

    where.selectors = where.selectors.filter(
      (candidate) => candidate.includes('>>') === false
    );
  }

  return workflow;
}
/**
 * Strips every selector containing an iframe (`:>>`) or shadow DOM (`>>`)
 * delimiter from each step's `where.selectors` list.
 *
 * The workflow is modified in place, visiting steps from last to first.
 *
 * @param workflow Workflow whose steps are filtered.
 * @returns The same workflow instance that was passed in.
 */
private removeSpecialSelectors(workflow: Workflow) {
  for (let index = workflow.length; index-- > 0; ) {
    const { where } = workflow[index];

    // Steps without a selector list are left untouched.
    if (!where || !Array.isArray(where.selectors)) continue;

    // Keep a selector only when it contains neither delimiter.
    where.selectors = where.selectors.filter(
      (candidate) => !candidate.includes(':>>') && !candidate.includes('>>')
    );
  }

  return workflow;
}
private async runLoop(p: Page, workflow: Workflow) {
const workflowCopy: Workflow = JSON.parse(JSON.stringify(workflow));
let workflowCopy: Workflow = JSON.parse(JSON.stringify(workflow));
workflowCopy = this.removeSpecialSelectors(workflowCopy);
// apply ad-blocker to the current page
try {

View File

@@ -1,6 +1,6 @@
{
"name": "maxun",
"version": "0.0.5",
"version": "0.0.6",
"author": "Maxun",
"license": "AGPL-3.0-or-later",
"dependencies": {
@@ -44,9 +44,10 @@
"joi": "^17.6.0",
"jsonwebtoken": "^9.0.2",
"jwt-decode": "^4.0.0",
"lodash": "^4.17.21",
"loglevel": "^1.8.0",
"loglevel-plugin-remote": "^0.6.8",
"maxun-core": "^0.0.7",
"maxun-core": "^0.0.8",
"minio": "^8.0.1",
"moment-timezone": "^0.5.45",
"node-cron": "^3.0.3",
@@ -66,6 +67,7 @@
"react-transition-group": "^4.4.2",
"sequelize": "^6.37.3",
"sequelize-typescript": "^2.1.6",
"sharp": "^0.33.5",
"socket.io": "^4.4.1",
"socket.io-client": "^4.4.1",
"styled-components": "^5.3.3",
@@ -97,6 +99,7 @@
"@types/cookie-parser": "^1.4.7",
"@types/express": "^4.17.13",
"@types/js-cookie": "^3.0.6",
"@types/lodash": "^4.17.14",
"@types/loglevel": "^1.6.3",
"@types/node": "22.7.9",
"@types/node-cron": "^3.0.11",

181
perf/performance.ts Normal file
View File

@@ -0,0 +1,181 @@
// Frontend Performance Monitoring
/**
 * Collects lightweight runtime metrics in the browser: frames per second,
 * JS heap usage (only where the non-standard `performance.memory` exists),
 * render durations and input-event latency.
 *
 * Monitoring starts on construction and runs until {@link stop} is called.
 * Every metric keeps only its most recent `maxSamples` values, so a
 * long-lived page does not accumulate samples without bound; averages are
 * therefore computed over that sliding window.
 */
export class FrontendPerformanceMonitor {
    private metrics: {
        fps: number[];
        memoryUsage: MemoryInfo[];
        renderTime: number[];
        eventLatency: number[];
    };
    private lastFrameTime: number;
    private frameCount: number;
    private readonly maxSamples: number;
    private running: boolean;
    private frameRequestId: number | undefined;
    private memoryTimer: ReturnType<typeof setInterval> | undefined;

    /**
     * @param maxSamples Maximum number of samples retained per metric.
     *   Values that are not finite or are below 1 fall back to 1000.
     */
    constructor(maxSamples = 1000) {
        this.maxSamples =
            Number.isFinite(maxSamples) && maxSamples >= 1 ? Math.floor(maxSamples) : 1000;
        this.metrics = {
            fps: [],
            memoryUsage: [],
            renderTime: [],
            eventLatency: [],
        };
        this.lastFrameTime = performance.now();
        this.frameCount = 0;
        this.running = false;
        this.frameRequestId = undefined;
        this.memoryTimer = undefined;

        // Start monitoring
        this.startMonitoring();
    }

    /**
     * Stops the frame loop and the memory sampler. Samples collected so far
     * stay available through {@link getPerformanceReport}. Safe to call more
     * than once.
     */
    public stop(): void {
        this.running = false;
        if (this.frameRequestId !== undefined) {
            cancelAnimationFrame(this.frameRequestId);
            this.frameRequestId = undefined;
        }
        if (this.memoryTimer !== undefined) {
            clearInterval(this.memoryTimer);
            this.memoryTimer = undefined;
        }
    }

    /** Starts the FPS loop and, where supported, the once-per-second heap sampler. */
    private startMonitoring(): void {
        this.running = true;

        // Monitor FPS (skipped where requestAnimationFrame is unavailable).
        if (typeof requestAnimationFrame === 'function') {
            const measureFPS = () => {
                // A frame already queued when stop() ran must not reschedule itself.
                if (!this.running) return;

                const currentTime = performance.now();
                const elapsed = currentTime - this.lastFrameTime;
                this.frameCount++;

                if (elapsed >= 1000) { // Calculate FPS every second
                    const fps = Math.round((this.frameCount * 1000) / elapsed);
                    this.record(this.metrics.fps, fps);
                    this.frameCount = 0;
                    this.lastFrameTime = currentTime;
                }

                this.frameRequestId = requestAnimationFrame(measureFPS);
            };
            this.frameRequestId = requestAnimationFrame(measureFPS);
        }

        // Monitor Memory Usage. `performance.memory` is a non-standard API,
        // hence the feature check and the loose cast.
        if (typeof performance !== 'undefined' && (performance as any).memory) {
            this.memoryTimer = setInterval(() => {
                const memory = (performance as any).memory;
                this.record(this.metrics.memoryUsage, {
                    usedJSHeapSize: memory.usedJSHeapSize,
                    totalJSHeapSize: memory.totalJSHeapSize,
                    timestamp: Date.now()
                });
            }, 1000);
        }
    }

    /** Appends a sample and drops the oldest ones beyond `maxSamples`. */
    private record<T>(samples: T[], value: T): void {
        samples.push(value);
        const overflow = samples.length - this.maxSamples;
        if (overflow > 0) {
            samples.splice(0, overflow);
        }
    }

    /**
     * Runs `renderFunction` synchronously and records how long it took in
     * milliseconds. If the function throws, nothing is recorded and the
     * error propagates.
     */
    public measureRenderTime(renderFunction: () => void): void {
        const startTime = performance.now();
        renderFunction();
        const endTime = performance.now();
        this.record(this.metrics.renderTime, endTime - startTime);
    }

    /**
     * Records the delay between the event's `timeStamp` and the moment this
     * method is called.
     */
    public measureEventLatency(event: MouseEvent | KeyboardEvent): void {
        const latency = performance.now() - event.timeStamp;
        this.record(this.metrics.eventLatency, latency);
    }

    /**
     * Summarises the retained samples.
     *
     * @returns Averages (0 when a metric has no samples), the heap trend and
     *   the latest heap sample. `lastMemoryUsage` is `undefined` at runtime
     *   when no heap sample has been taken.
     */
    public getPerformanceReport(): PerformanceReport {
        return {
            averageFPS: this.calculateAverage(this.metrics.fps),
            averageRenderTime: this.calculateAverage(this.metrics.renderTime),
            averageEventLatency: this.calculateAverage(this.metrics.eventLatency),
            memoryTrend: this.getMemoryTrend(),
            lastMemoryUsage: this.metrics.memoryUsage[this.metrics.memoryUsage.length - 1]
        };
    }

    /** Arithmetic mean of `array`, or 0 for an empty array. */
    private calculateAverage(array: number[]): number {
        return array.length ? array.reduce((a, b) => a + b, 0) / array.length : 0;
    }

    /**
     * Compares the two most recent heap samples; a change of more than
     * 1,000,000 bytes in either direction counts as a trend.
     */
    private getMemoryTrend(): MemoryTrend {
        if (this.metrics.memoryUsage.length < 2) return 'stable';
        const latest = this.metrics.memoryUsage[this.metrics.memoryUsage.length - 1];
        const previous = this.metrics.memoryUsage[this.metrics.memoryUsage.length - 2];
        const change = latest.usedJSHeapSize - previous.usedJSHeapSize;

        if (change > 1000000) return 'increasing'; // 1MB threshold
        if (change < -1000000) return 'decreasing';
        return 'stable';
    }
}
// Backend Performance Monitoring
/**
 * Collects lightweight runtime metrics in the Node.js process: screenshot
 * durations, emit durations and `process.memoryUsage()` samples taken once
 * per second.
 *
 * Sampling starts on construction and runs until {@link stop} is called.
 * Every metric keeps only its most recent `maxSamples` values, so a
 * long-running server does not accumulate samples without bound; averages
 * are therefore computed over that sliding window.
 */
export class BackendPerformanceMonitor {
    private metrics: {
        screenshotTimes: number[];
        emitTimes: number[];
        memoryUsage: NodeJS.MemoryUsage[];
    };
    private readonly maxSamples: number;
    private memoryTimer: ReturnType<typeof setInterval> | undefined;

    /**
     * @param maxSamples Maximum number of samples retained per metric.
     *   Values that are not finite or are below 1 fall back to 1000.
     */
    constructor(maxSamples = 1000) {
        this.maxSamples =
            Number.isFinite(maxSamples) && maxSamples >= 1 ? Math.floor(maxSamples) : 1000;
        this.metrics = {
            screenshotTimes: [],
            emitTimes: [],
            memoryUsage: []
        };
        this.memoryTimer = undefined;
        this.startMonitoring();
    }

    /**
     * Stops the memory sampler. Samples collected so far stay available
     * through {@link getPerformanceReport}. Safe to call more than once.
     */
    public stop(): void {
        if (this.memoryTimer !== undefined) {
            clearInterval(this.memoryTimer);
            this.memoryTimer = undefined;
        }
    }

    /** Starts the once-per-second memory sampler. */
    private startMonitoring(): void {
        // Monitor Memory Usage
        this.memoryTimer = setInterval(() => {
            this.record(this.metrics.memoryUsage, process.memoryUsage());
        }, 1000);
    }

    /** Appends a sample and drops the oldest ones beyond `maxSamples`. */
    private record<T>(samples: T[], value: T): void {
        samples.push(value);
        const overflow = samples.length - this.maxSamples;
        if (overflow > 0) {
            samples.splice(0, overflow);
        }
    }

    /** Milliseconds elapsed since a `process.hrtime()` start mark. */
    private elapsedMs(startTime: [number, number]): number {
        const [seconds, nanoseconds] = process.hrtime(startTime);
        return seconds * 1000 + nanoseconds / 1000000;
    }

    /**
     * Awaits `makeScreenshot` and records its duration in milliseconds.
     * If it rejects, nothing is recorded and the rejection propagates.
     */
    public async measureScreenshotPerformance(
        makeScreenshot: () => Promise<void>
    ): Promise<void> {
        const startTime = process.hrtime();
        await makeScreenshot();
        this.record(this.metrics.screenshotTimes, this.elapsedMs(startTime));
    }

    /**
     * Runs `emitFunction` synchronously and records its duration in
     * milliseconds. If it throws, nothing is recorded and the error
     * propagates.
     */
    public measureEmitPerformance(emitFunction: () => void): void {
        const startTime = process.hrtime();
        emitFunction();
        this.record(this.metrics.emitTimes, this.elapsedMs(startTime));
    }

    /**
     * Summarises the retained samples.
     *
     * @returns Averages (0 when a metric has no samples), the heap trend and
     *   the latest memory sample. Before the first sample has been taken, a
     *   fresh `process.memoryUsage()` reading is returned so the field is
     *   always defined.
     */
    public getPerformanceReport(): BackendPerformanceReport {
        const samples = this.metrics.memoryUsage;
        return {
            averageScreenshotTime: this.calculateAverage(this.metrics.screenshotTimes),
            averageEmitTime: this.calculateAverage(this.metrics.emitTimes),
            currentMemoryUsage: samples.length > 0
                ? samples[samples.length - 1]
                : process.memoryUsage(),
            memoryTrend: this.getMemoryTrend()
        };
    }

    /** Arithmetic mean of `array`, or 0 for an empty array. */
    private calculateAverage(array: number[]): number {
        return array.length ? array.reduce((a, b) => a + b, 0) / array.length : 0;
    }

    /**
     * Compares `heapUsed` of the two most recent samples; a change of more
     * than 1,000,000 bytes in either direction counts as a trend.
     */
    private getMemoryTrend(): MemoryTrend {
        if (this.metrics.memoryUsage.length < 2) return 'stable';
        const latest = this.metrics.memoryUsage[this.metrics.memoryUsage.length - 1];
        const previous = this.metrics.memoryUsage[this.metrics.memoryUsage.length - 2];
        const change = latest.heapUsed - previous.heapUsed;

        if (change > 1000000) return 'increasing';
        if (change < -1000000) return 'decreasing';
        return 'stable';
    }
}
/**
 * One JS heap sample recorded by the frontend monitor, copied from the
 * browser's `performance.memory` object.
 */
interface MemoryInfo {
/** `performance.memory.usedJSHeapSize` at sampling time. */
usedJSHeapSize: number;
/** `performance.memory.totalJSHeapSize` at sampling time. */
totalJSHeapSize: number;
/** `Date.now()` value captured when the sample was taken. */
timestamp: number;
}
/**
 * Direction of heap usage between the two most recent samples. Both monitors
 * report 'increasing' / 'decreasing' only when the change exceeds 1,000,000
 * bytes, and 'stable' otherwise (including when fewer than two samples exist).
 */
type MemoryTrend = 'increasing' | 'decreasing' | 'stable';
/** Summary returned by `FrontendPerformanceMonitor.getPerformanceReport()`. */
interface PerformanceReport {
/** Mean of the recorded FPS samples; 0 when none were recorded. */
averageFPS: number;
/** Mean of the recorded render durations; 0 when none were recorded. */
averageRenderTime: number;
/** Mean of the recorded event latencies; 0 when none were recorded. */
averageEventLatency: number;
/** Heap trend derived from the two most recent memory samples. */
memoryTrend: MemoryTrend;
/**
 * Most recent memory sample. NOTE(review): the monitor reads the last array
 * element without a guard, so this is `undefined` at runtime when no sample
 * has been recorded, despite the non-optional type.
 */
lastMemoryUsage: MemoryInfo;
}
/**
 * Shape returned by `getPerformanceReport()`.
 *
 * The two averages are in milliseconds (timings are recorded as
 * `seconds * 1000 + nanoseconds / 1000000`). `currentMemoryUsage` is the most
 * recent `process.memoryUsage()` sample.
 */
interface BackendPerformanceReport {
averageScreenshotTime: number;
averageEmitTime: number;
currentMemoryUsage: NodeJS.MemoryUsage;
memoryTrend: MemoryTrend;
}

View File

@@ -158,11 +158,13 @@
"confirm": "Bestätigen",
"discard": "Verwerfen",
"confirm_capture": "Erfassung bestätigen",
"confirm_pagination": "Paginierung bestätigen",
"confirm_limit": "Limit bestätigen",
"confirm_pagination": "Bestätigen",
"confirm_limit": "Bestätigen",
"finish_capture": "Erfassung abschließen",
"back": "Zurück",
"finish": "Fertig",
"cancel": "Abbrechen"
"cancel": "Abbrechen",
"delete": "Löschen"
},
"screenshot": {
"capture_fullpage": "Vollständige Seite erfassen",

View File

@@ -159,11 +159,13 @@
"confirm": "Confirm",
"discard": "Discard",
"confirm_capture": "Confirm Capture",
"confirm_pagination": "Confirm Pagination",
"confirm_limit": "Confirm Limit",
"confirm_pagination": "Confirm",
"confirm_limit": "Confirm",
"finish_capture": "Finish Capture",
"back": "Back",
"finish": "Finish",
"cancel": "Cancel"
"cancel": "Cancel",
"delete": "Delete"
},
"screenshot": {
"capture_fullpage": "Capture Fullpage",

View File

@@ -159,11 +159,13 @@
"confirm": "Confirmar",
"discard": "Descartar",
"confirm_capture": "Confirmar Captura",
"confirm_pagination": "Confirmar Paginación",
"confirm_limit": "Confirmar Límite",
"confirm_pagination": "Confirmar",
"confirm_limit": "Confirmar",
"finish_capture": "Finalizar Captura",
"back": "Atrás",
"finish": "Finalizar",
"cancel": "Cancelar"
"cancel": "Cancelar",
"delete": "Eliminar"
},
"screenshot": {
"capture_fullpage": "Capturar Página Completa",

View File

@@ -159,11 +159,13 @@
"confirm": "確認",
"discard": "破棄",
"confirm_capture": "取得を確認",
"confirm_pagination": "ページネーションを確認",
"confirm_limit": "制限を確認",
"confirm_pagination": "確認",
"confirm_limit": "確認",
"finish_capture": "取得を完了",
"back": "戻る",
"finish": "完了",
"cancel": "キャンセル"
"cancel": "キャンセル",
"delete": "削除"
},
"screenshot": {
"capture_fullpage": "フルページを取得",

View File

@@ -159,11 +159,13 @@
"confirm": "确认",
"discard": "放弃",
"confirm_capture": "确认捕获",
"confirm_pagination": "确认分页",
"confirm_limit": "确认限制",
"confirm_pagination": "确认",
"confirm_limit": "确认",
"finish_capture": "完成捕获",
"back": "返回",
"finish": "完成",
"cancel": "取消"
"cancel": "取消",
"delete": "删除"
},
"screenshot": {
"capture_fullpage": "捕获整页",

View File

@@ -9,6 +9,8 @@ import { chromium } from 'playwright-extra';
import stealthPlugin from 'puppeteer-extra-plugin-stealth';
import { PlaywrightBlocker } from '@cliqz/adblocker-playwright';
import fetch from 'cross-fetch';
import { throttle } from 'lodash';
import sharp from 'sharp';
import logger from '../../logger';
import { InterpreterSettings, RemoteBrowserOptions } from "../../types";
@@ -16,8 +18,30 @@ import { WorkflowGenerator } from "../../workflow-management/classes/Generator";
import { WorkflowInterpreter } from "../../workflow-management/classes/Interpreter";
import { getDecryptedProxyConfig } from '../../routes/proxy';
import { getInjectableScript } from 'idcac-playwright';
chromium.use(stealthPlugin());
/**
 * Tuning values for the periodic memory check in
 * `RemoteBrowser.initializeMemoryManagement`.
 *
 * - `gcInterval`: period of the check timer, in milliseconds.
 * - `maxHeapSize`: fixed heap budget in bytes that `heapUsed` is divided by.
 *   It is a constant here, not a value read from the runtime.
 * - `heapUsageThreshold`: ratio of `heapUsed / maxHeapSize` above which a
 *   cleanup is triggered.
 */
const MEMORY_CONFIG = {
gcInterval: 60000, // 1 minute
maxHeapSize: 2048 * 1024 * 1024, // 2GB
heapUsageThreshold: 0.85 // 85%
};
/**
 * Settings for the screencast pipeline.
 *
 * - `format`: image format passed to the CDP `Page.startScreencast` command.
 * - `maxWidth` / `maxHeight`: bounding box used when resizing frames in
 *   `optimizeScreenshot` (fit inside, never enlarged).
 * - `targetFPS`: queued frames are re-emitted after `1000 / targetFPS` ms.
 * - `compressionQuality`: fraction in 0..1; multiplied by 100 and rounded to
 *   form the JPEG quality.
 * - `maxQueueSize`: maximum number of frames kept waiting while one is being
 *   processed.
 */
const SCREENCAST_CONFIG: {
format: "jpeg" | "png";
maxWidth: number;
maxHeight: number;
targetFPS: number;
compressionQuality: number;
maxQueueSize: number;
} = {
format: 'jpeg',
maxWidth: 900,
maxHeight: 400,
targetFPS: 30,
compressionQuality: 0.8,
maxQueueSize: 2
};
/**
* This class represents a remote browser instance.
@@ -78,6 +102,11 @@ export class RemoteBrowser {
*/
public interpreter: WorkflowInterpreter;
private screenshotQueue: Buffer[] = [];
private isProcessingScreenshot = false;
private screencastInterval: NodeJS.Timeout | null = null
/**
* Initializes a new instances of the {@link Generator} and {@link WorkflowInterpreter} classes and
* assigns the socket instance everywhere.
@@ -90,6 +119,46 @@ export class RemoteBrowser {
this.generator = new WorkflowGenerator(socket);
}
/**
 * Starts a periodic check (every `MEMORY_CONFIG.gcInterval` ms) that
 * triggers a cleanup when heap usage exceeds the configured threshold and
 * keeps the screenshot queue within `SCREENCAST_CONFIG.maxQueueSize`.
 *
 * NOTE(review): the interval handle is not stored, so this timer keeps
 * running for the lifetime of the process.
 */
private initializeMemoryManagement(): void {
  setInterval(() => {
    const { heapUsed } = process.memoryUsage();
    const heapUsageRatio = heapUsed / MEMORY_CONFIG.maxHeapSize;

    if (heapUsageRatio > MEMORY_CONFIG.heapUsageThreshold) {
      logger.warn('High memory usage detected, triggering cleanup');
      // The cleanup is async; handle its rejection here so a failure cannot
      // surface as an unhandled promise rejection from a timer callback.
      this.performMemoryCleanup().catch((error) => {
        logger.error('Memory cleanup failed:', error);
      });
    }

    // Clear screenshot queue if it's too large: keep only the newest frames.
    if (this.screenshotQueue.length > SCREENCAST_CONFIG.maxQueueSize) {
      this.screenshotQueue = this.screenshotQueue.slice(-SCREENCAST_CONFIG.maxQueueSize);
    }
  }, MEMORY_CONFIG.gcInterval);
}
/**
 * Frees memory held by the screencast pipeline: drops queued frames,
 * requests a garbage collection when `global.gc` is exposed, and replaces
 * the CDP session with a fresh one.
 *
 * Errors while resetting the session are logged and not rethrown.
 */
private async performMemoryCleanup(): Promise<void> {
  this.screenshotQueue = [];
  this.isProcessingScreenshot = false;

  // `global.gc` is only defined when the runtime exposes it.
  if (global.gc) {
    global.gc();
  }

  // Reset CDP session if needed
  const staleClient = this.client;
  if (!staleClient) {
    return;
  }

  try {
    await this.stopScreencast();
    this.client = null;

    // Detach the old session explicitly; merely dropping the reference
    // leaves it attached to the page, leaking one session per cleanup.
    try {
      await staleClient.detach();
    } catch (detachError) {
      // The target may already be gone; that must not block re-creation.
      logger.warn('Failed to detach stale CDP session:', detachError);
    }

    if (this.currentPage) {
      this.client = await this.currentPage.context().newCDPSession(this.currentPage);
      await this.startScreencast();
    }
  } catch (error) {
    logger.error('Error resetting CDP session:', error);
  }
}
/**
* Normalizes URLs to prevent navigation loops while maintaining consistent format
*/
@@ -157,7 +226,7 @@ export class RemoteBrowser {
'Mozilla/5.0 (Windows NT 11.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.5938.62 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:118.0) Gecko/20100101 Firefox/118.0',
];
return userAgents[Math.floor(Math.random() * userAgents.length)];
}
@@ -178,7 +247,7 @@ export class RemoteBrowser {
"--disable-extensions",
"--no-sandbox",
"--disable-dev-shm-usage",
],
],
}));
const proxyConfig = await getDecryptedProxyConfig(userId);
let proxyOptions: { server: string, username?: string, password?: string } = { server: '' };
@@ -251,11 +320,11 @@ export class RemoteBrowser {
this.client = await this.currentPage.context().newCDPSession(this.currentPage);
await blocker.disableBlockingInPage(this.currentPage);
console.log('Adblocker initialized');
} catch (error: any) {
} catch (error: any) {
console.warn('Failed to initialize adblocker, continuing without it:', error.message);
// Still need to set up the CDP session even if blocker fails
this.client = await this.currentPage.context().newCDPSession(this.currentPage);
}
}
};
/**
@@ -319,7 +388,7 @@ export class RemoteBrowser {
return;
}
this.client.on('Page.screencastFrame', ({ data: base64, sessionId }) => {
this.emitScreenshot(base64)
this.emitScreenshot(Buffer.from(base64, 'base64'))
setTimeout(async () => {
try {
if (!this.client) {
@@ -339,16 +408,49 @@ export class RemoteBrowser {
* If an interpretation was running it will be stopped.
* @returns {Promise<void>}
*/
public switchOff = async (): Promise<void> => {
await this.interpreter.stopInterpretation();
if (this.browser) {
await this.stopScreencast();
await this.browser.close();
} else {
logger.log('error', 'Browser wasn\'t initialized');
logger.log('error', 'Switching off the browser failed');
public async switchOff(): Promise<void> {
try {
await this.interpreter.stopInterpretation();
if (this.screencastInterval) {
clearInterval(this.screencastInterval);
}
if (this.client) {
await this.stopScreencast();
}
if (this.browser) {
await this.browser.close();
}
this.screenshotQueue = [];
//this.performanceMonitor.reset();
} catch (error) {
logger.error('Error during browser shutdown:', error);
}
};
}
/**
 * Re-encodes a frame as a progressive JPEG and shrinks it to fit inside
 * `SCREENCAST_CONFIG.maxWidth` x `SCREENCAST_CONFIG.maxHeight` without
 * enlarging smaller images.
 *
 * @param screenshot Encoded image bytes.
 * @returns The optimized image, or the original buffer unchanged when
 *   processing fails (the failure is logged).
 */
private async optimizeScreenshot(screenshot: Buffer): Promise<Buffer> {
  const { compressionQuality, maxWidth, maxHeight } = SCREENCAST_CONFIG;
  // The config stores quality as a 0..1 fraction; scale it to a percentage.
  const jpegOptions = {
    quality: Math.round(compressionQuality * 100),
    progressive: true
  };

  try {
    const pipeline = sharp(screenshot)
      .jpeg(jpegOptions)
      .resize({
        width: maxWidth,
        height: maxHeight,
        fit: 'inside',
        withoutEnlargement: true
      });
    return await pipeline.toBuffer();
  } catch (error) {
    logger.error('Screenshot optimization failed:', error);
    return screenshot;
  }
}
/**
* Makes and emits a single screenshot to the client side.
@@ -358,7 +460,7 @@ export class RemoteBrowser {
try {
const screenshot = await this.currentPage?.screenshot();
if (screenshot) {
this.emitScreenshot(screenshot.toString('base64'));
this.emitScreenshot(screenshot);
}
} catch (e) {
const { message } = e as Error;
@@ -490,37 +592,85 @@ export class RemoteBrowser {
* Should be called only once after the browser is fully initialized.
* @returns {Promise<void>}
*/
private startScreencast = async (): Promise<void> => {
private async startScreencast(): Promise<void> {
if (!this.client) {
logger.log('warn', 'client is not initialized');
logger.warn('Client is not initialized');
return;
}
await this.client.send('Page.startScreencast', { format: 'jpeg', quality: 75 });
logger.log('info', `Browser started with screencasting a page.`);
};
/**
* Unsubscribes the current page from the screencast session.
* @returns {Promise<void>}
*/
private stopScreencast = async (): Promise<void> => {
if (!this.client) {
logger.log('error', 'client is not initialized');
logger.log('error', 'Screencast stop failed');
} else {
await this.client.send('Page.stopScreencast');
logger.log('info', `Browser stopped with screencasting.`);
try {
await this.client.send('Page.startScreencast', {
format: SCREENCAST_CONFIG.format,
});
// Set up screencast frame handler
this.client.on('Page.screencastFrame', async ({ data, sessionId }) => {
try {
const buffer = Buffer.from(data, 'base64');
await this.emitScreenshot(buffer);
await this.client?.send('Page.screencastFrameAck', { sessionId });
} catch (error) {
logger.error('Screencast frame processing failed:', error);
}
});
logger.info('Screencast started successfully');
} catch (error) {
logger.error('Failed to start screencast:', error);
}
};
}
/**
 * Sends `Page.stopScreencast` over the CDP session and resets the frame
 * queue state.
 *
 * Logs an error and returns when no session exists. A failure of the CDP
 * command is logged and not rethrown; the queue state is left untouched in
 * that case.
 */
private async stopScreencast(): Promise<void> {
  const session = this.client;
  if (!session) {
    logger.error('Client is not initialized');
    return;
  }

  try {
    await session.send('Page.stopScreencast');

    // Only reset queue state once the command has succeeded.
    this.screenshotQueue = [];
    this.isProcessingScreenshot = false;
    logger.info('Screencast stopped successfully');
  } catch (error) {
    logger.error('Failed to stop screencast:', error);
  }
}
/**
* Helper for emitting the screenshot of browser's active page through websocket.
* @param payload the screenshot binary data
* @returns void
*/
private emitScreenshot = (payload: any): void => {
const dataWithMimeType = ('data:image/jpeg;base64,').concat(payload);
this.socket.emit('screencast', dataWithMimeType);
logger.log('debug', `Screenshot emitted`);
private emitScreenshot = async (payload: Buffer): Promise<void> => {
if (this.isProcessingScreenshot) {
if (this.screenshotQueue.length < SCREENCAST_CONFIG.maxQueueSize) {
this.screenshotQueue.push(payload);
}
return;
}
this.isProcessingScreenshot = true;
try {
const optimizedScreenshot = await this.optimizeScreenshot(payload);
const base64Data = optimizedScreenshot.toString('base64');
const dataWithMimeType = `data:image/jpeg;base64,${base64Data}`;
this.socket.emit('screencast', dataWithMimeType);
logger.debug('Screenshot emitted');
} catch (error) {
logger.error('Screenshot emission failed:', error);
} finally {
this.isProcessingScreenshot = false;
if (this.screenshotQueue.length > 0) {
const nextScreenshot = this.screenshotQueue.shift();
if (nextScreenshot) {
setTimeout(() => this.emitScreenshot(nextScreenshot), 1000 / SCREENCAST_CONFIG.targetFPS);
}
}
}
};
}

View File

@@ -129,6 +129,17 @@ export interface BaseActionInfo {
hasOnlyText: boolean;
}
/**
 * Selector information for an element located inside an iframe.
 *
 * `full` is the complete selector string.
 * NOTE(review): `isIframe` presumably flags that the element lives in an
 * iframe; its producer is not visible here.
 */
interface IframeSelector {
full: string;
isIframe: boolean;
}
/**
 * Selector information for an element located inside a shadow DOM.
 *
 * `full` is the complete selector string.
 * NOTE(review): `mode` is presumably the shadow root mode ('open' or
 * 'closed'); confirm against the code that fills it in.
 */
interface ShadowSelector {
full: string;
mode: string;
}
/**
* Holds all the possible css selectors that has been found for an element.
* @category Types
@@ -143,6 +154,8 @@ export interface Selectors {
hrefSelector: string|null;
accessibilitySelector: string|null;
formSelector: string|null;
iframeSelector: IframeSelector|null;
shadowSelector: ShadowSelector|null;
}
/**
@@ -156,7 +169,7 @@ export interface BaseAction extends BaseActionInfo{
associatedActions: ActionType[];
inputType: string | undefined;
value: string | undefined;
selectors: { [key: string]: string | null };
selectors: Selectors;
timestamp: number;
isPassword: boolean;
/**

View File

@@ -730,15 +730,26 @@ export class WorkflowGenerator {
const displaySelector = await this.generateSelector(page, coordinates, ActionType.Click);
const elementInfo = await getElementInformation(page, coordinates, this.listSelector, this.getList);
if (rect) {
const highlighterData = {
rect,
selector: displaySelector,
elementInfo,
// Include shadow DOM specific information
shadowInfo: elementInfo?.isShadowRoot ? {
mode: elementInfo.shadowRootMode,
content: elementInfo.shadowRootContent
} : null
};
if (this.getList === true) {
if (this.listSelector !== '') {
const childSelectors = await getChildSelectors(page, this.listSelector || '');
this.socket.emit('highlighter', { rect, selector: displaySelector, elementInfo, childSelectors })
this.socket.emit('highlighter', { ...highlighterData, childSelectors })
} else {
this.socket.emit('highlighter', { rect, selector: displaySelector, elementInfo });
this.socket.emit('highlighter', { ...highlighterData });
}
} else {
this.socket.emit('highlighter', { rect, selector: displaySelector, elementInfo });
this.socket.emit('highlighter', { ...highlighterData });
}
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -12,6 +12,16 @@ export const getBestSelectorForAction = (action: Action) => {
case ActionType.Hover:
case ActionType.DragAndDrop: {
const selectors = action.selectors;
if (selectors?.iframeSelector?.full) {
return selectors.iframeSelector.full;
}
if (selectors?.shadowSelector?.full) {
return selectors.shadowSelector.full;
}
// less than 25 characters, and element only has text inside
const textSelector =
selectors?.text?.length != null &&
@@ -75,6 +85,11 @@ export const getBestSelectorForAction = (action: Action) => {
case ActionType.Input:
case ActionType.Keydown: {
const selectors = action.selectors;
if (selectors?.shadowSelector?.full) {
return selectors.shadowSelector.full;
}
return (
selectors.testIdSelector ??
selectors?.id ??

View File

@@ -5,11 +5,6 @@ import { ScheduleSettings } from "../components/molecules/ScheduleSettings";
import { CreateRunResponse, ScheduleRunResponse } from "../pages/MainPage";
import { apiUrl } from "../apiConfig";
export const getStoredRecordings = async (): Promise<string[] | null> => {
try {
const response = await axios.get(`${apiUrl}/storage/recordings`);
@@ -82,11 +77,7 @@ export const getStoredRecording = async (id: string) => {
}
}
export const checkRunsForRecording = async (id: string): Promise<boolean> => {
try {
const response = await axios.get(`${apiUrl}/storage/recordings/${id}/runs`);
@@ -99,32 +90,26 @@ export const checkRunsForRecording = async (id: string): Promise<boolean> => {
}
};
/**
 * Deletes a stored recording unless `checkRunsForRecording` reports that it
 * has runs.
 *
 * @param id Identifier of the recording.
 * @returns `true` when the server answers the DELETE with status 200;
 *   `false` when the recording has runs or the request fails (the error is
 *   logged to the console).
 */
export const deleteRecordingFromStorage = async (id: string): Promise<boolean> => {
  // Refuse to delete while the runs check answers true.
  if (await checkRunsForRecording(id)) {
    return false;
  }

  try {
    const { status } = await axios.delete(`${apiUrl}/storage/recordings/${id}`);
    if (status !== 200) {
      // Routed through the catch below so every failure is logged the same way.
      throw new Error(`Couldn't delete stored recording ${id}`);
    }
    return true;
  } catch (error: unknown) {
    console.log(error);
    return false;
  }
};
export const deleteRunFromStorage = async (id: string): Promise<boolean> => {
@@ -159,7 +144,7 @@ export const createRunForStoredRecording = async (id: string, settings: RunSetti
try {
const response = await axios.put(
`${apiUrl}/storage/runs/${id}`,
{ ...settings });
{ ...settings });
if (response.status === 200) {
return response.data;
} else {

View File

@@ -3,7 +3,7 @@ import { emptyWorkflow } from "../shared/constants";
import { default as axios, AxiosResponse } from "axios";
import { apiUrl } from "../apiConfig";
export const getActiveWorkflow = async(id: string) : Promise<WorkflowFile> => {
export const getActiveWorkflow = async (id: string): Promise<WorkflowFile> => {
try {
const response = await axios.get(`${apiUrl}/workflow/${id}`)
if (response.status === 200) {
@@ -11,13 +11,13 @@ export const getActiveWorkflow = async(id: string) : Promise<WorkflowFile> => {
} else {
throw new Error('Something went wrong when fetching a recorded workflow');
}
} catch(error: any) {
} catch (error: any) {
console.log(error);
return emptyWorkflow;
}
};
export const getParamsOfActiveWorkflow = async(id: string) : Promise<string[]|null> => {
export const getParamsOfActiveWorkflow = async (id: string): Promise<string[] | null> => {
try {
const response = await axios.get(`${apiUrl}/workflow/params/${id}`)
if (response.status === 200) {
@@ -25,15 +25,15 @@ export const getParamsOfActiveWorkflow = async(id: string) : Promise<string[]|nu
} else {
throw new Error('Something went wrong when fetching the parameters of the recorded workflow');
}
} catch(error: any) {
} catch (error: any) {
console.log(error);
return null;
}
};
export const deletePair = async(index: number): Promise<WorkflowFile> => {
export const deletePair = async (index: number): Promise<WorkflowFile> => {
try {
const response = await axios.delete(`${apiUrl}/workflow/pair/${index}`);
const response = await axios.delete(`${apiUrl}/workflow/pair/${index}`);
if (response.status === 200) {
return response.data;
} else {
@@ -45,11 +45,11 @@ export const deletePair = async(index: number): Promise<WorkflowFile> => {
}
};
export const AddPair = async(index: number, pair: WhereWhatPair): Promise<WorkflowFile> => {
export const AddPair = async (index: number, pair: WhereWhatPair): Promise<WorkflowFile> => {
try {
const response = await axios.post(`${apiUrl}/workflow/pair/${index}`, {
pair,
}, {headers: {'Content-Type': 'application/json'}});
}, { headers: { 'Content-Type': 'application/json' } });
if (response.status === 200) {
return response.data;
} else {
@@ -61,11 +61,11 @@ export const AddPair = async(index: number, pair: WhereWhatPair): Promise<Workfl
}
};
export const UpdatePair = async(index: number, pair: WhereWhatPair): Promise<WorkflowFile> => {
export const UpdatePair = async (index: number, pair: WhereWhatPair): Promise<WorkflowFile> => {
try {
const response = await axios.put(`${apiUrl}/workflow/pair/${index}`, {
pair,
}, {headers: {'Content-Type': 'application/json'}});
}, { headers: { 'Content-Type': 'application/json' } });
if (response.status === 200) {
return response.data;
} else {

View File

@@ -1,21 +1,147 @@
import React, { useCallback, useEffect, useRef } from 'react';
import React, { useCallback, useEffect, useRef, useMemo, Suspense } from 'react';
import { useSocketStore } from '../../context/socket';
import { getMappedCoordinates } from "../../helpers/inputHelpers";
import { useGlobalInfoStore } from "../../context/globalInfo";
import { useActionContext } from '../../context/browserActions';
import DatePicker from './DatePicker';
import Dropdown from './Dropdown';
import TimePicker from './TimePicker';
import DateTimeLocalPicker from './DateTimeLocalPicker';
const DatePicker = React.lazy(() => import('./DatePicker'));
const Dropdown = React.lazy(() => import('./Dropdown'));
const TimePicker = React.lazy(() => import('./TimePicker'));
const DateTimeLocalPicker = React.lazy(() => import('./DateTimeLocalPicker'));
interface CreateRefCallback {
(ref: React.RefObject<HTMLCanvasElement>): void;
/**
 * Batches callbacks and runs them together on the next animation frame.
 *
 * Callbacks are held in a `Set`, so scheduling the same function reference
 * twice before a frame runs it only once. Callbacks scheduled while a frame
 * is executing are deferred to a following frame.
 */
class RAFScheduler {
  private queue: Set<() => void> = new Set();
  private isProcessing: boolean = false;
  private frameId: number | null = null;

  /** Queues `callback` and requests a frame when none is pending. */
  schedule(callback: () => void): void {
    this.queue.add(callback);
    if (this.isProcessing) {
      return;
    }
    this.process();
  }

  /** Requests an animation frame that will flush the queue. */
  private process = (): void => {
    this.isProcessing = true;
    this.frameId = requestAnimationFrame(() => this.flush());
  }

  /** Runs every queued callback once, then requests another frame if needed. */
  private flush(): void {
    // Snapshot first so callbacks scheduled during this frame wait for the next.
    const batch = [...this.queue];
    this.queue.clear();

    for (const task of batch) {
      try {
        task();
      } catch (error) {
        // One failing callback must not prevent the rest of the batch.
        console.error('RAF Scheduler error:', error);
      }
    }

    this.isProcessing = false;
    this.frameId = null;

    if (this.queue.size > 0) {
      this.process();
    }
  }

  /** Drops all queued callbacks and cancels the pending frame, if any. */
  clear(): void {
    this.queue.clear();
    if (this.frameId !== null) {
      cancelAnimationFrame(this.frameId);
      this.frameId = null;
    }
    this.isProcessing = false;
  }
}
/**
 * Two-level priority queue drained through a frame scheduler.
 *
 * Each scheduled frame runs every pending high-priority callback and at most
 * one low-priority callback; remaining work is rescheduled for a later frame.
 */
class EventDebouncer {
  private highPriorityQueue: Array<() => void> = [];
  private lowPriorityQueue: Array<() => void> = [];
  private processing: boolean = false;
  private scheduler: RAFScheduler;

  /**
   * @param scheduler Scheduler used to run the queued callbacks.
   */
  constructor(scheduler: RAFScheduler) {
    this.scheduler = scheduler;
  }

  /**
   * Queues a callback and schedules processing when none is pending.
   *
   * @param callback Work to run on a later frame.
   * @param highPriority When true the callback goes to the queue that is
   *   fully drained every frame.
   */
  add(callback: () => void, highPriority: boolean = false): void {
    if (highPriority) {
      this.highPriorityQueue.push(callback);
    } else {
      this.lowPriorityQueue.push(callback);
    }

    if (!this.processing) {
      this.process();
    }
  }

  /** Schedules one frame of work and reschedules while work remains. */
  private process(): void {
    this.processing = true;

    this.scheduler.schedule(() => {
      try {
        while (this.highPriorityQueue.length > 0) {
          const callback = this.highPriorityQueue.shift();
          callback?.();
        }

        // At most one low-priority callback per frame.
        const callback = this.lowPriorityQueue.shift();
        callback?.();
      } finally {
        // Reset the flag even when a callback throws; otherwise `add` would
        // never schedule again and every later event would be stuck.
        this.processing = false;

        // Reschedule only after the reset, so the flag stays true while a
        // frame is pending and `add` cannot schedule a duplicate.
        if (this.highPriorityQueue.length > 0 || this.lowPriorityQueue.length > 0) {
          this.process();
        }
      }
    });
  }

  /** Drops all queued callbacks. An already scheduled frame becomes a no-op. */
  clear(): void {
    this.highPriorityQueue = [];
    this.lowPriorityQueue = [];
    this.processing = false;
  }
}
// Optimized measurement cache with LRU
/**
 * Least-recently-used cache of element rectangles.
 *
 * Recency is tracked through `Map` insertion order: the first key is the
 * least recently used entry, and re-inserting a key moves it to the end.
 */
class MeasurementCache {
  private cache: Map<HTMLElement, DOMRect>;
  private maxSize: number;

  /**
   * @param maxSize Number of entries at which inserting a new key evicts
   *   the least recently used one.
   */
  constructor(maxSize: number = 100) {
    this.cache = new Map();
    this.maxSize = maxSize;
  }

  /**
   * Returns the cached rectangle for `element` and marks it as most
   * recently used, or `undefined` when the element is not cached.
   */
  get(element: HTMLElement): DOMRect | undefined {
    const cached = this.cache.get(element);
    if (cached !== undefined) {
      // Refresh the entry: delete + set moves the key to the end.
      this.cache.delete(element);
      this.cache.set(element, cached);
    }
    return cached;
  }

  /**
   * Stores `rect` for `element` as the most recently used entry.
   *
   * Updating an existing key never evicts another entry; only inserting a
   * new key into a full cache removes the least recently used one.
   */
  set(element: HTMLElement, rect: DOMRect): void {
    if (this.cache.has(element)) {
      // Remove first so the re-insert below refreshes its recency.
      this.cache.delete(element);
    } else if (this.cache.size >= this.maxSize) {
      // Remove oldest entry
      const oldest = this.cache.keys().next();
      if (!oldest.done) {
        this.cache.delete(oldest.value);
      }
    }
    this.cache.set(element, rect);
  }

  /** Removes every cached rectangle. */
  clear(): void {
    this.cache.clear();
  }
}
interface CanvasProps {
width: number;
height: number;
onCreateRef: CreateRefCallback;
onCreateRef: (ref: React.RefObject<HTMLCanvasElement>) => void;
}
/**
@@ -26,225 +152,229 @@ export interface Coordinates {
y: number;
};
const Canvas = ({ width, height, onCreateRef }: CanvasProps) => {
const Canvas = React.memo(({ width, height, onCreateRef }: CanvasProps) => {
const canvasRef = useRef<HTMLCanvasElement>(null);
const { socket } = useSocketStore();
const { setLastAction, lastAction } = useGlobalInfoStore();
const { getText, getList } = useActionContext();
const getTextRef = useRef(getText);
const getListRef = useRef(getList);
const [datePickerInfo, setDatePickerInfo] = React.useState<{
coordinates: Coordinates;
selector: string;
} | null>(null);
const scheduler = useRef(new RAFScheduler());
const debouncer = useRef(new EventDebouncer(scheduler.current));
const measurementCache = useRef(new MeasurementCache(50));
//const performanceMonitor = useRef(new FrontendPerformanceMonitor());
const [dropdownInfo, setDropdownInfo] = React.useState<{
coordinates: Coordinates;
selector: string;
options: Array<{
value: string;
text: string;
disabled: boolean;
selected: boolean;
}>;
} | null>(null);
const refs = useRef({
getText,
getList,
lastMousePosition: { x: 0, y: 0 },
lastFrameTime: 0,
context: null as CanvasRenderingContext2D | null,
});
const [timePickerInfo, setTimePickerInfo] = React.useState<{
coordinates: Coordinates;
selector: string;
} | null>(null);
const [dateTimeLocalInfo, setDateTimeLocalInfo] = React.useState<{
coordinates: Coordinates;
selector: string;
} | null>(null);
const notifyLastAction = (action: string) => {
if (lastAction !== action) {
setLastAction(action);
const [state, dispatch] = React.useReducer((state: any, action: any) => {
switch (action.type) {
case 'BATCH_UPDATE':
return { ...state, ...action.payload };
default:
return state;
}
};
}, {
datePickerInfo: null,
dropdownInfo: null,
timePickerInfo: null,
dateTimeLocalInfo: null
});
const lastMousePosition = useRef<Coordinates>({ x: 0, y: 0 });
const getEventCoordinates = useCallback((event: MouseEvent): { x: number; y: number } => {
if (!canvasRef.current) return { x: 0, y: 0 };
useEffect(() => {
getTextRef.current = getText;
getListRef.current = getList;
}, [getText, getList]);
useEffect(() => {
if (socket) {
socket.on('showDatePicker', (info: {coordinates: Coordinates, selector: string}) => {
setDatePickerInfo(info);
});
socket.on('showDropdown', (info: {
coordinates: Coordinates,
selector: string,
options: Array<{
value: string;
text: string;
disabled: boolean;
selected: boolean;
}>;
}) => {
setDropdownInfo(info);
});
socket.on('showTimePicker', (info: {coordinates: Coordinates, selector: string}) => {
setTimePickerInfo(info);
});
socket.on('showDateTimePicker', (info: {coordinates: Coordinates, selector: string}) => {
setDateTimeLocalInfo(info);
});
return () => {
socket.off('showDatePicker');
socket.off('showDropdown');
socket.off('showTimePicker');
socket.off('showDateTimePicker');
};
let rect = measurementCache.current.get(canvasRef.current);
if (!rect) {
rect = canvasRef.current.getBoundingClientRect();
measurementCache.current.set(canvasRef.current, rect);
}
}, [socket]);
const onMouseEvent = useCallback((event: MouseEvent) => {
if (socket && canvasRef.current) {
// Get the canvas bounding rectangle
const rect = canvasRef.current.getBoundingClientRect();
const clickCoordinates = {
x: event.clientX - rect.left, // Use relative x coordinate
y: event.clientY - rect.top, // Use relative y coordinate
};
return {
x: event.clientX - rect.left,
y: event.clientY - rect.top
};
}, []);
switch (event.type) {
case 'mousedown':
if (getTextRef.current === true) {
const handleMouseEvent = useCallback((event: MouseEvent) => {
if (!socket || !canvasRef.current) return;
//performanceMonitor.current.measureEventLatency(event);
const coordinates = getEventCoordinates(event);
switch (event.type) {
case 'mousedown':
debouncer.current.add(() => {
if (refs.current.getText) {
console.log('Capturing Text...');
} else if (getListRef.current === true) {
} else if (refs.current.getList) {
console.log('Capturing List...');
} else {
socket.emit('input:mousedown', clickCoordinates);
socket.emit('input:mousedown', coordinates);
}
notifyLastAction('click');
break;
case 'mousemove':
if (lastMousePosition.current.x !== clickCoordinates.x ||
lastMousePosition.current.y !== clickCoordinates.y) {
lastMousePosition.current = {
x: clickCoordinates.x,
y: clickCoordinates.y,
};
socket.emit('input:mousemove', {
x: clickCoordinates.x,
y: clickCoordinates.y,
});
notifyLastAction('move');
}
break;
case 'wheel':
const wheelEvent = event as WheelEvent;
const deltas = {
deltaX: Math.round(wheelEvent.deltaX),
deltaY: Math.round(wheelEvent.deltaY),
};
socket.emit('input:wheel', deltas);
notifyLastAction('scroll');
break;
default:
console.log('Default mouseEvent registered');
return;
}
}
}, [socket]);
setLastAction('click');
}, true); // High priority
break;
const onKeyboardEvent = useCallback((event: KeyboardEvent) => {
if (socket) {
case 'mousemove':
if (refs.current.lastMousePosition.x !== coordinates.x ||
refs.current.lastMousePosition.y !== coordinates.y) {
debouncer.current.add(() => {
refs.current.lastMousePosition = coordinates;
socket.emit('input:mousemove', coordinates);
setLastAction('move');
});
}
break;
case 'wheel':
const wheelEvent = event as WheelEvent;
debouncer.current.add(() => {
socket.emit('input:wheel', {
deltaX: Math.round(wheelEvent.deltaX),
deltaY: Math.round(wheelEvent.deltaY)
});
setLastAction('scroll');
});
break;
}
}, [socket, getEventCoordinates]);
const handleKeyboardEvent = useCallback((event: KeyboardEvent) => {
if (!socket) return;
debouncer.current.add(() => {
switch (event.type) {
case 'keydown':
socket.emit('input:keydown', { key: event.key, coordinates: lastMousePosition.current });
notifyLastAction(`${event.key} pressed`);
socket.emit('input:keydown', {
key: event.key,
coordinates: refs.current.lastMousePosition
});
setLastAction(`${event.key} pressed`);
break;
case 'keyup':
socket.emit('input:keyup', event.key);
break;
default:
console.log('Default keyEvent registered');
return;
}
}
}, event.type === 'keydown'); // High priority for keydown
}, [socket]);
// Setup and cleanup
useEffect(() => {
if (!canvasRef.current) return;
const canvas = canvasRef.current;
refs.current.context = canvas.getContext('2d', {
alpha: false,
desynchronized: true
});
onCreateRef(canvasRef);
const options = { passive: true };
canvas.addEventListener('mousedown', handleMouseEvent, options);
canvas.addEventListener('mousemove', handleMouseEvent, options);
canvas.addEventListener('wheel', handleMouseEvent, options);
canvas.addEventListener('keydown', handleKeyboardEvent, options);
canvas.addEventListener('keyup', handleKeyboardEvent, options);
return () => {
canvas.removeEventListener('mousedown', handleMouseEvent);
canvas.removeEventListener('mousemove', handleMouseEvent);
canvas.removeEventListener('wheel', handleMouseEvent);
canvas.removeEventListener('keydown', handleKeyboardEvent);
canvas.removeEventListener('keyup', handleKeyboardEvent);
scheduler.current.clear();
debouncer.current.clear();
measurementCache.current.clear();
};
}, [handleMouseEvent, handleKeyboardEvent, onCreateRef]);
// Performance monitoring
// useEffect(() => {
// const intervalId = setInterval(() => {
// console.log('Performance Report:', performanceMonitor.current.getPerformanceReport());
// }, 20000);
// return () => clearInterval(intervalId);
// }, []);
useEffect(() => {
if (canvasRef.current) {
onCreateRef(canvasRef);
canvasRef.current.addEventListener('mousedown', onMouseEvent);
canvasRef.current.addEventListener('mousemove', onMouseEvent);
canvasRef.current.addEventListener('wheel', onMouseEvent, { passive: true });
canvasRef.current.addEventListener('keydown', onKeyboardEvent);
canvasRef.current.addEventListener('keyup', onKeyboardEvent);
if (!socket) return;
return () => {
if (canvasRef.current) {
canvasRef.current.removeEventListener('mousedown', onMouseEvent);
canvasRef.current.removeEventListener('mousemove', onMouseEvent);
canvasRef.current.removeEventListener('wheel', onMouseEvent);
canvasRef.current.removeEventListener('keydown', onKeyboardEvent);
canvasRef.current.removeEventListener('keyup', onKeyboardEvent);
}
const handlers = {
showDatePicker: (info: any) => dispatch({ type: 'BATCH_UPDATE', payload: { datePickerInfo: info } }),
showDropdown: (info: any) => dispatch({ type: 'BATCH_UPDATE', payload: { dropdownInfo: info } }),
showTimePicker: (info: any) => dispatch({ type: 'BATCH_UPDATE', payload: { timePickerInfo: info } }),
showDateTimePicker: (info: any) => dispatch({ type: 'BATCH_UPDATE', payload: { dateTimeLocalInfo: info } })
};
};
} else {
console.log('Canvas not initialized');
}
Object.entries(handlers).forEach(([event, handler]) => socket.on(event, handler));
return () => {
Object.keys(handlers).forEach(event => socket.off(event));
};
}, [socket]);
}, [onMouseEvent]);
const memoizedDimensions = useMemo(() => ({
width: width || 900,
height: height || 400
}), [width, height]);
return (
<div style={{ borderRadius: '0px 0px 5px 5px', overflow: 'hidden', backgroundColor: 'white' }}>
<div className="relative bg-white rounded-b-md overflow-hidden">
<canvas
tabIndex={0}
ref={canvasRef}
height={400}
width={900}
style={{ display: 'block' }}
height={memoizedDimensions.height}
width={memoizedDimensions.width}
className="block"
/>
{datePickerInfo && (
<DatePicker
coordinates={datePickerInfo.coordinates}
selector={datePickerInfo.selector}
onClose={() => setDatePickerInfo(null)}
/>
)}
{dropdownInfo && (
<Dropdown
coordinates={dropdownInfo.coordinates}
selector={dropdownInfo.selector}
options={dropdownInfo.options}
onClose={() => setDropdownInfo(null)}
/>
)}
{timePickerInfo && (
<TimePicker
coordinates={timePickerInfo.coordinates}
selector={timePickerInfo.selector}
onClose={() => setTimePickerInfo(null)}
/>
)}
{dateTimeLocalInfo && (
<DateTimeLocalPicker
coordinates={dateTimeLocalInfo.coordinates}
selector={dateTimeLocalInfo.selector}
onClose={() => setDateTimeLocalInfo(null)}
/>
)}
<Suspense fallback={null}>
{state.datePickerInfo && (
<DatePicker
coordinates={state.datePickerInfo.coordinates}
selector={state.datePickerInfo.selector}
onClose={() => dispatch({
type: 'BATCH_UPDATE',
payload: { datePickerInfo: null }
})}
/>
)}
{state.dropdownInfo && (
<Dropdown
coordinates={state.dropdownInfo.coordinates}
selector={state.dropdownInfo.selector}
options={state.dropdownInfo.options}
onClose={() => dispatch({
type: 'BATCH_UPDATE',
payload: { dropdownInfo: null }
})}
/>
)}
{state.timePickerInfo && (
<TimePicker
coordinates={state.timePickerInfo.coordinates}
selector={state.timePickerInfo.selector}
onClose={() => dispatch({ type: 'SET_TIME_PICKER', payload: null })}
/>
)}
{state.dateTimeLocalInfo && (
<DateTimeLocalPicker
coordinates={state.dateTimeLocalInfo.coordinates}
selector={state.dateTimeLocalInfo.selector}
onClose={() => dispatch({ type: 'SET_DATETIME_PICKER', payload: null })}
/>
)}
</Suspense>
</div>
);
});
};
Canvas.displayName = 'Canvas';
export default Canvas;

View File

@@ -113,7 +113,7 @@ const ActionDescriptionBox = () => {
return (
<CustomBoxContainer>
<Logo src={MaxunLogo} alt={t('common.maxun_logo')} />
<Logo src={MaxunLogo} alt='maxun_logo' />
<Triangle />
<Content>
{renderActionDescription()}

View File

@@ -318,7 +318,7 @@ export const NavBar: React.FC<NavBarProps> = ({
<MenuItem onClick={() => {
window.open('https://x.com/maxun_io?ref=app', '_blank');
}}>
<X sx={{ marginRight: '5px' }} /> Twiiter (X)
<X sx={{ marginRight: '5px' }} /> Twitter (X)
</MenuItem>
<MenuItem onClick={handleLangMenuOpen}>
<Language sx={{ marginRight: '5px' }} /> {t('navbar.menu_items.language')}

View File

@@ -33,10 +33,6 @@ interface Column {
format?: (value: string) => string;
}
interface Data {
id: string;
name: string;
@@ -441,7 +437,6 @@ const OptionsButton = ({ handleEdit, handleDelete, handleDuplicate }: OptionsBut
</ListItemIcon>
<ListItemText>{t('recordingtable.duplicate')}</ListItemText>
</MenuItem>
</Menu>
</>
);

View File

@@ -101,7 +101,7 @@ export const SaveRecording = ({ fileName }: SaveRecordingProps) => {
</Button>
<WarningText>
<NotificationImportantIcon color="warning" />
{t('save_recording.warnings.robot_exists')}
{t('save_recording.errors.exists_warning')}
</WarningText>
</React.Fragment>)
: <Button type="submit" variant="contained" sx={{ marginTop: '10px' }}>

View File

@@ -79,12 +79,13 @@ export const ScheduleSettingsModal = ({ isOpen, handleStart, handleClose, initia
'SUNDAY'
];
const { recordingId } = useGlobalInfoStore();
const { recordingId, notify } = useGlobalInfoStore();
const deleteRobotSchedule = () => {
if (recordingId) {
deleteSchedule(recordingId);
setSchedule(null);
notify('success', t('Schedule deleted successfully'));
} else {
console.error('No recording id provided');
}

View File

@@ -124,7 +124,11 @@ const ApiKeyManager = () => {
<TableBody>
<TableRow>
<TableCell>{apiKeyName}</TableCell>
<TableCell>{showKey ? `${apiKey?.substring(0, 10)}...` : '***************'}</TableCell>
<TableCell>
<Box sx={{ fontFamily: 'monospace', width: '10ch' }}>
{showKey ? `${apiKey?.substring(0, 10)}...` : '**********'}
</Box>
</TableCell>
<TableCell>
<Tooltip title={t('apikey.actions.copy')}>
<IconButton onClick={copyToClipboard}>

View File

@@ -9,10 +9,11 @@ import { useBrowserSteps, TextStep } from '../../context/browserSteps';
import { useGlobalInfoStore } from '../../context/globalInfo';
import { useTranslation } from 'react-i18next';
interface ElementInfo {
tagName: string;
hasOnlyText?: boolean;
isIframeContent?: boolean;
isShadowRoot?: boolean;
innerText?: string;
url?: string;
imageUrl?: string;
@@ -68,7 +69,7 @@ export const BrowserWindow = () => {
const { socket } = useSocketStore();
const { notify } = useGlobalInfoStore();
const { getText, getList, paginationMode, paginationType, limitMode } = useActionContext();
const { getText, getList, paginationMode, paginationType, limitMode, captureStage } = useActionContext();
const { addTextStep, addListStep } = useBrowserSteps();
const onMouseMove = (e: MouseEvent) => {
@@ -117,34 +118,81 @@ export const BrowserWindow = () => {
}, [screenShot, canvasRef, socket, screencastHandler]);
const highlighterHandler = useCallback((data: { rect: DOMRect, selector: string, elementInfo: ElementInfo | null, childSelectors?: string[] }) => {
console.log("LIST SELECTOR", listSelector);
console.log("DATA SELECTOR", data.selector);
console.log("CHILD SELECTORS", data.childSelectors);
if (getList === true) {
if (listSelector) {
socket?.emit('listSelector', { selector: listSelector });
const hasValidChildSelectors = Array.isArray(data.childSelectors) && data.childSelectors.length > 0;
if (limitMode) {
setHighlighterData(null);
} else if (paginationMode) {
// only set highlighterData if type is not empty, 'none', 'scrollDown', or 'scrollUp'
// Only set highlighterData if type is not empty, 'none', 'scrollDown', or 'scrollUp'
if (paginationType !== '' && !['none', 'scrollDown', 'scrollUp'].includes(paginationType)) {
setHighlighterData(data);
} else {
setHighlighterData(null);
}
} else if (data.childSelectors && data.childSelectors.includes(data.selector)) {
// highlight only valid child elements within the listSelector
// Highlight only valid child elements within the listSelector
setHighlighterData(data);
} else {
} else if (data.elementInfo?.isIframeContent && data.childSelectors) {
// Handle pure iframe elements - similar to previous shadow DOM logic but using iframe syntax
// Check if the selector matches any iframe child selectors
const isIframeChild = data.childSelectors.some(childSelector =>
data.selector.includes(':>>') && // Iframe uses :>> for traversal
childSelector.split(':>>').some(part =>
data.selector.includes(part.trim())
)
);
setHighlighterData(isIframeChild ? data : null);
} else if (data.selector.includes(':>>') && hasValidChildSelectors) {
// Handle mixed DOM cases with iframes
// Split the selector into parts and check each against child selectors
const selectorParts = data.selector.split(':>>').map(part => part.trim());
const isValidMixedSelector = selectorParts.some(part =>
// We know data.childSelectors is defined due to hasValidChildSelectors check
data.childSelectors!.some(childSelector =>
childSelector.includes(part)
)
);
setHighlighterData(isValidMixedSelector ? data : null);
} else if (data.elementInfo?.isShadowRoot && data.childSelectors) {
// New case: Handle pure Shadow DOM elements
// Check if the selector matches any shadow root child selectors
const isShadowChild = data.childSelectors.some(childSelector =>
data.selector.includes('>>') && // Shadow DOM uses >> for piercing
childSelector.split('>>').some(part =>
data.selector.includes(part.trim())
)
);
setHighlighterData(isShadowChild ? data : null);
} else if (data.selector.includes('>>') && hasValidChildSelectors) {
// New case: Handle mixed DOM cases
// Split the selector into parts and check each against child selectors
const selectorParts = data.selector.split('>>').map(part => part.trim());
const isValidMixedSelector = selectorParts.some(part =>
// Now we know data.childSelectors is defined
data.childSelectors!.some(childSelector =>
childSelector.includes(part)
)
);
setHighlighterData(isValidMixedSelector ? data : null);
} else {
// if !valid child in normal mode, clear the highlighter
setHighlighterData(null);
}
} else {
// set highlighterData for the initial listSelector selection
}
} else {
// Set highlighterData for the initial listSelector selection
setHighlighterData(data);
}
} else {
// for non-list steps
}
} else {
// For non-list steps
setHighlighterData(data);
}
}, [highlighterData, getList, socket, listSelector, paginationMode, paginationType]);
}
}, [highlighterData, getList, socket, listSelector, paginationMode, paginationType, captureStage]);
useEffect(() => {
@@ -158,6 +206,13 @@ export const BrowserWindow = () => {
};
}, [socket, onMouseMove]);
// While the capture flow is at its 'initial' stage and a list selector is
// already known, (re)announce list mode and that selector over the socket.
// Runs again whenever the stage, the selector, or the socket changes.
useEffect(() => {
  const atInitialStage = captureStage === 'initial';
  if (!atInitialStage || !listSelector) {
    return;
  }
  socket?.emit('setGetList', { getList: true });
  socket?.emit('listSelector', { selector: listSelector });
}, [captureStage, listSelector, socket]);
const handleClick = (e: React.MouseEvent<HTMLDivElement>) => {
if (highlighterData && canvasRef?.current) {
const canvasRect = canvasRef.current.getBoundingClientRect();
@@ -185,6 +240,7 @@ export const BrowserWindow = () => {
addTextStep('', data, {
selector: highlighterData.selector,
tag: highlighterData.elementInfo?.tagName,
shadow: highlighterData.elementInfo?.isShadowRoot,
attribute
});
} else {
@@ -192,7 +248,7 @@ export const BrowserWindow = () => {
setAttributeOptions(options);
setSelectedElement({
selector: highlighterData.selector,
info: highlighterData.elementInfo
info: highlighterData.elementInfo,
});
setShowAttributeModal(true);
}
@@ -229,6 +285,7 @@ export const BrowserWindow = () => {
selectorObj: {
selector: highlighterData.selector,
tag: highlighterData.elementInfo?.tagName,
shadow: highlighterData.elementInfo?.isShadowRoot,
attribute
}
};
@@ -276,6 +333,7 @@ export const BrowserWindow = () => {
addTextStep('', data, {
selector: selectedElement.selector,
tag: selectedElement.info?.tagName,
shadow: selectedElement.info?.isShadowRoot,
attribute: attribute
});
}
@@ -288,6 +346,7 @@ export const BrowserWindow = () => {
selectorObj: {
selector: selectedElement.selector,
tag: selectedElement.info?.tagName,
shadow: selectedElement.info?.isShadowRoot,
attribute: attribute
}
};
@@ -319,7 +378,6 @@ export const BrowserWindow = () => {
}
}, [paginationMode, resetPaginationSelector]);
return (
<div onClick={handleClick} style={{ width: '900px' }} id="browser-window">
{

View File

@@ -56,6 +56,8 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
const [showCaptureText, setShowCaptureText] = useState(true);
const [hoverStates, setHoverStates] = useState<{ [id: string]: boolean }>({});
const [browserStepIdList, setBrowserStepIdList] = useState<number[]>([]);
const [isCaptureTextConfirmed, setIsCaptureTextConfirmed] = useState(false);
const [isCaptureListConfirmed, setIsCaptureListConfirmed] = useState(false);
const { lastAction, notify, currentWorkflowActionsState, setCurrentWorkflowActionsState, resetInterpretationLog } = useGlobalInfoStore();
const { getText, startGetText, stopGetText, getScreenshot, startGetScreenshot, stopGetScreenshot, getList, startGetList, stopGetList, startPaginationMode, stopPaginationMode, paginationType, updatePaginationType, limitType, customLimit, updateLimitType, updateCustomLimit, stopLimitMode, startLimitMode, captureStage, setCaptureStage } = useActionContext();
@@ -130,6 +132,16 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
const handlePairDelete = () => { }
/**
 * Entry point for the "capture text" button: clears the
 * "text capture confirmed" flag, then starts text capture.
 */
const handleStartGetText = (): void => {
  // Reset the flag first so a fresh capture session begins unconfirmed.
  setIsCaptureTextConfirmed(false);
  startGetText();
};
/**
 * List-capture counterpart of the text handler: clears the
 * "list capture confirmed" flag, then starts list capture.
 */
const handleStartGetList = (): void => {
  // Reset the flag first so a fresh capture session begins unconfirmed.
  setIsCaptureListConfirmed(false);
  startGetList();
};
const handleTextLabelChange = (id: number, label: string, listId?: number, fieldKey?: string) => {
if (listId !== undefined && fieldKey !== undefined) {
// Prevent editing if the field is confirmed
@@ -169,6 +181,22 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
});
};
/**
 * Deletes the browser step with the given id and removes that id's entry
 * from the text-label, confirmed-step and error maps.
 *
 * @param id Identifier of the text step to remove.
 */
const handleTextStepDelete = (id: number) => {
  deleteBrowserStep(id);

  // Each updater returns a shallow copy of the previous map without `id`,
  // so the previous state object is never mutated.
  setTextLabels(previousLabels => {
    const remainingLabels = { ...previousLabels };
    delete remainingLabels[id];
    return remainingLabels;
  });

  setConfirmedTextSteps(previousConfirmed => {
    const remainingConfirmed = { ...previousConfirmed };
    delete remainingConfirmed[id];
    return remainingConfirmed;
  });

  setErrors(previousErrors => {
    const remainingErrors = { ...previousErrors };
    delete remainingErrors[id];
    return remainingErrors;
  });
};
const handleListTextFieldConfirm = (listId: number, fieldKey: string) => {
setConfirmedListTextFields(prev => ({
...prev,
@@ -195,6 +223,22 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
});
};
/**
 * Removes one field from a list step and clears the state kept for it.
 *
 * @param listId   Identifier of the list step that owns the field.
 * @param fieldKey Key of the field inside that list step.
 */
const handleListTextFieldDelete = (listId: number, fieldKey: string) => {
  removeListTextField(listId, fieldKey);

  // Rebuild the list's confirmation map without `fieldKey`; a list with no
  // entry yet is treated as an empty map.
  setConfirmedListTextFields(previous => {
    const { [fieldKey]: _dropped, ...remainingFields } = previous[listId] || {};
    return {
      ...previous,
      [listId]: remainingFields
    };
  });

  // The error map entry is looked up by the field key here.
  setErrors(previousErrors => {
    const { [fieldKey]: _dropped, ...remainingErrors } = previousErrors;
    return remainingErrors;
  });
};
const getTextSettingsObject = useCallback(() => {
const settings: Record<string, { selector: string; tag?: string;[key: string]: any }> = {};
browserSteps.forEach(step => {
@@ -224,6 +268,7 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
if (hasTextSteps) {
socket?.emit('action', { action: 'scrapeSchema', settings });
}
setIsCaptureTextConfirmed(true);
resetInterpretationLog();
onFinishCapture();
}, [stopGetText, getTextSettingsObject, socket, browserSteps, confirmedTextSteps, resetInterpretationLog]);
@@ -326,6 +371,7 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
}
stopLimitMode();
setShowLimitOptions(false);
setIsCaptureListConfirmed(true);
stopCaptureAndEmitGetListSettings();
setCaptureStage('complete');
break;
@@ -336,6 +382,23 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
}
}, [captureStage, paginationType, limitType, customLimit, startPaginationMode, stopPaginationMode, startLimitMode, stopLimitMode, notify, stopCaptureAndEmitGetListSettings, getListSettingsObject]);
/**
 * Moves the list-capture flow back one stage:
 *   'limit'      -> 'pagination' (limit mode off, pagination mode back on)
 *   'pagination' -> 'initial'    (pagination mode off)
 * Any other stage is left untouched.
 */
const handleBackCaptureList = useCallback(() => {
  if (captureStage === 'limit') {
    // Leave the limit step and re-open the pagination step.
    stopLimitMode();
    setShowLimitOptions(false);
    startPaginationMode();
    setShowPaginationOptions(true);
    setCaptureStage('pagination');
    return;
  }

  if (captureStage === 'pagination') {
    // Leave the pagination step and return to the initial stage.
    stopPaginationMode();
    setShowPaginationOptions(false);
    setCaptureStage('initial');
  }
}, [captureStage, stopLimitMode, startPaginationMode, stopPaginationMode]);
const handlePaginationSettingSelect = (option: PaginationType) => {
updatePaginationType(option);
};
@@ -350,6 +413,7 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
setTextLabels({});
setErrors({});
setConfirmedTextSteps({});
setIsCaptureTextConfirmed(false);
notify('error', t('right_panel.errors.capture_text_discarded'));
}, [browserSteps, stopGetText, deleteBrowserStep]);
@@ -365,6 +429,7 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
setShowLimitOptions(false);
setCaptureStage('initial');
setConfirmedListTextFields({});
setIsCaptureListConfirmed(false);
notify('error', t('right_panel.errors.capture_list_discarded'));
}, [browserSteps, stopGetList, deleteBrowserStep, resetListState]);
@@ -408,6 +473,14 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
{getList && (
<>
<Box display="flex" justifyContent="space-between" gap={2} style={{ margin: '15px' }}>
{(captureStage === 'pagination' || captureStage === 'limit') && (
<Button
variant="outlined"
onClick={handleBackCaptureList}
>
{t('right_panel.buttons.back')}
</Button>
)}
<Button
variant="outlined"
onClick={handleConfirmListCapture}
@@ -418,7 +491,9 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
captureStage === 'limit' ? t('right_panel.buttons.confirm_limit') :
t('right_panel.buttons.finish_capture')}
</Button>
<Button variant="outlined" color="error" onClick={discardGetList}>{t('right_panel.buttons.discard')}</Button>
<Button variant="outlined" color="error" onClick={discardGetList}>
{t('right_panel.buttons.discard')}
</Button>
</Box>
</>
)}
@@ -454,7 +529,22 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
<TextField
type="number"
value={customLimit}
onChange={(e) => updateCustomLimit(e.target.value)}
onChange={(e: React.ChangeEvent<HTMLInputElement>) => {
const value = parseInt(e.target.value);
// Only update if the value is greater than or equal to 1 or if the field is empty
if (e.target.value === '' || value >= 1) {
updateCustomLimit(e.target.value);
}
}}
inputProps={{
min: 1,
onKeyPress: (e: React.KeyboardEvent<HTMLInputElement>) => {
const value = (e.target as HTMLInputElement).value + e.key;
if (parseInt(value) < 1) {
e.preventDefault();
}
}
}}
placeholder={t('right_panel.limit.enter_number')}
sx={{
marginLeft: '10px',
@@ -470,7 +560,7 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
</RadioGroup>
</FormControl>
)}
{!getText && !getScreenshot && !getList && showCaptureText && <Button variant="contained" onClick={startGetText}>{t('right_panel.buttons.capture_text')}</Button>}
{!getText && !getScreenshot && !getList && showCaptureText && <Button variant="contained" onClick={handleStartGetText}>{t('right_panel.buttons.capture_text')}</Button>}
{getText &&
<>
<Box display="flex" justifyContent="space-between" gap={2} style={{ margin: '15px' }}>
@@ -526,11 +616,21 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
)
}}
/>
{!confirmedTextSteps[step.id] && (
{!confirmedTextSteps[step.id] ? (
<Box display="flex" justifyContent="space-between" gap={2}>
<Button variant="contained" onClick={() => handleTextStepConfirm(step.id)} disabled={!textLabels[step.id]?.trim()}>{t('right_panel.buttons.confirm')}</Button>
<Button variant="contained" color="error" onClick={() => handleTextStepDiscard(step.id)}>{t('right_panel.buttons.discard')}</Button>
</Box>
) : !isCaptureTextConfirmed && (
<Box display="flex" justifyContent="flex-end" gap={2}>
<Button
variant="contained"
color="error"
onClick={() => handleTextStepDelete(step.id)}
>
{t('right_panel.buttons.delete')}
</Button>
</Box>
)}
</>
)}
@@ -548,61 +648,70 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
Object.entries(step.fields).length === 0 ? (
<Typography>{t('right_panel.messages.list_empty')}</Typography>
) : (
<>
<Typography>{t('right_panel.messages.list_selected')}</Typography>
{Object.entries(step.fields).map(([key, field]) => (
<Box key={key}>
<TextField
label={t('right_panel.fields.field_label')}
value={field.label || ''}
onChange={(e) => handleTextLabelChange(field.id, e.target.value, step.id, key)}
fullWidth
margin="normal"
InputProps={{
readOnly: confirmedListTextFields[field.id]?.[key],
startAdornment: (
<InputAdornment position="start">
<EditIcon />
</InputAdornment>
)
}}
/>
<TextField
label={t('right_panel.fields.field_data')}
value={field.data || ''}
fullWidth
margin="normal"
InputProps={{
readOnly: true,
startAdornment: (
<InputAdornment position="start">
<TextFieldsIcon />
</InputAdornment>
)
}}
/>
{!confirmedListTextFields[step.id]?.[key] && (
<Box display="flex" justifyContent="space-between" gap={2}>
<Button
variant="contained"
onClick={() => handleListTextFieldConfirm(step.id, key)}
disabled={!field.label?.trim()}
>
{t('right_panel.buttons.confirm')}
</Button>
<Button
variant="contained"
color="error"
onClick={() => handleListTextFieldDiscard(step.id, key)}
>
{t('right_panel.buttons.discard')}
</Button>
</Box>
)}
</Box>
))}
</>
)
<>
<Typography>{t('right_panel.messages.list_selected')}</Typography>
{Object.entries(step.fields).map(([key, field]) => (
<Box key={key}>
<TextField
label={t('right_panel.fields.field_label')}
value={field.label || ''}
onChange={(e) => handleTextLabelChange(field.id, e.target.value, step.id, key)}
fullWidth
margin="normal"
InputProps={{
readOnly: confirmedListTextFields[field.id]?.[key],
startAdornment: (
<InputAdornment position="start">
<EditIcon />
</InputAdornment>
)
}}
/>
<TextField
label={t('right_panel.fields.field_data')}
value={field.data || ''}
fullWidth
margin="normal"
InputProps={{
readOnly: true,
startAdornment: (
<InputAdornment position="start">
<TextFieldsIcon />
</InputAdornment>
)
}}
/>
{!confirmedListTextFields[step.id]?.[key] ? (
<Box display="flex" justifyContent="space-between" gap={2}>
<Button
variant="contained"
onClick={() => handleListTextFieldConfirm(step.id, key)}
disabled={!field.label?.trim()}
>
{t('right_panel.buttons.confirm')}
</Button>
<Button
variant="contained"
color="error"
onClick={() => handleListTextFieldDiscard(step.id, key)}
>
{t('right_panel.buttons.discard')}
</Button>
</Box>
) : !isCaptureListConfirmed && (
<Box display="flex" justifyContent="flex-end" gap={2}>
<Button
variant="contained"
color="error"
onClick={() => handleListTextFieldDelete(step.id, key)}
>
{t('right_panel.buttons.delete')}
</Button>
</Box>
)}
</Box>
))}
</>
)}
</Box>
))}

View File

@@ -32,6 +32,7 @@ export interface SelectorObject {
selector: string;
tag?: string;
attribute?: string;
shadow?: boolean;
[key: string]: any;
}