Merge pull request #22 from amhsirak/develop
feat: `scrapeList` & `scrapeListAuto` actions
This commit is contained in:
@@ -23,7 +23,11 @@
|
|||||||
"author": "Karishma Shukla",
|
"author": "Karishma Shukla",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
"@cliqz/adblocker-playwright": "^1.31.3",
|
||||||
|
"cross-fetch": "^4.0.0",
|
||||||
"joi": "^17.6.0",
|
"joi": "^17.6.0",
|
||||||
"playwright": "^1.20.1"
|
"playwright": "^1.20.1",
|
||||||
|
"playwright-extra": "^4.3.6",
|
||||||
|
"puppeteer-extra-plugin-stealth": "^2.11.2"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -126,6 +126,85 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Scrolls the window down one viewport at a time until at least `limit`
 * elements matching `selector` are present in the DOM, or until the document
 * height stops changing between two consecutive scrolls.
 * @param {string} selector - CSS selector of the items being loaded.
 * @param {number} limit - Number of matching items to wait for.
 * @returns {Promise<void>} Resolves once scrolling has stopped.
 */
async function scrollDownToLoadMore(selector, limit) {
  let previousHeight = 0;
  let itemsLoaded = 0;

  while (itemsLoaded < limit) {
    window.scrollBy(0, window.innerHeight);
    // Give lazily loaded content a moment to be appended.
    await new Promise(resolve => setTimeout(resolve, 1000));

    const currentHeight = document.body.scrollHeight;
    if (currentHeight === previousHeight) {
      break; // No more items to load
    }
    previousHeight = currentHeight;

    // querySelectorAll already returns every item currently in the DOM, so
    // the count is assigned, not accumulated (accumulating double-counts
    // items seen in earlier iterations and ends the loop too early).
    itemsLoaded = document.querySelectorAll(selector).length;
  }
}
|
||||||
|
|
||||||
|
/**
 * Scrolls the window up one viewport at a time until at least `limit`
 * elements matching `selector` are present in the DOM, or until the document
 * height stops changing between two consecutive scrolls.
 * @param {string} selector - CSS selector of the items being loaded.
 * @param {number} limit - Number of matching items to wait for.
 * @returns {Promise<void>} Resolves once scrolling has stopped.
 */
async function scrollUpToLoadMore(selector, limit) {
  let previousHeight = 0;
  let itemsLoaded = 0;

  while (itemsLoaded < limit) {
    window.scrollBy(0, -window.innerHeight);
    // Give lazily loaded content a moment to be inserted.
    await new Promise(resolve => setTimeout(resolve, 1000));

    const currentHeight = document.body.scrollHeight;
    if (currentHeight === previousHeight) {
      break; // No more items to load
    }
    previousHeight = currentHeight;

    // Assign rather than accumulate: the query returns all items currently
    // in the DOM, including the ones counted in earlier iterations.
    itemsLoaded = document.querySelectorAll(selector).length;
  }
}
|
||||||
|
|
||||||
|
/**
 * Advances a paginated list by clicking a pagination control.
 *
 * All elements matching `selector` are considered:
 *  - exactly one match: it is treated as a "Next" button and clicked;
 *  - several matches with one carrying the `active` class: they are treated
 *    as numbered page buttons and the button following the active one is
 *    clicked;
 *  - several matches and none `active`: the first match is clicked.
 *
 * @param {string} selector - CSS selector of the pagination control(s).
 * @param {Array} scrapedData - Items scraped so far; only its length is read.
 * @param {number} limit - Maximum number of items wanted.
 * @returns {Promise<boolean>} `false` when the limit is already met (nothing
 *   is clicked), `true` when a control was clicked.
 * @throws {Error} When no control matches, or the active page is the last one.
 */
async function clickNextPagination(selector, scrapedData, limit) {
  // Check if the limit is already met
  if (scrapedData.length >= limit) {
    return false;
  }

  const endOfPagination = () => new Error("No more items to load or pagination has ended.");

  const buttons = Array.from(document.querySelectorAll(selector));
  if (buttons.length === 0) {
    throw endOfPagination();
  }

  // A single match is a plain "Next" button.
  if (buttons.length === 1) {
    buttons[0].click();
    return true;
  }

  // Numbered pagination: move to the button right after the active page.
  const activeIndex = buttons.findIndex(button => button.classList.contains('active'));
  if (activeIndex === -1) {
    buttons[0].click();
    return true;
  }

  const following = buttons[activeIndex + 1];
  if (!following) {
    throw endOfPagination(); // Active page is the last one
  }
  following.click();
  return true;
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a "scrape" result from the current page.
|
* Returns a "scrape" result from the current page.
|
||||||
* @returns {Array<Object>} *Curated* array of scraped information (with sparse rows removed)
|
* @returns {Array<Object>} *Curated* array of scraped information (with sparse rows removed)
|
||||||
@@ -183,6 +262,7 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
* TODO: Simplify.
|
||||||
* Given an object with named lists of elements,
|
* Given an object with named lists of elements,
|
||||||
* groups the elements by their distance in the DOM tree.
|
* groups the elements by their distance in the DOM tree.
|
||||||
* @param {Object.<string, {selector: string, tag: string}>} lists The named lists of HTML elements.
|
* @param {Object.<string, {selector: string, tag: string}>} lists The named lists of HTML elements.
|
||||||
@@ -250,4 +330,134 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Scrapes a list of similar items from the current page.
 *
 * Every element matching `listSelector` is treated as one item. For each
 * item, every entry of `fields` is looked up inside that item and the
 * requested attribute is stored under the field's label. Fields whose
 * selector matches nothing are left out of the record.
 *
 * Each matching element is visited once; if fewer than `limit` elements
 * match, fewer records are returned (and an empty array when none match).
 *
 * @param {Object} config - Configuration object
 * @param {string} config.listSelector - Selector matching each list item
 * @param {Object.<string, {selector: string, attribute?: string}>} config.fields - Fields to scrape;
 *   `attribute` may be `innerText`, `innerHTML`, `src`, `href`, or any other
 *   name, which is read with `getAttribute`
 * @param {number} [config.limit=10] - Maximum number of items to scrape
 * @returns {Promise<Array.<Object>>} One record per scraped item
 */
window.scrapeList = async function ({ listSelector, fields, limit = 10 }) {
  const scrapedData = [];

  // Single pass: the DOM does not change while this function runs, so
  // looping again could only yield duplicates (or never terminate when
  // nothing matches).
  for (const parent of document.querySelectorAll(listSelector)) {
    if (scrapedData.length >= limit) break;

    const record = {};
    for (const [label, { selector, attribute }] of Object.entries(fields)) {
      const fieldElement = parent.querySelector(selector);
      if (!fieldElement) continue;

      switch (attribute) {
        case 'innerText':
          record[label] = fieldElement.innerText.trim();
          break;
        case 'innerHTML':
          record[label] = fieldElement.innerHTML.trim();
          break;
        case 'src':
          record[label] = fieldElement.src;
          break;
        case 'href':
          record[label] = fieldElement.href;
          break;
        default:
          record[label] = fieldElement.getAttribute(attribute);
      }
    }
    scrapedData.push(record);
  }

  return scrapedData;
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Gets all direct children of the elements matching the listSelector,
 * returning a CSS selector path and the text of each child.
 *
 * The path is built by walking up from the child: each step contributes
 * `tag.class1.class2`, and the walk stops at the first ancestor that has an
 * id (contributing `tag#id`) or at the document root.
 *
 * @param {string} listSelector - Selector for the list container(s)
 * @returns {Array.<{selector: string, innerText: string}>} One entry per child
 */
window.scrapeListAuto = function (listSelector) {
  const results = [];

  for (const list of document.querySelectorAll(listSelector)) {
    for (const child of Array.from(list.children)) {
      const segments = [];

      // Traverse up to gather the CSS selector for the element
      for (let element = child; element; element = element.parentElement) {
        const tag = element.nodeName.toLowerCase();

        if (element.id) {
          segments.push(`${tag}#${CSS.escape(element.id)}`);
          break;
        }

        // classList works for SVG elements too (their `className` is not a
        // string); CSS.escape keeps class names such as `md:flex` valid.
        const classes = Array.from(element.classList, cls => `.${CSS.escape(cls)}`).join('');
        segments.push(tag + classes);
      }

      // Non-HTML children (e.g. <svg>) have no innerText; use textContent.
      const text = child.innerText ?? child.textContent ?? '';

      results.push({
        selector: segments.reverse().join(' > '),
        innerText: text.trim(),
      });
    }
  }

  return results;
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Repeatedly jumps to the bottom of the document until at least `limit`
 * elements matching `selector` are present, or until the document height
 * stops changing between two consecutive jumps.
 * @param {string} selector - CSS selector of the items being loaded.
 * @param {number} limit - Number of matching items to wait for.
 * @returns {Promise<void>} Resolves once scrolling has stopped.
 */
window.scrollDown = async function (selector, limit) {
  let previousHeight = 0;
  let itemsLoaded = 0;

  while (itemsLoaded < limit) {
    window.scrollTo(0, document.body.scrollHeight);
    // Give lazily loaded content a moment to be appended.
    await new Promise(resolve => setTimeout(resolve, 1000));

    const currentHeight = document.body.scrollHeight;
    if (currentHeight === previousHeight) {
      break; // No more items to load
    }
    previousHeight = currentHeight;

    // Assign rather than accumulate: the query already returns every item
    // currently in the DOM.
    itemsLoaded = document.querySelectorAll(selector).length;
  }
}
|
||||||
|
|
||||||
|
/**
 * Scrolls up one viewport at a time until at least `limit` elements matching
 * `selector` are present, or until the document height stops changing
 * between two consecutive scrolls.
 * @param {string} selector - CSS selector of the items being loaded.
 * @param {number} limit - Number of matching items to wait for.
 * @returns {Promise<void>} Resolves once scrolling has stopped.
 */
window.scrollUp = async function (selector, limit) {
  let previousHeight = 0;
  let itemsLoaded = 0;

  while (itemsLoaded < limit) {
    window.scrollBy(0, -window.innerHeight);
    // Give lazily loaded content a moment to be inserted.
    await new Promise(resolve => setTimeout(resolve, 1000));

    const currentHeight = document.body.scrollHeight;
    if (currentHeight === previousHeight) {
      break; // No more items to load
    }
    previousHeight = currentHeight;

    // Assign rather than accumulate: the query already returns every item
    // currently in the DOM.
    itemsLoaded = document.querySelectorAll(selector).length;
  }
}
|
||||||
|
|
||||||
})(window);
|
})(window);
|
||||||
@@ -1,5 +1,7 @@
|
|||||||
/* eslint-disable no-await-in-loop, no-restricted-syntax */
|
/* eslint-disable no-await-in-loop, no-restricted-syntax */
|
||||||
import { Page, PageScreenshotOptions } from 'playwright';
|
import { Page, PageScreenshotOptions } from 'playwright';
|
||||||
|
import { PlaywrightBlocker } from '@cliqz/adblocker-playwright';
|
||||||
|
import fetch from 'cross-fetch';
|
||||||
import path from 'path';
|
import path from 'path';
|
||||||
|
|
||||||
import { EventEmitter } from 'events';
|
import { EventEmitter } from 'events';
|
||||||
@@ -29,6 +31,7 @@ interface InterpreterOptions {
|
|||||||
}>
|
}>
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Class for running the Smart Workflows.
|
* Class for running the Smart Workflows.
|
||||||
*/
|
*/
|
||||||
@@ -45,6 +48,8 @@ export default class Interpreter extends EventEmitter {
|
|||||||
|
|
||||||
private log: typeof log;
|
private log: typeof log;
|
||||||
|
|
||||||
|
private blocker: PlaywrightBlocker | null = null;
|
||||||
|
|
||||||
constructor(workflow: WorkflowFile, options?: Partial<InterpreterOptions>) {
|
constructor(workflow: WorkflowFile, options?: Partial<InterpreterOptions>) {
|
||||||
super();
|
super();
|
||||||
this.workflow = workflow.workflow;
|
this.workflow = workflow.workflow;
|
||||||
@@ -76,6 +81,24 @@ export default class Interpreter extends EventEmitter {
|
|||||||
oldLog(...args);
|
oldLog(...args);
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PlaywrightBlocker.fromPrebuiltAdsAndTracking(fetch).then(blocker => {
|
||||||
|
this.blocker = blocker;
|
||||||
|
}).catch(err => {
|
||||||
|
this.log(`Failed to initialize ad-blocker:`, Level.ERROR);
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Enables request blocking on `page` using the stored blocker instance.
 * Does nothing while no blocker is set on this interpreter.
 * @param page Page on which blocking should be enabled.
 */
private async applyAdBlocker(page: Page): Promise<void> {
  const { blocker } = this;
  if (!blocker) {
    return;
  }
  await blocker.enableBlockingInPage(page);
}
|
||||||
|
|
||||||
|
/**
 * Disables request blocking on `page` using the stored blocker instance.
 * Does nothing while no blocker is set on this interpreter.
 * @param page Page on which blocking should be disabled.
 */
private async disableAdBlocker(page: Page): Promise<void> {
  const { blocker } = this;
  if (!blocker) {
    return;
  }
  await blocker.disableBlockingInPage(page);
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -285,11 +308,32 @@ export default class Interpreter extends EventEmitter {
|
|||||||
|
|
||||||
scrapeSchema: async (schema: Record<string, { selector: string; tag: string, attribute: string; }>) => {
|
scrapeSchema: async (schema: Record<string, { selector: string; tag: string, attribute: string; }>) => {
|
||||||
await this.ensureScriptsLoaded(page);
|
await this.ensureScriptsLoaded(page);
|
||||||
|
|
||||||
const scrapeResult = await page.evaluate((schemaObj) => window.scrapeSchema(schemaObj), schema);
|
const scrapeResult = await page.evaluate((schemaObj) => window.scrapeSchema(schemaObj), schema);
|
||||||
await this.options.serializableCallback(scrapeResult);
|
await this.options.serializableCallback(scrapeResult);
|
||||||
},
|
},
|
||||||
|
|
||||||
|
// Scrapes a list of items: directly in the page when no pagination is
// configured, otherwise through handlePagination. The result is handed to
// the serializable callback either way.
scrapeList: async (config: { listSelector: string, fields: any, limit?: number, pagination: any }) => {
  await this.ensureScriptsLoaded(page);

  const scrapeResults: Record<string, any>[] = config.pagination
    ? await this.handlePagination(page, config)
    : await page.evaluate((cfg) => window.scrapeList(cfg), config);

  await this.options.serializableCallback(scrapeResults);
},
|
||||||
|
|
||||||
|
// Runs the browser-side scrapeListAuto helper for the configured list
// selector and hands its result to the serializable callback.
scrapeListAuto: async (config: { listSelector: string }) => {
  await this.ensureScriptsLoaded(page);

  const { listSelector: targetSelector } = config;
  const scrapeResults: { selector: string, innerText: string }[] = await page.evaluate(
    (listSelector) => window.scrapeListAuto(listSelector),
    targetSelector,
  );

  await this.options.serializableCallback(scrapeResults);
},
|
||||||
|
|
||||||
scroll: async (pages?: number) => {
|
scroll: async (pages?: number) => {
|
||||||
await page.evaluate(async (pagesInternal) => {
|
await page.evaluate(async (pagesInternal) => {
|
||||||
for (let i = 1; i <= (pagesInternal ?? 1); i += 1) {
|
for (let i = 1; i <= (pagesInternal ?? 1); i += 1) {
|
||||||
@@ -298,6 +342,7 @@ export default class Interpreter extends EventEmitter {
|
|||||||
}
|
}
|
||||||
}, pages ?? 1);
|
}, pages ?? 1);
|
||||||
},
|
},
|
||||||
|
|
||||||
script: async (code: string) => {
|
script: async (code: string) => {
|
||||||
const AsyncFunction: FunctionConstructor = Object.getPrototypeOf(
|
const AsyncFunction: FunctionConstructor = Object.getPrototypeOf(
|
||||||
async () => { },
|
async () => { },
|
||||||
@@ -305,6 +350,7 @@ export default class Interpreter extends EventEmitter {
|
|||||||
const x = new AsyncFunction('page', 'log', code);
|
const x = new AsyncFunction('page', 'log', code);
|
||||||
await x(page, this.log);
|
await x(page, this.log);
|
||||||
},
|
},
|
||||||
|
|
||||||
flag: async () => new Promise((res) => {
|
flag: async () => new Promise((res) => {
|
||||||
this.emit('flag', page, res);
|
this.emit('flag', page, res);
|
||||||
}),
|
}),
|
||||||
@@ -338,7 +384,82 @@ export default class Interpreter extends EventEmitter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Scrapes a list while driving the page's pagination, according to
 * `config.pagination.type`:
 *
 *  - `scrollDown` / `scrollUp`: jump to the bottom / top of the document
 *    until its height stops changing, then scrape once.
 *  - `clickNext`: scrape the page, keep items not seen before, click the
 *    element matching `config.pagination.selector`, wait for navigation,
 *    repeat until the limit is reached or no such element exists.
 *  - `clickLoadMore`: click the element matching
 *    `config.pagination.selector` and re-scrape, until the element is gone
 *    or a click yields no additional items.
 *  - anything else: scrape the current page once.
 *
 * @param page Page to scrape.
 * @param config Same configuration object that is passed to the browser-side
 *   `window.scrapeList`, plus the `pagination` settings.
 * @returns The collected records, cut to `config.limit` when it is set.
 */
private async handlePagination(page: Page, config: { listSelector: string, fields: any, limit?: number, pagination: any }) {
  let allResults: Record<string, any>[] = [];
  let previousHeight = 0;
  // track unique items per page to avoid re-scraping
  const scrapedItems = new Set<string>();

  const scrapeCurrentPage = () => page.evaluate((cfg) => window.scrapeList(cfg), config);
  const capToLimit = (items: Record<string, any>[]) => (
    config.limit ? items.slice(0, config.limit) : items
  );

  while (true) {
    switch (config.pagination.type) {
      case 'scrollDown':
      case 'scrollUp': {
        // scrollUp previously had no body, which made this loop spin forever.
        const towardsBottom = config.pagination.type === 'scrollDown';
        await page.evaluate(
          (down) => window.scrollTo(0, down ? document.body.scrollHeight : 0),
          towardsBottom,
        );
        await page.waitForTimeout(2000);

        const currentHeight = await page.evaluate(() => document.body.scrollHeight);
        if (currentHeight === previousHeight) {
          // Nothing new was loaded by the last scroll: scrape and finish.
          const finalResults = await scrapeCurrentPage();
          return capToLimit(allResults.concat(finalResults));
        }

        previousHeight = currentHeight;
        break;
      }
      case 'clickNext': {
        const pageResults = await scrapeCurrentPage();

        // Filter out already scraped items
        const newResults = pageResults.filter(item => {
          const uniqueKey = JSON.stringify(item);
          if (scrapedItems.has(uniqueKey)) return false; // Ignore if already scraped
          scrapedItems.add(uniqueKey); // Mark as scraped
          return true;
        });

        allResults = allResults.concat(newResults);

        if (config.limit && allResults.length >= config.limit) {
          return allResults.slice(0, config.limit);
        }

        const nextButton = await page.$(config.pagination.selector);
        if (!nextButton) {
          return allResults; // No more pages to scrape
        }

        await Promise.all([
          nextButton.click(),
          page.waitForNavigation({ waitUntil: 'networkidle' })
        ]);

        await page.waitForTimeout(1000);
        break;
      }
      case 'clickLoadMore': {
        const loadMoreButton = await page.$(config.pagination.selector);
        if (loadMoreButton) {
          await loadMoreButton.click();
          // Give the newly requested items time to be rendered.
          await page.waitForTimeout(2000);
        }

        // The page keeps earlier items, so the latest scrape supersedes the
        // previous one instead of being appended to it.
        const loaded = await scrapeCurrentPage();
        const grew = loaded.length > allResults.length;
        if (grew) {
          allResults = loaded;
        }

        // Stop when the button is gone or the click produced nothing new;
        // otherwise a permanently visible button would loop forever.
        if (!loadMoreButton || !grew) {
          return capToLimit(allResults);
        }
        break;
      }
      default: {
        const results = await scrapeCurrentPage();
        allResults = allResults.concat(results);
        return allResults;
      }
    }

    if (config.limit && allResults.length >= config.limit) {
      allResults = allResults.slice(0, config.limit);
      break;
    }
  }

  return allResults;
}
|
||||||
|
|
||||||
private async runLoop(p: Page, workflow: Workflow) {
|
private async runLoop(p: Page, workflow: Workflow) {
|
||||||
|
// apply ad-blocker to the current page
|
||||||
|
await this.applyAdBlocker(p);
|
||||||
const usedActions: string[] = [];
|
const usedActions: string[] = [];
|
||||||
let lastAction = null;
|
let lastAction = null;
|
||||||
let repeatCount = 0;
|
let repeatCount = 0;
|
||||||
@@ -404,13 +525,14 @@ export default class Interpreter extends EventEmitter {
|
|||||||
this.log(<Error>e, Level.ERROR);
|
this.log(<Error>e, Level.ERROR);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
//await this.disableAdBlocker(p);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private async ensureScriptsLoaded(page: Page) {
|
private async ensureScriptsLoaded(page: Page) {
|
||||||
const isScriptLoaded = await page.evaluate(() => typeof window.scrape === 'function' && typeof window.scrapeSchema === 'function');
|
const isScriptLoaded = await page.evaluate(() => typeof window.scrape === 'function' && typeof window.scrapeSchema === 'function' && typeof window.scrapeList === 'function' && typeof window.scrapeListAuto === 'function' && typeof window.scrollDown === 'function' && typeof window.scrollUp === 'function');
|
||||||
if (!isScriptLoaded) {
|
if (!isScriptLoaded) {
|
||||||
await page.addInitScript({ path: path.join(__dirname, 'browserSide', 'scraper.js') });
|
await page.addInitScript({ path: path.join(__dirname, 'browserSide', 'scraper.js') });
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -46,11 +46,11 @@ export default class Preprocessor {
|
|||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extracts parameter names from the workflow.
|
* Extracts parameter names from the workflow.
|
||||||
* @param {WorkflowFile} workflow The given workflow
|
* @param {WorkflowFile} workflow The given workflow
|
||||||
* @returns {String[]} List of parameters' names.
|
* @returns {String[]} List of parameters' names.
|
||||||
*/
|
*/
|
||||||
static getParams(workflow: WorkflowFile): string[] {
|
static getParams(workflow: WorkflowFile): string[] {
|
||||||
const getParamsRecurse = (object: any): string[] => {
|
const getParamsRecurse = (object: any): string[] => {
|
||||||
if (typeof object === 'object') {
|
if (typeof object === 'object') {
|
||||||
@@ -69,10 +69,10 @@ export default class Preprocessor {
|
|||||||
return getParamsRecurse(workflow.workflow);
|
return getParamsRecurse(workflow.workflow);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* List all the selectors used in the given workflow (only literal "selector"
|
* List all the selectors used in the given workflow (only literal "selector"
|
||||||
* field in WHERE clauses so far)
|
* field in WHERE clauses so far)
|
||||||
*/
|
*/
|
||||||
// TODO : add recursive selector search (also in click/fill etc. events?)
|
// TODO : add recursive selector search (also in click/fill etc. events?)
|
||||||
static extractSelectors(workflow: Workflow): SelectorArray {
|
static extractSelectors(workflow: Workflow): SelectorArray {
|
||||||
/**
|
/**
|
||||||
@@ -107,11 +107,11 @@ export default class Preprocessor {
|
|||||||
], []);
|
], []);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Recursively crawl `object` and initializes params - replaces the `{$param : paramName}` objects
|
* Recursively crawl `object` and initializes params - replaces the `{$param : paramName}` objects
|
||||||
* with the defined value.
|
* with the defined value.
|
||||||
* @returns {Workflow} Copy of the given workflow, modified (the initial workflow is left untouched).
|
* @returns {Workflow} Copy of the given workflow, modified (the initial workflow is left untouched).
|
||||||
*/
|
*/
|
||||||
static initWorkflow(workflow: Workflow, params?: ParamType): Workflow {
|
static initWorkflow(workflow: Workflow, params?: ParamType): Workflow {
|
||||||
const paramNames = this.getParams({ workflow });
|
const paramNames = this.getParams({ workflow });
|
||||||
|
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ type MethodNames<T> = {
|
|||||||
[K in keyof T]: T[K] extends Function ? K : never;
|
[K in keyof T]: T[K] extends Function ? K : never;
|
||||||
}[keyof T];
|
}[keyof T];
|
||||||
|
|
||||||
export type CustomFunctions = 'scrape' | 'scrapeSchema' | 'scroll' | 'screenshot' | 'script' | 'enqueueLinks' | 'flag';
|
export type CustomFunctions = 'scrape' | 'scrapeSchema' | 'scroll' | 'screenshot' | 'script' | 'enqueueLinks' | 'flag' | 'scrapeList' | 'scrapeListAuto';
|
||||||
|
|
||||||
export type What = {
|
export type What = {
|
||||||
action: MethodNames<Page> | CustomFunctions,
|
action: MethodNames<Page> | CustomFunctions,
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import {
|
|||||||
getElementInformation,
|
getElementInformation,
|
||||||
getRect,
|
getRect,
|
||||||
getSelectors,
|
getSelectors,
|
||||||
|
getNonUniqueSelectors,
|
||||||
isRuleOvershadowing,
|
isRuleOvershadowing,
|
||||||
selectorAlreadyInWorkflow
|
selectorAlreadyInWorkflow
|
||||||
} from "../selector";
|
} from "../selector";
|
||||||
@@ -46,6 +47,12 @@ export class WorkflowGenerator {
|
|||||||
*/
|
*/
|
||||||
private socket: Socket;
|
private socket: Socket;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* getList is one of the custom actions from maxun-core.
|
||||||
|
* Used to provide appropriate selectors for the getList action.
|
||||||
|
*/
|
||||||
|
private getList: boolean = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The public constructor of the WorkflowGenerator.
|
* The public constructor of the WorkflowGenerator.
|
||||||
* Takes socket for communication as a parameter and registers some important events on it.
|
* Takes socket for communication as a parameter and registers some important events on it.
|
||||||
@@ -55,6 +62,7 @@ export class WorkflowGenerator {
|
|||||||
public constructor(socket: Socket) {
|
public constructor(socket: Socket) {
|
||||||
this.socket = socket;
|
this.socket = socket;
|
||||||
this.registerEventHandlers(socket);
|
this.registerEventHandlers(socket);
|
||||||
|
this.initializeSocketListeners();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -88,6 +96,15 @@ export class WorkflowGenerator {
|
|||||||
lastAction: '',
|
lastAction: '',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Registers the generator's own socket listener: a `setGetList` event
 * stores the received flag in `this.getList`.
 */
private initializeSocketListeners() {
  this.socket.on('setGetList', ({ getList }: { getList: boolean }) => {
    this.getList = getList;
  });
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Registers the event handlers for all generator-related events on the socket.
|
* Registers the event handlers for all generator-related events on the socket.
|
||||||
* @param socket The socket used to communicate with the client.
|
* @param socket The socket used to communicate with the client.
|
||||||
@@ -459,13 +476,17 @@ export class WorkflowGenerator {
|
|||||||
*/
|
*/
|
||||||
private generateSelector = async (page: Page, coordinates: Coordinates, action: ActionType) => {
|
private generateSelector = async (page: Page, coordinates: Coordinates, action: ActionType) => {
|
||||||
const elementInfo = await getElementInformation(page, coordinates);
|
const elementInfo = await getElementInformation(page, coordinates);
|
||||||
|
|
||||||
|
const selectorBasedOnCustomAction = (this.getList === true)
|
||||||
|
? await getNonUniqueSelectors(page, coordinates)
|
||||||
|
: await getSelectors(page, coordinates);
|
||||||
const bestSelector = getBestSelectorForAction(
|
const bestSelector = getBestSelectorForAction(
|
||||||
{
|
{
|
||||||
type: action,
|
type: action,
|
||||||
tagName: elementInfo?.tagName as TagName || '',
|
tagName: elementInfo?.tagName as TagName || '',
|
||||||
inputType: undefined,
|
inputType: undefined,
|
||||||
value: undefined,
|
value: undefined,
|
||||||
selectors: await getSelectors(page, coordinates) || {},
|
selectors: selectorBasedOnCustomAction || {},
|
||||||
timestamp: 0,
|
timestamp: 0,
|
||||||
isPassword: false,
|
isPassword: false,
|
||||||
hasOnlyText: elementInfo?.hasOnlyText || false,
|
hasOnlyText: elementInfo?.hasOnlyText || false,
|
||||||
@@ -488,6 +509,8 @@ export class WorkflowGenerator {
|
|||||||
if (rect) {
|
if (rect) {
|
||||||
this.socket.emit('highlighter', { rect, selector: displaySelector, elementInfo });
|
this.socket.emit('highlighter', { rect, selector: displaySelector, elementInfo });
|
||||||
}
|
}
|
||||||
|
// reset getList after usage
|
||||||
|
this.getList = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -98,20 +98,20 @@ export const getElementInformation = async (
|
|||||||
{ x: coordinates.x, y: coordinates.y },
|
{ x: coordinates.x, y: coordinates.y },
|
||||||
);
|
);
|
||||||
|
|
||||||
if (elementInfo) {
|
// if (elementInfo) {
|
||||||
if (elementInfo.tagName === 'A') {
|
// if (elementInfo.tagName === 'A') {
|
||||||
if (elementInfo.innerText) {
|
// if (elementInfo.innerText) {
|
||||||
console.log(`Link text: ${elementInfo.innerText}, URL: ${elementInfo.url}`);
|
// console.log(`Link text: ${elementInfo.innerText}, URL: ${elementInfo.url}`);
|
||||||
} else {
|
// } else {
|
||||||
console.log(`URL: ${elementInfo.url}`);
|
// console.log(`URL: ${elementInfo.url}`);
|
||||||
}
|
// }
|
||||||
} else if (elementInfo.tagName === 'IMG') {
|
// } else if (elementInfo.tagName === 'IMG') {
|
||||||
console.log(`Image URL: ${elementInfo.imageUrl}`);
|
// console.log(`Image URL: ${elementInfo.imageUrl}`);
|
||||||
} else {
|
// } else {
|
||||||
console.log(`Element innerText: ${elementInfo.innerText}`);
|
// console.log(`Element innerText: ${elementInfo.innerText}`);
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
|
||||||
return elementInfo;
|
return elementInfo;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
const { message, stack } = error as Error;
|
const { message, stack } = error as Error;
|
||||||
@@ -721,6 +721,66 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => {
|
|||||||
return null;
|
return null;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Returns a non-unique css selector for the element at the given coordinates.
 *
 * The selector is a `>`-joined path from the first element below `<body>`
 * down to the element itself. Each step is the tag name followed by the
 * element's classes; ids are never used, and classes starting with `!` or
 * containing `:` are left out.
 *
 * @param page The page instance.
 * @param coordinates Coordinates of an element.
 * @category WorkflowManagement-Selectors
 * @returns An object with a `generalSelector` string, or an empty object
 *   when no element is found at the coordinates or the evaluation fails.
 */
export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates) => {
  try {
    const selectors = await page.evaluate(({ x, y }: { x: number, y: number }) => {

      function getNonUniqueSelector(element: Element): string {
        const tag = element.tagName.toLowerCase();

        // classList is used instead of className because className is not a
        // string on SVG elements, which made the string split throw.
        // Avoid using IDs to maintain non-uniqueness.
        const validClasses = Array.from(element.classList)
          .filter((cls: string) => !cls.startsWith('!') && !cls.includes(':'));

        if (validClasses.length === 0) {
          return tag;
        }
        // Escape characters that are special inside a CSS selector.
        return tag + '.' + validClasses.map(cls => CSS.escape(cls)).join('.');
      }

      function getSelectorPath(element: Element | null): string {
        const path: string[] = [];
        while (element && element !== document.body) {
          path.unshift(getNonUniqueSelector(element));
          element = element.parentElement;
        }
        return path.join(' > ');
      }

      const element = document.elementFromPoint(x, y);
      if (!element) return null;

      const generalSelector = getSelectorPath(element);
      return {
        generalSelector,
      };
    }, coordinates);

    return selectors || {};
  } catch (error) {
    console.error('Error in getNonUniqueSelectors:', error);
    return {};
  }
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the first pair from the given workflow that contains the given selector
|
* Returns the first pair from the given workflow that contains the given selector
|
||||||
* inside the where condition, and it is the only selector there.
|
* inside the where condition, and it is the only selector there.
|
||||||
|
|||||||
@@ -24,10 +24,6 @@ export const Highlighter = ({ unmodifiedRect, displayedSelector = '', width, hei
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
//console.log('unmodifiedRect:', unmodifiedRect)
|
|
||||||
//console.log('rectangle:', rect)
|
|
||||||
//console.log('canvas rectangle:', canvasRect)
|
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div>
|
<div>
|
||||||
<HighlighterOutline
|
<HighlighterOutline
|
||||||
@@ -54,7 +50,7 @@ const HighlighterOutline = styled.div<HighlighterOutlineProps>`
|
|||||||
pointer-events: none !important;
|
pointer-events: none !important;
|
||||||
position: fixed !important;
|
position: fixed !important;
|
||||||
background: #ff5d5b26 !important;
|
background: #ff5d5b26 !important;
|
||||||
outline: 4px solid pink !important;
|
outline: 4px solid red !important;
|
||||||
//border: 4px solid #ff5d5b !important;
|
//border: 4px solid #ff5d5b !important;
|
||||||
z-index: 2147483647 !important;
|
z-index: 2147483647 !important;
|
||||||
//border-radius: 5px;
|
//border-radius: 5px;
|
||||||
|
|||||||
@@ -27,8 +27,9 @@ const Canvas = ({ width, height, onCreateRef }: CanvasProps) => {
|
|||||||
const canvasRef = useRef<HTMLCanvasElement>(null);
|
const canvasRef = useRef<HTMLCanvasElement>(null);
|
||||||
const { socket } = useSocketStore();
|
const { socket } = useSocketStore();
|
||||||
const { setLastAction, lastAction } = useGlobalInfoStore();
|
const { setLastAction, lastAction } = useGlobalInfoStore();
|
||||||
const { getText, getScreenshot } = useActionContext();
|
const { getText, getList } = useActionContext();
|
||||||
const getTextRef = useRef(getText);
|
const getTextRef = useRef(getText);
|
||||||
|
const getListRef = useRef(getList);
|
||||||
|
|
||||||
const notifyLastAction = (action: string) => {
|
const notifyLastAction = (action: string) => {
|
||||||
if (lastAction !== action) {
|
if (lastAction !== action) {
|
||||||
@@ -40,7 +41,8 @@ const Canvas = ({ width, height, onCreateRef }: CanvasProps) => {
|
|||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
getTextRef.current = getText;
|
getTextRef.current = getText;
|
||||||
}, [getText]);
|
getListRef.current = getList;
|
||||||
|
}, [getText, getList]);
|
||||||
|
|
||||||
const onMouseEvent = useCallback((event: MouseEvent) => {
|
const onMouseEvent = useCallback((event: MouseEvent) => {
|
||||||
if (socket) {
|
if (socket) {
|
||||||
@@ -51,8 +53,9 @@ const Canvas = ({ width, height, onCreateRef }: CanvasProps) => {
|
|||||||
switch (event.type) {
|
switch (event.type) {
|
||||||
case 'mousedown':
|
case 'mousedown':
|
||||||
const clickCoordinates = getMappedCoordinates(event, canvasRef.current, width, height);
|
const clickCoordinates = getMappedCoordinates(event, canvasRef.current, width, height);
|
||||||
if (getTextRef.current === true) {
|
if (getTextRef.current === true || getListRef.current === true) {
|
||||||
console.log('get text')
|
// todo: remove console.log and return
|
||||||
|
console.log('get text or get list is true');
|
||||||
} else {
|
} else {
|
||||||
socket.emit('input:mousedown', clickCoordinates);
|
socket.emit('input:mousedown', clickCoordinates);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ export const InterpretationLog = () => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const handleLog = useCallback((msg: string, date: boolean = true) => {
|
const handleLog = useCallback((msg: string, date: boolean = true) => {
|
||||||
if (!date){
|
if (!date) {
|
||||||
setLog((prevState) => prevState + '\n' + msg);
|
setLog((prevState) => prevState + '\n' + msg);
|
||||||
} else {
|
} else {
|
||||||
setLog((prevState) => prevState + '\n' + `[${new Date().toLocaleString()}] ` + msg);
|
setLog((prevState) => prevState + '\n' + `[${new Date().toLocaleString()}] ` + msg);
|
||||||
@@ -42,9 +42,9 @@ export const InterpretationLog = () => {
|
|||||||
scrollLogToBottom();
|
scrollLogToBottom();
|
||||||
}, [log, scrollLogToBottom])
|
}, [log, scrollLogToBottom])
|
||||||
|
|
||||||
const handleBinaryCallback = useCallback(({data, mimetype}: any) => {
|
const handleBinaryCallback = useCallback(({ data, mimetype }: any) => {
|
||||||
setLog((prevState) =>
|
setLog((prevState) =>
|
||||||
prevState + '\n' + '---------- Binary output data received ----------' + '\n'
|
prevState + '\n' + '---------- Binary output data received ----------' + '\n'
|
||||||
+ `mimetype: ${mimetype}` + '\n' + `data: ${JSON.stringify(data)}` + '\n'
|
+ `mimetype: ${mimetype}` + '\n' + `data: ${JSON.stringify(data)}` + '\n'
|
||||||
+ '------------------------------------------------');
|
+ '------------------------------------------------');
|
||||||
scrollLogToBottom();
|
scrollLogToBottom();
|
||||||
@@ -66,10 +66,10 @@ export const InterpretationLog = () => {
|
|||||||
<Accordion
|
<Accordion
|
||||||
expanded={expanded}
|
expanded={expanded}
|
||||||
onChange={handleChange(!expanded)}
|
onChange={handleChange(!expanded)}
|
||||||
style={{background: '#3f4853', color: 'white', borderRadius: '0px'}}
|
style={{ background: '#3f4853', color: 'white', borderRadius: '0px' }}
|
||||||
>
|
>
|
||||||
<AccordionSummary
|
<AccordionSummary
|
||||||
expandIcon={<ExpandMoreIcon sx={{color: 'white'}}/>}
|
expandIcon={<ExpandMoreIcon sx={{ color: 'white' }} />}
|
||||||
aria-controls="panel1bh-content"
|
aria-controls="panel1bh-content"
|
||||||
id="panel1bh-header"
|
id="panel1bh-header"
|
||||||
>
|
>
|
||||||
@@ -88,8 +88,8 @@ export const InterpretationLog = () => {
|
|||||||
<Highlight className="javascript">
|
<Highlight className="javascript">
|
||||||
{log}
|
{log}
|
||||||
</Highlight>
|
</Highlight>
|
||||||
<div style={{ float:"left", clear: "both" }}
|
<div style={{ float: "left", clear: "both" }}
|
||||||
ref={logEndRef}/>
|
ref={logEndRef} />
|
||||||
</div>
|
</div>
|
||||||
</AccordionDetails>
|
</AccordionDetails>
|
||||||
</Accordion>
|
</Accordion>
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ import { useBrowserDimensionsStore } from "../../context/browserDimensions";
|
|||||||
import { Highlighter } from "../atoms/Highlighter";
|
import { Highlighter } from "../atoms/Highlighter";
|
||||||
import { GenericModal } from '../atoms/GenericModal';
|
import { GenericModal } from '../atoms/GenericModal';
|
||||||
import { useActionContext } from '../../context/browserActions';
|
import { useActionContext } from '../../context/browserActions';
|
||||||
import { useBrowserSteps } from '../../context/browserSteps';
|
import { useBrowserSteps, TextStep } from '../../context/browserSteps';
|
||||||
|
|
||||||
interface ElementInfo {
|
interface ElementInfo {
|
||||||
tagName: string;
|
tagName: string;
|
||||||
@@ -45,10 +45,13 @@ export const BrowserWindow = () => {
|
|||||||
const [attributeOptions, setAttributeOptions] = useState<AttributeOption[]>([]);
|
const [attributeOptions, setAttributeOptions] = useState<AttributeOption[]>([]);
|
||||||
const [selectedElement, setSelectedElement] = useState<{ selector: string, info: ElementInfo | null } | null>(null);
|
const [selectedElement, setSelectedElement] = useState<{ selector: string, info: ElementInfo | null } | null>(null);
|
||||||
|
|
||||||
|
const [listSelector, setListSelector] = useState<string | null>(null);
|
||||||
|
const [fields, setFields] = useState<Record<string, TextStep>>({});
|
||||||
|
|
||||||
const { socket } = useSocketStore();
|
const { socket } = useSocketStore();
|
||||||
const { width, height } = useBrowserDimensionsStore();
|
const { width, height } = useBrowserDimensionsStore();
|
||||||
const { getText } = useActionContext();
|
const { getText, getList } = useActionContext();
|
||||||
const { addTextStep } = useBrowserSteps();
|
const { addTextStep, addListStep } = useBrowserSteps();
|
||||||
|
|
||||||
const onMouseMove = (e: MouseEvent) => {
|
const onMouseMove = (e: MouseEvent) => {
|
||||||
if (canvasRef && canvasRef.current && highlighterData) {
|
if (canvasRef && canvasRef.current && highlighterData) {
|
||||||
@@ -84,8 +87,11 @@ export const BrowserWindow = () => {
|
|||||||
}, [screenShot, canvasRef, socket, screencastHandler]);
|
}, [screenShot, canvasRef, socket, screencastHandler]);
|
||||||
|
|
||||||
const highlighterHandler = useCallback((data: { rect: DOMRect, selector: string, elementInfo: ElementInfo | null }) => {
|
const highlighterHandler = useCallback((data: { rect: DOMRect, selector: string, elementInfo: ElementInfo | null }) => {
|
||||||
|
if (getList === true) {
|
||||||
|
socket?.emit('setGetList', { getList: true });
|
||||||
|
}
|
||||||
setHighlighterData(data);
|
setHighlighterData(data);
|
||||||
}, [highlighterData])
|
}, [highlighterData, getList, socket]);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
document.addEventListener('mousemove', onMouseMove, false);
|
document.addEventListener('mousemove', onMouseMove, false);
|
||||||
@@ -128,6 +134,45 @@ export const BrowserWindow = () => {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (getList === true && !listSelector) {
|
||||||
|
setListSelector(highlighterData.selector);
|
||||||
|
} else if (getList === true && listSelector) {
|
||||||
|
const options = getAttributeOptions(highlighterData.elementInfo?.tagName || '');
|
||||||
|
if (options.length > 1) {
|
||||||
|
setAttributeOptions(options);
|
||||||
|
setSelectedElement({
|
||||||
|
selector: highlighterData.selector,
|
||||||
|
info: highlighterData.elementInfo
|
||||||
|
});
|
||||||
|
setShowAttributeModal(true);
|
||||||
|
} else {
|
||||||
|
const newField: TextStep = {
|
||||||
|
id: Date.now(),
|
||||||
|
type: 'text',
|
||||||
|
label: `Label ${Object.keys(fields).length + 1}`,
|
||||||
|
data: highlighterData.elementInfo?.innerText || '',
|
||||||
|
selectorObj: {
|
||||||
|
selector: highlighterData.selector,
|
||||||
|
tag: highlighterData.elementInfo?.tagName,
|
||||||
|
attribute: 'innerText'
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
setFields(prevFields => {
|
||||||
|
const updatedFields = {
|
||||||
|
...prevFields,
|
||||||
|
[newField.label]: newField
|
||||||
|
};
|
||||||
|
return updatedFields;
|
||||||
|
});
|
||||||
|
|
||||||
|
if (listSelector) {
|
||||||
|
addListStep(listSelector, { ...fields, [newField.label]: newField });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@@ -153,6 +198,31 @@ export const BrowserWindow = () => {
|
|||||||
attribute: attribute
|
attribute: attribute
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
if (getList === true) {
|
||||||
|
const newField: TextStep = {
|
||||||
|
id: Date.now(),
|
||||||
|
type: 'text',
|
||||||
|
label: `Label ${Object.keys(fields).length + 1}`,
|
||||||
|
data: selectedElement.info?.innerText || '',
|
||||||
|
selectorObj: {
|
||||||
|
selector: selectedElement.selector,
|
||||||
|
tag: selectedElement.info?.tagName,
|
||||||
|
attribute: attribute
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
setFields(prevFields => {
|
||||||
|
const updatedFields = {
|
||||||
|
...prevFields,
|
||||||
|
[newField.label]: newField
|
||||||
|
};
|
||||||
|
return updatedFields;
|
||||||
|
});
|
||||||
|
|
||||||
|
if (listSelector) {
|
||||||
|
addListStep(listSelector, { ...fields, [newField.label]: newField });
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
setShowAttributeModal(false);
|
setShowAttributeModal(false);
|
||||||
@@ -161,7 +231,7 @@ export const BrowserWindow = () => {
|
|||||||
return (
|
return (
|
||||||
<div onClick={handleClick}>
|
<div onClick={handleClick}>
|
||||||
{
|
{
|
||||||
getText === true ? (
|
getText === true || getList === true ? (
|
||||||
<GenericModal
|
<GenericModal
|
||||||
isOpen={showAttributeModal}
|
isOpen={showAttributeModal}
|
||||||
onClose={() => { }}
|
onClose={() => { }}
|
||||||
@@ -179,7 +249,7 @@ export const BrowserWindow = () => {
|
|||||||
</GenericModal>
|
</GenericModal>
|
||||||
) : null
|
) : null
|
||||||
}
|
}
|
||||||
{(getText === true && !showAttributeModal && highlighterData?.rect != null && highlighterData?.rect.top != null) && canvasRef?.current ?
|
{((getText === true || getList === true) && !showAttributeModal && highlighterData?.rect != null && highlighterData?.rect.top != null) && canvasRef?.current ?
|
||||||
<Highlighter
|
<Highlighter
|
||||||
unmodifiedRect={highlighterData?.rect}
|
unmodifiedRect={highlighterData?.rect}
|
||||||
displayedSelector={highlighterData?.selector}
|
displayedSelector={highlighterData?.selector}
|
||||||
|
|||||||
@@ -8,11 +8,16 @@ import { SimpleBox } from "../atoms/Box";
|
|||||||
import Typography from "@mui/material/Typography";
|
import Typography from "@mui/material/Typography";
|
||||||
import { useGlobalInfoStore } from "../../context/globalInfo";
|
import { useGlobalInfoStore } from "../../context/globalInfo";
|
||||||
import { useActionContext } from '../../context/browserActions';
|
import { useActionContext } from '../../context/browserActions';
|
||||||
import { useBrowserSteps } from '../../context/browserSteps';
|
import { useBrowserSteps, ListStep, TextStep, SelectorObject } from '../../context/browserSteps';
|
||||||
import { useSocketStore } from '../../context/socket';
|
import { useSocketStore } from '../../context/socket';
|
||||||
import { ScreenshotSettings } from '../../shared/types';
|
import { ScreenshotSettings } from '../../shared/types';
|
||||||
import InputAdornment from '@mui/material/InputAdornment';
|
import InputAdornment from '@mui/material/InputAdornment';
|
||||||
|
|
||||||
|
// TODO:
|
||||||
|
// 1. Handle field label update
|
||||||
|
// 2. Handle field deletion | confirmation
|
||||||
|
// 3. Add description for each browser step
|
||||||
|
// 4. Handle non custom action steps
|
||||||
|
|
||||||
export const RightSidePanel = () => {
|
export const RightSidePanel = () => {
|
||||||
const [textLabels, setTextLabels] = useState<{ [id: number]: string }>({});
|
const [textLabels, setTextLabels] = useState<{ [id: number]: string }>({});
|
||||||
@@ -20,7 +25,7 @@ export const RightSidePanel = () => {
|
|||||||
const [confirmedTextSteps, setConfirmedTextSteps] = useState<{ [id: number]: boolean }>({});
|
const [confirmedTextSteps, setConfirmedTextSteps] = useState<{ [id: number]: boolean }>({});
|
||||||
|
|
||||||
const { lastAction, notify } = useGlobalInfoStore();
|
const { lastAction, notify } = useGlobalInfoStore();
|
||||||
const { getText, startGetText, stopGetText, getScreenshot, startGetScreenshot, stopGetScreenshot } = useActionContext();
|
const { getText, startGetText, stopGetText, getScreenshot, startGetScreenshot, stopGetScreenshot, getList, startGetList, stopGetList } = useActionContext();
|
||||||
const { browserSteps, updateBrowserTextStepLabel, deleteBrowserStep, addScreenshotStep } = useBrowserSteps();
|
const { browserSteps, updateBrowserTextStepLabel, deleteBrowserStep, addScreenshotStep } = useBrowserSteps();
|
||||||
const { socket } = useSocketStore();
|
const { socket } = useSocketStore();
|
||||||
|
|
||||||
@@ -80,6 +85,49 @@ export const RightSidePanel = () => {
|
|||||||
}
|
}
|
||||||
}, [stopGetText, getTextSettingsObject, socket, browserSteps, confirmedTextSteps]);
|
}, [stopGetText, getTextSettingsObject, socket, browserSteps, confirmedTextSteps]);
|
||||||
|
|
||||||
|
|
||||||
|
const getListSettingsObject = useCallback(() => {
|
||||||
|
let settings: { listSelector?: string; fields?: Record<string, { selector: string; tag?: string;[key: string]: any }> } = {};
|
||||||
|
|
||||||
|
browserSteps.forEach(step => {
|
||||||
|
if (step.type === 'list' && step.listSelector && Object.keys(step.fields).length > 0) {
|
||||||
|
const fields: Record<string, { selector: string; tag?: string;[key: string]: any }> = {};
|
||||||
|
Object.entries(step.fields).forEach(([label, field]) => {
|
||||||
|
if (field.selectorObj?.selector) {
|
||||||
|
fields[label] = {
|
||||||
|
selector: field.selectorObj.selector,
|
||||||
|
tag: field.selectorObj.tag,
|
||||||
|
attribute: field.selectorObj.attribute
|
||||||
|
};
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
settings = {
|
||||||
|
listSelector: step.listSelector,
|
||||||
|
fields: fields
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
return settings;
|
||||||
|
}, [browserSteps]);
|
||||||
|
|
||||||
|
|
||||||
|
const stopCaptureAndEmitGetListSettings = useCallback(() => {
|
||||||
|
stopGetList();
|
||||||
|
const settings = getListSettingsObject();
|
||||||
|
if (settings) {
|
||||||
|
socket?.emit('action', { action: 'scrapeList', settings });
|
||||||
|
} else {
|
||||||
|
notify('error', 'Unable to create list settings. Make sure you have defined a field for the list.');
|
||||||
|
}
|
||||||
|
}, [stopGetList, getListSettingsObject, socket, notify]);
|
||||||
|
|
||||||
|
// const handleListFieldChange = (stepId: number, key: 'label' | 'data', value: string) => {
|
||||||
|
// updateListStepField(stepId, key, value);
|
||||||
|
// };
|
||||||
|
|
||||||
const captureScreenshot = (fullPage: boolean) => {
|
const captureScreenshot = (fullPage: boolean) => {
|
||||||
const screenshotSettings: ScreenshotSettings = {
|
const screenshotSettings: ScreenshotSettings = {
|
||||||
fullPage,
|
fullPage,
|
||||||
@@ -101,7 +149,17 @@ export const RightSidePanel = () => {
|
|||||||
</SimpleBox>
|
</SimpleBox>
|
||||||
|
|
||||||
<Box display="flex" flexDirection="column" gap={2} style={{ margin: '15px' }}>
|
<Box display="flex" flexDirection="column" gap={2} style={{ margin: '15px' }}>
|
||||||
{!getText && !getScreenshot && <Button variant="contained" onClick={startGetText}>Capture Text</Button>}
|
{!getText && !getScreenshot && !getList && <Button variant="contained" onClick={startGetList}>Capture List</Button>}
|
||||||
|
{getList &&
|
||||||
|
<>
|
||||||
|
<Box display="flex" justifyContent="space-between" gap={2} style={{ margin: '15px' }}>
|
||||||
|
<Button variant="outlined" onClick={stopCaptureAndEmitGetListSettings}>Confirm</Button>
|
||||||
|
<Button variant="outlined" color="error" onClick={stopGetList}>Discard</Button>
|
||||||
|
</Box>
|
||||||
|
</>
|
||||||
|
}
|
||||||
|
|
||||||
|
{!getText && !getScreenshot && !getList && <Button variant="contained" onClick={startGetText}>Capture Text</Button>}
|
||||||
{getText &&
|
{getText &&
|
||||||
<>
|
<>
|
||||||
<Box display="flex" justifyContent="space-between" gap={2} style={{ margin: '15px' }}>
|
<Box display="flex" justifyContent="space-between" gap={2} style={{ margin: '15px' }}>
|
||||||
@@ -111,7 +169,7 @@ export const RightSidePanel = () => {
|
|||||||
</>
|
</>
|
||||||
}
|
}
|
||||||
|
|
||||||
{!getText && !getScreenshot && <Button variant="contained" onClick={startGetScreenshot}>Capture Screenshot</Button>}
|
{!getText && !getScreenshot && !getList && <Button variant="contained" onClick={startGetScreenshot}>Capture Screenshot</Button>}
|
||||||
{getScreenshot && (
|
{getScreenshot && (
|
||||||
<Box display="flex" flexDirection="column" gap={2}>
|
<Box display="flex" flexDirection="column" gap={2}>
|
||||||
<Button variant="contained" onClick={() => captureScreenshot(true)}>Capture Fullpage</Button>
|
<Button variant="contained" onClick={() => captureScreenshot(true)}>Capture Fullpage</Button>
|
||||||
@@ -125,7 +183,7 @@ export const RightSidePanel = () => {
|
|||||||
{browserSteps.map(step => (
|
{browserSteps.map(step => (
|
||||||
<Box key={step.id} sx={{ boxShadow: 5, padding: '10px', margin: '10px', borderRadius: '4px' }}>
|
<Box key={step.id} sx={{ boxShadow: 5, padding: '10px', margin: '10px', borderRadius: '4px' }}>
|
||||||
{
|
{
|
||||||
step.type === 'text' ? (
|
step.type === 'text' && (
|
||||||
<>
|
<>
|
||||||
<TextField
|
<TextField
|
||||||
label="Label"
|
label="Label"
|
||||||
@@ -165,24 +223,55 @@ export const RightSidePanel = () => {
|
|||||||
</Box>
|
</Box>
|
||||||
)}
|
)}
|
||||||
</>
|
</>
|
||||||
) : (
|
)}
|
||||||
step.type === 'screenshot' && (
|
{step.type === 'screenshot' && (
|
||||||
<Box display="flex" alignItems="center">
|
<Box display="flex" alignItems="center">
|
||||||
<DocumentScannerIcon sx={{ mr: 1 }} />
|
<DocumentScannerIcon sx={{ mr: 1 }} />
|
||||||
<Typography>
|
<Typography>
|
||||||
{`Take ${step.fullPage ? 'Fullpage' : 'Visible Part'} Screenshot`}
|
{`Take ${step.fullPage ? 'Fullpage' : 'Visible Part'} Screenshot`}
|
||||||
</Typography>
|
</Typography>
|
||||||
|
</Box>
|
||||||
|
)}
|
||||||
|
{step.type === 'list' && (
|
||||||
|
<>
|
||||||
|
<Typography>List Selected Successfully</Typography>
|
||||||
|
{Object.entries(step.fields).map(([key, field]) => (
|
||||||
|
<Box key={key}>
|
||||||
|
<TextField
|
||||||
|
label="Field Label"
|
||||||
|
value={field.label || ''}
|
||||||
|
onChange={() => { }}
|
||||||
|
fullWidth
|
||||||
|
margin="normal"
|
||||||
|
InputProps={{
|
||||||
|
startAdornment: (
|
||||||
|
<InputAdornment position="start">
|
||||||
|
<EditIcon />
|
||||||
|
</InputAdornment>
|
||||||
|
)
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
<TextField
|
||||||
|
label="Field Data"
|
||||||
|
value={field.data || ''}
|
||||||
|
fullWidth
|
||||||
|
margin="normal"
|
||||||
|
InputProps={{
|
||||||
|
readOnly: true,
|
||||||
|
startAdornment: (
|
||||||
|
<InputAdornment position="start">
|
||||||
|
<TextFieldsIcon />
|
||||||
|
</InputAdornment>
|
||||||
|
)
|
||||||
|
}}
|
||||||
|
/>
|
||||||
</Box>
|
</Box>
|
||||||
)
|
))}
|
||||||
)
|
</>
|
||||||
}
|
)}
|
||||||
</Box>
|
</Box>
|
||||||
))}
|
))}
|
||||||
</Box>
|
</Box>
|
||||||
</Paper>
|
</Paper>
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|
||||||
export const ActionDescription = styled.p`
|
|
||||||
margin-left: 15px;
|
|
||||||
`;
|
|
||||||
@@ -2,9 +2,12 @@ import React, { createContext, useContext, useState, ReactNode } from 'react';
|
|||||||
|
|
||||||
interface ActionContextProps {
|
interface ActionContextProps {
|
||||||
getText: boolean;
|
getText: boolean;
|
||||||
|
getList: boolean;
|
||||||
getScreenshot: boolean;
|
getScreenshot: boolean;
|
||||||
startGetText: () => void;
|
startGetText: () => void;
|
||||||
stopGetText: () => void;
|
stopGetText: () => void;
|
||||||
|
startGetList: () => void;
|
||||||
|
stopGetList: () => void;
|
||||||
startGetScreenshot: () => void;
|
startGetScreenshot: () => void;
|
||||||
stopGetScreenshot: () => void;
|
stopGetScreenshot: () => void;
|
||||||
}
|
}
|
||||||
@@ -13,16 +16,20 @@ const ActionContext = createContext<ActionContextProps | undefined>(undefined);
|
|||||||
|
|
||||||
export const ActionProvider = ({ children }: { children: ReactNode }) => {
|
export const ActionProvider = ({ children }: { children: ReactNode }) => {
|
||||||
const [getText, setGetText] = useState<boolean>(false);
|
const [getText, setGetText] = useState<boolean>(false);
|
||||||
|
const [getList, setGetList] = useState<boolean>(false);
|
||||||
const [getScreenshot, setGetScreenshot] = useState<boolean>(false);
|
const [getScreenshot, setGetScreenshot] = useState<boolean>(false);
|
||||||
|
|
||||||
const startGetText = () => setGetText(true);
|
const startGetText = () => setGetText(true);
|
||||||
const stopGetText = () => setGetText(false);
|
const stopGetText = () => setGetText(false);
|
||||||
|
|
||||||
|
const startGetList = () => setGetList(true);
|
||||||
|
const stopGetList = () => setGetList(false);
|
||||||
|
|
||||||
const startGetScreenshot = () => setGetScreenshot(true);
|
const startGetScreenshot = () => setGetScreenshot(true);
|
||||||
const stopGetScreenshot = () => setGetScreenshot(false);
|
const stopGetScreenshot = () => setGetScreenshot(false);
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<ActionContext.Provider value={{ getText, getScreenshot, startGetText, stopGetText, startGetScreenshot, stopGetScreenshot }}>
|
<ActionContext.Provider value={{ getText, getList, getScreenshot, startGetText, stopGetText, startGetList, stopGetList, startGetScreenshot, stopGetScreenshot }}>
|
||||||
{children}
|
{children}
|
||||||
</ActionContext.Provider>
|
</ActionContext.Provider>
|
||||||
);
|
);
|
||||||
@@ -34,4 +41,4 @@ export const useActionContext = () => {
|
|||||||
throw new Error('useActionContext must be used within an ActionProvider');
|
throw new Error('useActionContext must be used within an ActionProvider');
|
||||||
}
|
}
|
||||||
return context;
|
return context;
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import React, { createContext, useContext, useState } from 'react';
|
import React, { createContext, useContext, useState } from 'react';
|
||||||
|
|
||||||
interface TextStep {
|
export interface TextStep {
|
||||||
id: number;
|
id: number;
|
||||||
type: 'text';
|
type: 'text';
|
||||||
label: string;
|
label: string;
|
||||||
@@ -14,10 +14,16 @@ interface ScreenshotStep {
|
|||||||
fullPage: boolean;
|
fullPage: boolean;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export interface ListStep {
|
||||||
|
id: number;
|
||||||
|
type: 'list';
|
||||||
|
listSelector: string;
|
||||||
|
fields: { [key: string]: TextStep };
|
||||||
|
}
|
||||||
|
|
||||||
type BrowserStep = TextStep | ScreenshotStep;
|
type BrowserStep = TextStep | ScreenshotStep | ListStep;
|
||||||
|
|
||||||
interface SelectorObject {
|
export interface SelectorObject {
|
||||||
selector: string;
|
selector: string;
|
||||||
tag?: string;
|
tag?: string;
|
||||||
attribute?: string;
|
attribute?: string;
|
||||||
@@ -27,6 +33,7 @@ interface SelectorObject {
|
|||||||
interface BrowserStepsContextType {
|
interface BrowserStepsContextType {
|
||||||
browserSteps: BrowserStep[];
|
browserSteps: BrowserStep[];
|
||||||
addTextStep: (label: string, data: string, selectorObj: SelectorObject) => void;
|
addTextStep: (label: string, data: string, selectorObj: SelectorObject) => void;
|
||||||
|
addListStep: (listSelector: string, fields: { [key: string]: TextStep }) => void
|
||||||
addScreenshotStep: (fullPage: boolean) => void;
|
addScreenshotStep: (fullPage: boolean) => void;
|
||||||
deleteBrowserStep: (id: number) => void;
|
deleteBrowserStep: (id: number) => void;
|
||||||
updateBrowserTextStepLabel: (id: number, newLabel: string) => void;
|
updateBrowserTextStepLabel: (id: number, newLabel: string) => void;
|
||||||
@@ -44,6 +51,31 @@ export const BrowserStepsProvider: React.FC<{ children: React.ReactNode }> = ({
|
|||||||
]);
|
]);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const addListStep = (listSelector: string, newFields: { [key: string]: TextStep }) => {
|
||||||
|
setBrowserSteps(prevSteps => {
|
||||||
|
const existingListStepIndex = prevSteps.findIndex(
|
||||||
|
step => step.type === 'list' && step.listSelector === listSelector
|
||||||
|
);
|
||||||
|
if (existingListStepIndex !== -1) {
|
||||||
|
// Update the existing ListStep with new fields
|
||||||
|
const updatedSteps = [...prevSteps];
|
||||||
|
const existingListStep = updatedSteps[existingListStepIndex] as ListStep;
|
||||||
|
updatedSteps[existingListStepIndex] = {
|
||||||
|
...existingListStep,
|
||||||
|
fields: { ...existingListStep.fields, ...newFields }
|
||||||
|
};
|
||||||
|
return updatedSteps;
|
||||||
|
} else {
|
||||||
|
// Create a new ListStep
|
||||||
|
return [
|
||||||
|
...prevSteps,
|
||||||
|
{ id: Date.now(), type: 'list', listSelector, fields: newFields }
|
||||||
|
];
|
||||||
|
}
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
const addScreenshotStep = (fullPage: boolean) => {
|
const addScreenshotStep = (fullPage: boolean) => {
|
||||||
setBrowserSteps(prevSteps => [
|
setBrowserSteps(prevSteps => [
|
||||||
...prevSteps,
|
...prevSteps,
|
||||||
@@ -67,6 +99,7 @@ export const BrowserStepsProvider: React.FC<{ children: React.ReactNode }> = ({
|
|||||||
<BrowserStepsContext.Provider value={{
|
<BrowserStepsContext.Provider value={{
|
||||||
browserSteps,
|
browserSteps,
|
||||||
addTextStep,
|
addTextStep,
|
||||||
|
addListStep,
|
||||||
addScreenshotStep,
|
addScreenshotStep,
|
||||||
deleteBrowserStep,
|
deleteBrowserStep,
|
||||||
updateBrowserTextStepLabel,
|
updateBrowserTextStepLabel,
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import React, { useCallback, useEffect, useState } from 'react';
|
import React, { useCallback, useEffect, useState } from 'react';
|
||||||
import { Grid } from '@mui/material';
|
import { Grid } from '@mui/material';
|
||||||
import { BrowserContent } from "../components/organisms/BrowserContent";
|
import { BrowserContent } from "../components/organisms/BrowserContent";
|
||||||
|
import { InterpretationLog } from "../components/molecules/InterpretationLog";
|
||||||
import { startRecording, getActiveBrowserId } from "../api/recording";
|
import { startRecording, getActiveBrowserId } from "../api/recording";
|
||||||
import { LeftSidePanel } from "../components/organisms/LeftSidePanel";
|
import { LeftSidePanel } from "../components/organisms/LeftSidePanel";
|
||||||
import { RightSidePanel } from "../components/organisms/RightSidePanel";
|
import { RightSidePanel } from "../components/organisms/RightSidePanel";
|
||||||
@@ -121,6 +122,7 @@ export const RecordingPage = ({ recordingName }: RecordingPageProps) => {
|
|||||||
</Grid>
|
</Grid>
|
||||||
<Grid id="browser-content" ref={browserContentRef} item xs>
|
<Grid id="browser-content" ref={browserContentRef} item xs>
|
||||||
<BrowserContent />
|
<BrowserContent />
|
||||||
|
<InterpretationLog />
|
||||||
</Grid>
|
</Grid>
|
||||||
<Grid item xs={2}>
|
<Grid item xs={2}>
|
||||||
<RightSidePanel />
|
<RightSidePanel />
|
||||||
|
|||||||
@@ -23,4 +23,4 @@ export interface ScreenshotSettings {
|
|||||||
type?: "jpeg" | "png";
|
type?: "jpeg" | "png";
|
||||||
};
|
};
|
||||||
|
|
||||||
export declare type CustomActions = 'scrape' | 'scrapeSchema' | 'scroll' | 'screenshot' | 'script' | 'enqueueLinks' | 'flag';
|
export declare type CustomActions = 'scrape' | 'scrapeSchema' | 'scroll' | 'screenshot' | 'script' | 'enqueueLinks' | 'flag' | 'scrapeList' | 'scrapeListAuto';
|
||||||
|
|||||||
Reference in New Issue
Block a user