feat: scrapable heuristics
This commit is contained in:
@@ -11,3 +11,43 @@ function getBiggestElement(selector) {
|
||||
);
|
||||
return biggest;
|
||||
}
|
||||
|
||||
/**
 * Generates a structural selector (describing an element by its location in the DOM tree).
 *
 * **The generated selector is not guaranteed to be unique!** (In fact, this is
 * the desired behaviour here.)
 *
 * The selector is a chain of tag names joined by the child combinator (` > `).
 * The chain starts at `BODY` when the element is, or descends from, a `BODY`
 * element; otherwise it starts at the topmost ancestor without a `parentElement`.
 *
 * @param {HTMLElement} element Element being described.
 * @returns {string} CSS-compliant selector describing the element's location in the DOM tree.
 */
function GetSelectorStructural(element) {
  // Tag names gathered while climbing: the described element first, outermost last.
  const tagsBelowRoot = [];
  let node = element;

  // Stop at BODY (the chain is anchored there) or at a node that has no parent.
  // An upward walk keeps stack usage constant regardless of DOM depth.
  while (node.tagName !== 'BODY' && node.parentElement) {
    tagsBelowRoot.push(node.tagName);
    node = node.parentElement;
  }

  // `node` is now the chain's root; append the collected tags outermost-first.
  return tagsBelowRoot.reduceRight((path, tag) => `${path} > ${tag}`, node.tagName);
}
|
||||
|
||||
/**
|
||||
* Heuristic method to find collections of "interesting" items on the page.
|
||||
* @returns {Array<HTMLElement>} A collection of interesting DOM nodes
|
||||
* (online store products, plane tickets, list items... and many more?)
|
||||
*/
|
||||
function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, metricType = 'size_deviation') {
|
||||
const restoreScroll = (() => {
|
||||
const { scrollX, scrollY } = window;
|
||||
return () => {
|
||||
window.scrollTo(scrollX, scrollY);
|
||||
};
|
||||
})();
|
||||
|
||||
/**
|
||||
* @typedef {Array<{x: number, y: number}>} Grid
|
||||
*/
|
||||
|
||||
|
||||
Reference in New Issue
Block a user