diff --git a/mx-interpreter/browserSide/scraper.js b/mx-interpreter/browserSide/scraper.js index c64c4808..d9e5a0d9 100644 --- a/mx-interpreter/browserSide/scraper.js +++ b/mx-interpreter/browserSide/scraper.js @@ -11,3 +11,43 @@ function getBiggestElement(selector) { ); return biggest; } + +/** + * Generates structural selector (describing element by its DOM tree location). + * + * **The generated selector is not guaranteed to be unique!** (In fact, this is + * the desired behaviour in here.) + * @param {HTMLElement} element Element being described. + * @returns {string} CSS-compliant selector describing the element's location in the DOM tree. + */ +function GetSelectorStructural(element) { + // Base conditions for the recursive approach. + if (element.tagName === 'BODY') { + return 'BODY'; + } + const selector = element.tagName; + if (element.parentElement) { + return `${GetSelectorStructural(element.parentElement)} > ${selector}`; + } + + return selector; +} + +/** + * Heuristic method to find collections of "interesting" items on the page. + * @returns {Array} A collection of interesting DOM nodes + * (online store products, plane tickets, list items... and many more?) + */ +function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, metricType = 'size_deviation') { + const restoreScroll = (() => { + const { scrollX, scrollY } = window; + return () => { + window.scrollTo(scrollX, scrollY); + }; + })(); + + /** +* @typedef {Array<{x: number, y: number}>} Grid +*/ + + \ No newline at end of file