feat: update maximum with point
This commit is contained in:
@@ -69,4 +69,60 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
||||
return out;
|
||||
}
|
||||
|
||||
|
||||
// Best candidate found so far: the selector whose matched elements scored the
// highest metric. It starts at 'body' with a metric of 0, so only a strictly
// positive score can replace it.
let maxSelector = { selector: 'body', metric: 0 };

/**
 * Scores the group of elements sharing a structural selector with the element
 * located at `point`, and stores that selector in `maxSelector` when its score
 * beats the current best.
 *
 * Reads `minArea`, `maxCountPerPage` and `metricType` from the enclosing scope
 * and relies on the `area` and `GetSelectorStructural` helpers defined
 * elsewhere in the file.
 *
 * @param {{x: number, y: number}} point - Coordinates passed to
 *   `document.elementFromPoint`.
 * @returns {void} Nothing; the result is recorded by reassigning `maxSelector`.
 */
const updateMaximumWithPoint = (point) => {
  const currentElement = document.elementFromPoint(point.x, point.y);

  // elementFromPoint yields null when the point lies outside the viewport;
  // there is nothing to build a selector from in that case.
  if (currentElement === null) {
    return;
  }

  const selector = GetSelectorStructural(currentElement);

  const elements = Array.from(document.querySelectorAll(selector))
    .filter((element) => area(element) > minArea);

  // A selector targeting fewer than three elements is not interesting (it
  // would make a very underwhelming scraper), and one targeting
  // maxCountPerPage elements or more is never accepted as the maximum.
  if (elements.length < 3 || elements.length >= maxCountPerPage) {
    return;
  }

  const sizes = elements.map((element) => area(element));

  let metric = null;

  if (metricType === 'total_area') {
    metric = sizes.reduce((total, size) => total + size, 0);
  } else if (metricType === 'size_deviation') {
    // Crude uniformity score rather than a proper statistical deviation:
    // 1 when all elements have the same area, lower as the spread between the
    // smallest and the largest grows.
    const largest = Math.max(...sizes);
    const smallest = Math.min(...sizes);

    metric = 1 - (largest - smallest) / largest;
  }

  // An unrecognised metricType leaves metric at null and never wins.
  if (metric !== null && metric > maxSelector.metric) {
    maxSelector = { selector, metric };
  }
};
|
||||
|
||||
for (let scroll = 0; scroll < scrolls; scroll += 1) {
|
||||
window.scrollTo(0, scroll * window.innerHeight);
|
||||
|
||||
const grid = getGrid();
|
||||
|
||||
grid.forEach(updateMaximumWithPoint);
|
||||
}
|
||||
|
||||
restoreScroll();
|
||||
|
||||
let out = Array.from(document.querySelectorAll(maxSelector.selector));
|
||||
|
||||
const different = (x, i, a) => a.findIndex((e) => e === x) === i;
|
||||
// as long as we don't merge any two elements by substituing them for their parents,
|
||||
// we substitute.
|
||||
while (out.map((x) => x.parentElement).every(different)
|
||||
&& out.forEach((x) => x.parentElement !== null)) {
|
||||
out = out.map((x) => x.parentElement ?? x);
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user