chore: lint

This commit is contained in:
karishmas6
2024-08-03 20:46:05 +05:30
parent eabe62fff7
commit 99f17421db

View File

@@ -132,12 +132,12 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
*/
// Wrap the entire function in an IIFE (Immediately Invoked Function Expression)
// and attach it to the window object
(function(window) {
(function (window) {
/**
* Returns a "scrape" result from the current page.
* @returns {Array<Object>} *Curated* array of scraped information (with sparse rows removed)
*/
window.scrape = function(selector = null) {
window.scrape = function (selector = null) {
/**
* **crudeRecords** contains uncurated rundowns of "scrapable" elements
* @type {Array<Object>}
@@ -182,59 +182,59 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
return crudeRecords;
};
/**
* Given an object with named lists of elements,
* groups the elements by their distance in the DOM tree.
* @param {Object.<string, {selector: string, tag: string}>} lists The named lists of HTML elements.
* @returns {Array.<Object.<string, string>>}
*/
window.scrapeSchema = function (lists) {
function omap(object, f, kf = (x) => x) {
return Object.fromEntries(
Object.entries(object)
.map(([k, v]) => [kf(k), f(v)]),
);
/**
* Given an object with named lists of elements,
* groups the elements by their distance in the DOM tree.
* @param {Object.<string, {selector: string, tag: string}>} lists The named lists of HTML elements.
* @returns {Array.<Object.<string, string>>}
*/
window.scrapeSchema = function (lists) {
function omap(object, f, kf = (x) => x) {
return Object.fromEntries(
Object.entries(object)
.map(([k, v]) => [kf(k), f(v)]),
);
}
function ofilter(object, f) {
return Object.fromEntries(
Object.entries(object)
.filter(([k, v]) => f(k, v)),
);
}
function getSeedKey(listObj) {
const maxLength = Math.max(...Object.values(omap(listObj, (x) => document.querySelectorAll(x.selector).length)));
return Object.keys(ofilter(listObj, (_, v) => document.querySelectorAll(v.selector).length === maxLength))[0];
}
function getMBEs(elements) {
return elements.map((element) => {
let candidate = element;
const isUniqueChild = (e) => elements
.filter((elem) => e.parentNode?.contains(elem))
.length === 1;
while (candidate && isUniqueChild(candidate)) {
candidate = candidate.parentNode;
}
return candidate;
});
}
const seedName = getSeedKey(lists);
const seedElements = Array.from(document.querySelectorAll(lists[seedName].selector));
const MBEs = getMBEs(seedElements);
return MBEs.map((mbe) => omap(
lists,
({ selector }, key) => {
const elem = Array.from(document.querySelectorAll(selector)).find((elem) => mbe.contains(elem));
return elem ? elem.innerText : undefined;
},
(key) => key // Use the original key in the output
));
}
function ofilter(object, f) {
return Object.fromEntries(
Object.entries(object)
.filter(([k, v]) => f(k, v)),
);
}
function getSeedKey(listObj) {
const maxLength = Math.max(...Object.values(omap(listObj, (x) => document.querySelectorAll(x.selector).length)));
return Object.keys(ofilter(listObj, (_, v) => document.querySelectorAll(v.selector).length === maxLength))[0];
}
function getMBEs(elements) {
return elements.map((element) => {
let candidate = element;
const isUniqueChild = (e) => elements
.filter((elem) => e.parentNode?.contains(elem))
.length === 1;
while (candidate && isUniqueChild(candidate)) {
candidate = candidate.parentNode;
}
return candidate;
});
}
const seedName = getSeedKey(lists);
const seedElements = Array.from(document.querySelectorAll(lists[seedName].selector));
const MBEs = getMBEs(seedElements);
return MBEs.map((mbe) => omap(
lists,
({ selector }, key) => {
const elem = Array.from(document.querySelectorAll(selector)).find((elem) => mbe.contains(elem));
return elem ? elem.innerText : undefined;
},
(key) => key // Use the original key in the output
));
}
})(window);