feat: handle pagination types

This commit is contained in:
karishmas6
2024-08-14 05:58:57 +05:30
parent 375590fc0d
commit 2407c79dd2

View File

@@ -278,14 +278,16 @@ async function scrollDownToLoadMore(selector, limit) {
* @param {boolean} [config.flexible=false] - Whether to use flexible matching for field selectors
* @returns {Promise.<Array.<Object>>} Promise resolving to a flat array of scraped records, containing at most `limit` items
*/
window.scrapeList = function ({ listSelector, fields, limit = 10, pagination }) {
// Get all parent elements matching the listSelector
const parentElements = Array.from(document.querySelectorAll(listSelector)).slice(0, limit);
window.scrapeList = async function({ listSelector, fields, limit = 10, pagination = null }) {
const scrapedData = [];
while (scrapedData.length < limit) {
// Get all parent elements matching the listSelector
const parentElements = Array.from(document.querySelectorAll(listSelector));
// Iterate through each parent element
parentElements.forEach(parent => {
for (const parent of parentElements) {
if (scrapedData.length >= limit) break;
const record = {};
// For each field, select the corresponding element within the parent
@@ -308,11 +310,43 @@ async function scrollDownToLoadMore(selector, limit) {
}
}
}
// Add the record to the scrapedData array
scrapedData.push(record);
});
return scrapedData;
}
// Check if we need to paginate
if (pagination && scrapedData.length < limit) {
switch (pagination.type) {
case 'scrollDown':
//await scrollDownPagination();
break;
case 'scrollUp':
//await scrollUpPagination();
break;
case 'clickNext':
//await clickNextPagination(pagination.selector);
break;
case 'clickLoadMore':
//await clickLoadMorePagination(pagination.selector);
break;
case 'none':
// No more items to load
break;
default:
console.warn("Unknown pagination type");
break;
}
await new Promise(resolve => setTimeout(resolve, 2000)); // Wait for content to load
} else {
break; // No more items to load or no pagination
}
}
return scrapedData.slice(0, limit); // Return only the limited number of records
};
/**
* Gets all children of the elements matching the listSelector,
* returning their CSS selectors and innerText.