feat: handle pagination types
This commit is contained in:
@@ -278,14 +278,16 @@ async function scrollDownToLoadMore(selector, limit) {
|
||||
* @param {boolean} [config.flexible=false] - Whether to use flexible matching for field selectors
|
||||
* @returns {Promise.<Array.<Object>>} Resolves to a flat array of scraped records (one object per element matched by listSelector), capped at `limit`
|
||||
*/
|
||||
window.scrapeList = function ({ listSelector, fields, limit = 10, pagination }) {
|
||||
// Get all parent elements matching the listSelector
|
||||
const parentElements = Array.from(document.querySelectorAll(listSelector)).slice(0, limit);
|
||||
|
||||
window.scrapeList = async function({ listSelector, fields, limit = 10, pagination = null }) {
|
||||
const scrapedData = [];
|
||||
|
||||
while (scrapedData.length < limit) {
|
||||
// Get all parent elements matching the listSelector
|
||||
const parentElements = Array.from(document.querySelectorAll(listSelector));
|
||||
|
||||
// Iterate through each parent element
|
||||
parentElements.forEach(parent => {
|
||||
for (const parent of parentElements) {
|
||||
if (scrapedData.length >= limit) break;
|
||||
const record = {};
|
||||
|
||||
// For each field, select the corresponding element within the parent
|
||||
@@ -308,11 +310,43 @@ async function scrollDownToLoadMore(selector, limit) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add the record to the scrapedData array
|
||||
scrapedData.push(record);
|
||||
});
|
||||
return scrapedData;
|
||||
}
|
||||
|
||||
// Check if we need to paginate
|
||||
if (pagination && scrapedData.length < limit) {
|
||||
switch (pagination.type) {
|
||||
case 'scrollDown':
|
||||
//await scrollDownPagination();
|
||||
break;
|
||||
case 'scrollUp':
|
||||
//await scrollUpPagination();
|
||||
break;
|
||||
case 'clickNext':
|
||||
//await clickNextPagination(pagination.selector);
|
||||
break;
|
||||
case 'clickLoadMore':
|
||||
//await clickLoadMorePagination(pagination.selector);
|
||||
break;
|
||||
case 'none':
|
||||
// No more items to load
|
||||
break;
|
||||
default:
|
||||
console.warn("Unknown pagination type");
|
||||
break;
|
||||
}
|
||||
await new Promise(resolve => setTimeout(resolve, 2000)); // Wait for content to load
|
||||
} else {
|
||||
break; // No more items to load or no pagination
|
||||
}
|
||||
}
|
||||
|
||||
return scrapedData.slice(0, limit); // Return only the limited number of records
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Gets all children of the elements matching the listSelector,
|
||||
* returning their CSS selectors and innerText.
|
||||
|
||||
Reference in New Issue
Block a user