chore: lint
This commit is contained in:
@@ -167,40 +167,40 @@ async function scrollUpToLoadMore(selector, limit) {
|
|||||||
/**
 * Advances list pagination by clicking the appropriate "next" control.
 *
 * If the scrape limit is already satisfied, no DOM interaction happens.
 * When the selector matches a single element it is treated as a dedicated
 * "Next" button; when it matches several elements they are treated as
 * numbered page links and the link following the `.active` one is clicked.
 *
 * @param {string} selector - CSS selector for the pagination control(s).
 * @param {Array<Object>} scrapedData - Records collected so far.
 * @param {number} limit - Maximum number of records wanted.
 * @returns {Promise<boolean>} true if a pagination click was performed,
 *   false if the limit is already met.
 * @throws {Error} When no further page can be reached.
 */
async function clickNextPagination(selector, scrapedData, limit) {
  // Nothing to do once enough records have been scraped.
  if (scrapedData.length >= limit) {
    return false; // Return false to indicate no further action is needed
  }

  // Query once. Previously the code called querySelector(selector) first and
  // only fell through to the numbered-pagination handling when it returned
  // null — but querySelector is non-null whenever querySelectorAll would be
  // non-empty, which made the numbered-pagination branch unreachable.
  // Branch on the match count instead.
  const paginationButtons = document.querySelectorAll(selector);

  if (paginationButtons.length === 1) {
    // A single match is a dedicated "Next" button.
    paginationButtons[0].click();
    return true; // Indicate that pagination occurred
  }

  // Multiple matches: numbered page links. Find the currently active page
  // and click the link that follows it. The loop bound guarantees i + 1 is
  // a valid index, so no extra null check is needed.
  for (let i = 0; i < paginationButtons.length - 1; i++) {
    if (paginationButtons[i].classList.contains('active')) {
      paginationButtons[i + 1].click();
      return true; // Indicate that pagination occurred
    }
  }

  // No controls matched at all, or the active page is already the last one.
  throw new Error("No more items to load or pagination has ended.");
}
|
||||||
|
|
||||||
@@ -339,72 +339,81 @@ async function clickNextPagination(selector, scrapedData, limit) {
|
|||||||
/**
 * Scrapes up to `limit` records from a repeated list structure on the page,
 * optionally driving pagination between passes over the DOM.
 *
 * @param {Object} config
 * @param {string} config.listSelector - CSS selector matching each list item container.
 * @param {Object.<string, {selector: string, attribute: string}>} config.fields -
 *   Map of output label -> selector/attribute describing where to read the value
 *   within each container.
 * @param {number} [config.limit=10] - Maximum number of records to collect.
 * @param {?{type: string, selector: string}} [config.pagination=null] - Pagination
 *   strategy: 'scrollDown' | 'scrollUp' | 'clickNext' | 'clickLoadMore' | 'none'.
 * @returns {Promise<Array.<Object>>} At most `limit` scraped records.
 */
window.scrapeList = async function ({ listSelector, fields, limit = 10, pagination = null }) {
  const scrapedData = [];

  // Reads one field value from an element according to the requested attribute.
  const readField = (element, attribute) => {
    if (attribute === 'innerText') return element.innerText.trim();
    if (attribute === 'innerHTML') return element.innerHTML.trim();
    if (attribute === 'src') return element.src;
    if (attribute === 'href') return element.href;
    return element.getAttribute(attribute); // generic attribute fallback
  };

  while (scrapedData.length < limit) {
    // Walk every container currently present in the DOM for this pass.
    for (const container of document.querySelectorAll(listSelector)) {
      if (scrapedData.length >= limit) break;

      const record = {};
      for (const [label, { selector, attribute }] of Object.entries(fields)) {
        const fieldElement = container.querySelector(selector);
        if (fieldElement) {
          record[label] = readField(fieldElement, attribute);
        }
      }
      scrapedData.push(record);
    }

    // Guard clause: nothing more to do without pagination or once full.
    if (!pagination || scrapedData.length >= limit) {
      break; // No more items to load or no pagination
    }

    // Check if we need to paginate.
    let paginated = false;
    if (pagination.type === 'scrollDown') {
      await scrollDownToLoadMore(listSelector, limit);
      paginated = true;
    } else if (pagination.type === 'scrollUp') {
      await scrollUpToLoadMore(listSelector, limit);
      paginated = true;
    } else if (pagination.type === 'clickNext') {
      paginated = await clickNextPagination(pagination.selector, scrapedData, limit);
    } else if (pagination.type === 'clickLoadMore') {
      //await clickLoadMorePagination(pagination.selector);
      //paginated = true;
    } else if (pagination.type === 'none') {
      // No more items to load
    } else {
      console.warn("Unknown pagination type");
    }

    if (!paginated) {
      break; // No further pagination needed
    }
    await new Promise(resolve => setTimeout(resolve, 2000)); // Wait for content to load
  }

  return scrapedData.slice(0, limit); // Return only the limited number of records
};
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
Reference in New Issue
Block a user