chore: lint

This commit is contained in:
karishmas6
2024-08-16 23:43:51 +05:30
parent 87476c3e35
commit b2678759db

View File

@@ -167,40 +167,40 @@ async function scrollUpToLoadMore(selector, limit) {
async function clickNextPagination(selector, scrapedData, limit) { async function clickNextPagination(selector, scrapedData, limit) {
// Check if the limit is already met // Check if the limit is already met
if (scrapedData.length >= limit) { if (scrapedData.length >= limit) {
return false; // Return false to indicate no further action is needed return false; // Return false to indicate no further action is needed
} }
// Check if a single "Next" button exists // Check if a single "Next" button exists
let nextButton = document.querySelector(selector); let nextButton = document.querySelector(selector);
if (nextButton) { if (nextButton) {
nextButton.click(); nextButton.click();
return true; // Indicate that pagination occurred return true; // Indicate that pagination occurred
} else { } else {
// Handle pagination with numbers // Handle pagination with numbers
const paginationButtons = document.querySelectorAll(selector); const paginationButtons = document.querySelectorAll(selector);
let clicked = false; let clicked = false;
// Loop through pagination buttons to find the current active page // Loop through pagination buttons to find the current active page
for (let i = 0; i < paginationButtons.length - 1; i++) { for (let i = 0; i < paginationButtons.length - 1; i++) {
const button = paginationButtons[i]; const button = paginationButtons[i];
if (button.classList.contains('active')) { if (button.classList.contains('active')) {
// Click the next button if available // Click the next button if available
const nextButtonInPagination = paginationButtons[i + 1]; const nextButtonInPagination = paginationButtons[i + 1];
if (nextButtonInPagination) { if (nextButtonInPagination) {
nextButtonInPagination.click(); nextButtonInPagination.click();
clicked = true; clicked = true;
break; break;
} }
}
} }
}
// If no next button was clicked, we might be on the last page // If no next button was clicked, we might be on the last page
if (!clicked) { if (!clicked) {
throw new Error("No more items to load or pagination has ended."); throw new Error("No more items to load or pagination has ended.");
} }
//return clicked; // Indicate whether pagination occurred return clicked; // Indicate whether pagination occurred
} }
} }
@@ -339,72 +339,81 @@ async function clickNextPagination(selector, scrapedData, limit) {
* @param {boolean} [config.flexible=false] - Whether to use flexible matching for field selectors * @param {boolean} [config.flexible=false] - Whether to use flexible matching for field selectors
* @returns {Array.<Array.<Object>>} Array of arrays of scraped items, one sub-array per list * @returns {Array.<Array.<Object>>} Array of arrays of scraped items, one sub-array per list
*/ */
window.scrapeList = async function({ listSelector, fields, limit = 10, pagination = null }) { window.scrapeList = async function ({ listSelector, fields, limit = 10, pagination = null }) {
const scrapedData = []; const scrapedData = [];
while (scrapedData.length < limit) { while (scrapedData.length < limit) {
// Get all parent elements matching the listSelector // Get all parent elements matching the listSelector
const parentElements = Array.from(document.querySelectorAll(listSelector)); const parentElements = Array.from(document.querySelectorAll(listSelector));
// Iterate through each parent element // Iterate through each parent element
for (const parent of parentElements) { for (const parent of parentElements) {
if (scrapedData.length >= limit) break; if (scrapedData.length >= limit) break;
const record = {}; const record = {};
// For each field, select the corresponding element within the parent // For each field, select the corresponding element within the parent
for (const [label, { selector, attribute }] of Object.entries(fields)) { for (const [label, { selector, attribute }] of Object.entries(fields)) {
const fieldElement = parent.querySelector(selector); const fieldElement = parent.querySelector(selector);
// Depending on the attribute specified, extract the data if (fieldElement) {
if (fieldElement) { if (attribute === 'innerText') {
if (attribute === 'innerText') { record[label] = fieldElement.innerText.trim();
record[label] = fieldElement.innerText.trim(); } else if (attribute === 'innerHTML') {
} else if (attribute === 'innerHTML') { record[label] = fieldElement.innerHTML.trim();
record[label] = fieldElement.innerHTML.trim(); } else if (attribute === 'src') {
} else if (attribute === 'src') { record[label] = fieldElement.src;
record[label] = fieldElement.src; } else if (attribute === 'href') {
} else if (attribute === 'href') { record[label] = fieldElement.href;
record[label] = fieldElement.href; } else {
} else { record[label] = fieldElement.getAttribute(attribute);
// Default to attribute retrieval
record[label] = fieldElement.getAttribute(attribute);
}
}
} }
}
// Add the record to the scrapedData array
scrapedData.push(record);
} }
if (pagination && scrapedData.length < limit) { // Add the record to the scrapedData array
switch (pagination.type) { scrapedData.push(record);
case 'scrollDown': }
await scrollDownToLoadMore(listSelector, limit);
break; // Check if we need to paginate
case 'scrollUp': if (pagination && scrapedData.length < limit) {
await scrollUpToLoadMore(listSelector, limit); let paginated = false;
break;
case 'clickNext': switch (pagination.type) {
await clickNextPagination(pagination.selector, scrapedData, limit); case 'scrollDown':
break; await scrollDownToLoadMore(listSelector, limit);
case 'clickLoadMore': paginated = true;
//await clickLoadMorePagination(pagination.selector); break;
break; case 'scrollUp':
case 'none': await scrollUpToLoadMore(listSelector, limit);
// No more items to load paginated = true;
break; break;
default: case 'clickNext':
console.warn("Unknown pagination type"); paginated = await clickNextPagination(pagination.selector, scrapedData, limit);
break; break;
} case 'clickLoadMore':
await new Promise(resolve => setTimeout(resolve, 2000)); // Wait for content to load //await clickLoadMorePagination(pagination.selector);
//paginated = true;
break;
case 'none':
// No more items to load
break;
default:
console.warn("Unknown pagination type");
break;
}
if (paginated) {
await new Promise(resolve => setTimeout(resolve, 2000)); // Wait for content to load
} else { } else {
break; // No more items to load or no pagination break; // No further pagination needed
} }
} else {
break; // No more items to load or no pagination
}
} }
return scrapedData.slice(0, limit); // Return only the limited number of records return scrapedData.slice(0, limit); // Return only the limited number of records
}; };
/** /**