fix: timeout mechanism, revamp working button logic

This commit is contained in:
Rohit Rajan
2025-09-28 23:25:32 +05:30
parent a83b69cfc6
commit 47d1b24de3

View File

@@ -617,6 +617,13 @@ export default class Interpreter extends EventEmitter {
if (methodName === 'waitForLoadState') { if (methodName === 'waitForLoadState') {
try { try {
let args = step.args;
if (Array.isArray(args) && args.length === 1) {
args = [args[0], { timeout: 30000 }];
} else if (!Array.isArray(args)) {
args = [args, { timeout: 30000 }];
}
await executeAction(invokee, methodName, step.args); await executeAction(invokee, methodName, step.args);
} catch (error) { } catch (error) {
await executeAction(invokee, methodName, 'domcontentloaded'); await executeAction(invokee, methodName, 'domcontentloaded');
@@ -677,7 +684,19 @@ export default class Interpreter extends EventEmitter {
return; return;
} }
const results = await page.evaluate((cfg) => window.scrapeList(cfg), config); const evaluationPromise = page.evaluate((cfg) => window.scrapeList(cfg), config);
const timeoutPromise = new Promise<any[]>((_, reject) =>
setTimeout(() => reject(new Error('Page evaluation timeout')), 10000)
);
let results;
try {
results = await Promise.race([evaluationPromise, timeoutPromise]);
} catch (error) {
debugLog(`Page evaluation failed: ${error.message}`);
return;
}
const newResults = results.filter(item => { const newResults = results.filter(item => {
const uniqueKey = JSON.stringify(item); const uniqueKey = JSON.stringify(item);
if (scrapedItems.has(uniqueKey)) return false; if (scrapedItems.has(uniqueKey)) return false;
@@ -698,43 +717,94 @@ export default class Interpreter extends EventEmitter {
return false; return false;
}; };
/**
 * Heuristically decides whether a selector string is an XPath expression
 * rather than a CSS selector.
 *
 * A selector is treated as XPath when it starts with a path step (`/`, `//`
 * or `./`), or when it contains XPath-only syntax (`contains(@`, `[count(`,
 * `@class=`, `@id=`, or the ` and ` / ` or ` operators) outside of quoted
 * string literals.
 *
 * @param selector - Raw selector text (CSS or XPath).
 * @returns `true` when the selector looks like XPath, `false` otherwise.
 */
const isXPathSelector = (selector: string): boolean => {
  // A leading '/' also covers '//'; no CSS selector starts with a slash or './'.
  if (selector.startsWith('/') || selector.startsWith('./')) {
    return true;
  }

  // Blank out quoted literals before looking for XPath keywords. Otherwise a
  // CSS selector such as [aria-label="Terms and Conditions"] would be
  // misclassified because of the words inside its attribute value.
  const withoutLiterals = selector.replace(/"[^"]*"|'[^']*'/g, '""');

  const xpathMarkers = ['contains(@', '[count(', '@class=', '@id=', ' and ', ' or '];
  return xpathMarkers.some((marker) => withoutLiterals.includes(marker));
};
/**
 * Waits until an element matching `selector` is attached to the DOM and
 * returns a handle to it. Works for both CSS and XPath selectors.
 *
 * Both kinds go through `page.waitForSelector`, which resolves with the first
 * matching element. XPath selectors are routed to Playwright's `xpath=`
 * engine; a selector that already carries that prefix is passed through
 * unchanged so it is never prefixed twice.
 *
 * @param selector - CSS or XPath selector.
 * @param options - Optional settings; only `timeout` (milliseconds) is read.
 *   Defaults to 10000 when not provided.
 * @returns The element handle, or `null` when the wait fails for any reason
 *   (timeout, invalid selector, page closed). Failures are deliberately
 *   swallowed so callers can simply try the next selector.
 */
const waitForSelectorUniversal = async (selector: string, options: any = {}): Promise<ElementHandle | null> => {
  // `??` instead of `||` so that an explicit timeout of 0 is not replaced by the default.
  const timeout = options?.timeout ?? 10000;

  const target = isXPathSelector(selector) && !selector.startsWith('xpath=')
    ? `xpath=${selector}`
    : selector;

  try {
    return await page.waitForSelector(target, {
      state: 'attached',
      timeout
    });
  } catch {
    return null;
  }
};
// Enhanced button finder with retry mechanism // Enhanced button finder with retry mechanism
const findWorkingButton = async (selectors: string[]): Promise<{ const findWorkingButton = async (selectors: string[]): Promise<{
button: ElementHandle | null, button: ElementHandle | null,
workingSelector: string | null, workingSelector: string | null,
updatedSelectors: string[] updatedSelectors: string[]
}> => { }> => {
let updatedSelectors = [...selectors]; const startTime = Date.now();
const MAX_BUTTON_SEARCH_TIME = 15000;
let updatedSelectors = [...selectors];
for (let i = 0; i < selectors.length; i++) { for (let i = 0; i < selectors.length; i++) {
if (Date.now() - startTime > MAX_BUTTON_SEARCH_TIME) {
debugLog(`Button search timeout reached (${MAX_BUTTON_SEARCH_TIME}ms), aborting`);
break;
}
const selector = selectors[i]; const selector = selectors[i];
let retryCount = 0; let retryCount = 0;
let selectorSuccess = false; let selectorSuccess = false;
while (retryCount < MAX_RETRIES && !selectorSuccess) { while (retryCount < MAX_RETRIES && !selectorSuccess) {
try { try {
const button = await page.waitForSelector(selector, { const button = await waitForSelectorUniversal(selector, { timeout: 2000 });
state: 'attached',
timeout: 10000
});
if (button) { if (button) {
debugLog('Found working selector:', selector); debugLog('Found working selector:', selector);
return { return {
button, button,
workingSelector: selector, workingSelector: selector,
updatedSelectors updatedSelectors
}; };
} else {
retryCount++;
debugLog(`Selector "${selector}" not found: attempt ${retryCount}/${MAX_RETRIES}`);
if (retryCount < MAX_RETRIES) {
await page.waitForTimeout(RETRY_DELAY);
} else {
debugLog(`Removing failed selector "${selector}" after ${MAX_RETRIES} attempts`);
updatedSelectors = updatedSelectors.filter(s => s !== selector);
selectorSuccess = true;
}
} }
} catch (error) { } catch (error) {
retryCount++; retryCount++;
debugLog(`Selector "${selector}" failed: attempt ${retryCount}/${MAX_RETRIES}`); debugLog(`Selector "${selector}" error: attempt ${retryCount}/${MAX_RETRIES} - ${error.message}`);
if (retryCount < MAX_RETRIES) { if (retryCount < MAX_RETRIES) {
await page.waitForTimeout(RETRY_DELAY); await page.waitForTimeout(RETRY_DELAY);
} else { } else {
debugLog(`Removing failed selector "${selector}" after ${MAX_RETRIES} attempts`); debugLog(`Removing failed selector "${selector}" after ${MAX_RETRIES} attempts`);
updatedSelectors = updatedSelectors.filter(s => s !== selector); updatedSelectors = updatedSelectors.filter(s => s !== selector);
selectorSuccess = true;
} }
} }
} }
@@ -1354,9 +1424,35 @@ export default class Interpreter extends EventEmitter {
} }
/**
 * Checks whether the browser-side scraper helpers (`scrape`, `scrapeSchema`,
 * `scrapeList`, `scrapeListAuto`, `scrollDown`, `scrollUp`) are defined on
 * `window` in the given page, and registers `browserSide/scraper.js` as an
 * init script when they are not.
 *
 * The check is raced against a 3 second timer so an unresponsive page cannot
 * stall the caller. If the check times out or throws, the script is
 * registered anyway. A failure to register is logged and not rethrown.
 *
 * NOTE(review): `page.addInitScript` is documented to run on document
 * creation, so it likely does not inject into the already-loaded document.
 * Confirm that is the intended behaviour here.
 *
 * @param page - Playwright page to inspect.
 */
private async ensureScriptsLoaded(page: Page) {
  const SCRIPT_CHECK_TIMEOUT_MS = 3000;
  const scraperPath = path.join(__dirname, 'browserSide', 'scraper.js');
  let timer: ReturnType<typeof setTimeout> | undefined;

  try {
    const evaluationPromise = page.evaluate(() =>
      typeof window.scrape === 'function' &&
      typeof window.scrapeSchema === 'function' &&
      typeof window.scrapeList === 'function' &&
      typeof window.scrapeListAuto === 'function' &&
      typeof window.scrollDown === 'function' &&
      typeof window.scrollUp === 'function'
    );

    const timeoutPromise = new Promise<boolean>((_, reject) => {
      timer = setTimeout(
        () => reject(new Error('Script check timeout')),
        SCRIPT_CHECK_TIMEOUT_MS
      );
    });

    const isScriptLoaded = await Promise.race([
      evaluationPromise,
      timeoutPromise
    ]);

    if (!isScriptLoaded) {
      await page.addInitScript({ path: scraperPath });
    }
  } catch (error) {
    // Catch variables are `unknown` under strict settings; narrow before reading `.message`.
    const reason = error instanceof Error ? error.message : String(error);
    this.log(`Script check failed, adding script anyway: ${reason}`, Level.WARN);
    try {
      await page.addInitScript({ path: scraperPath });
    } catch (scriptError) {
      const scriptReason = scriptError instanceof Error ? scriptError.message : String(scriptError);
      this.log(`Failed to add script: ${scriptReason}`, Level.ERROR);
    }
  } finally {
    // Always release the timer so it does not linger after the race has settled.
    if (timer !== undefined) {
      clearTimeout(timer);
    }
  }
}