From a00e69e40af259bbf1ecb548fd2bf9da337e8ec0 Mon Sep 17 00:00:00 2001 From: Rohit Date: Tue, 29 Apr 2025 00:29:52 +0530 Subject: [PATCH] feat: change scrape schema merge logic --- maxun-core/src/interpret.ts | 54 +++++++++++++------------------------ 1 file changed, 19 insertions(+), 35 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 4e5e2be9..5c92a767 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -432,46 +432,30 @@ export default class Interpreter extends EventEmitter { if (this.options.debugChannel?.setActionType) { this.options.debugChannel.setActionType('scrapeSchema'); } - + await this.ensureScriptsLoaded(page); const scrapeResult = await page.evaluate((schemaObj) => window.scrapeSchema(schemaObj), schema); - const newResults = Array.isArray(scrapeResult) ? scrapeResult : [scrapeResult]; - newResults.forEach((result) => { - Object.entries(result).forEach(([key, value]) => { - const keyExists = this.cumulativeResults.some( - (item) => key in item && item[key] !== undefined - ); - - if (!keyExists) { - this.cumulativeResults.push({ [key]: value }); - } - }); + if (!this.cumulativeResults || !Array.isArray(this.cumulativeResults)) { + this.cumulativeResults = []; + } + + if (this.cumulativeResults.length === 0) { + this.cumulativeResults.push({}); + } + + const mergedResult = this.cumulativeResults[0]; + const resultToProcess = Array.isArray(scrapeResult) ? scrapeResult[0] : scrapeResult; + + Object.entries(resultToProcess).forEach(([key, value]) => { + if (value !== undefined) { + mergedResult[key] = value; + } }); - - const mergedResult: Record[] = [ - Object.fromEntries( - Object.entries( - this.cumulativeResults.reduce((acc, curr) => { - Object.entries(curr).forEach(([key, value]) => { - // If the key doesn't exist or the current value is not undefined, add/update it - if (value !== undefined) { - acc[key] = value; - } - }); - return acc; - }, {}) - ) - ) - ]; - - // Log cumulative results after each action - console.log("CUMULATIVE results:", this.cumulativeResults); - console.log("MERGED results:", mergedResult); - - await this.options.serializableCallback(mergedResult); - // await this.options.serializableCallback(scrapeResult); + + console.log("Updated merged result:", mergedResult); + await this.options.serializableCallback([mergedResult]); }, scrapeList: async (config: { listSelector: string, fields: any, limit?: number, pagination: any }) => {