feat: merge scrapeSchema output into a single object; latest defined value wins

This commit is contained in:
Rohit
2025-04-29 00:29:52 +05:30
parent 82d6f70920
commit a00e69e40a

View File

@@ -437,41 +437,25 @@ export default class Interpreter extends EventEmitter {
const scrapeResult = await page.evaluate((schemaObj) => window.scrapeSchema(schemaObj), schema); const scrapeResult = await page.evaluate((schemaObj) => window.scrapeSchema(schemaObj), schema);
const newResults = Array.isArray(scrapeResult) ? scrapeResult : [scrapeResult]; if (!this.cumulativeResults || !Array.isArray(this.cumulativeResults)) {
newResults.forEach((result) => { this.cumulativeResults = [];
Object.entries(result).forEach(([key, value]) => {
const keyExists = this.cumulativeResults.some(
(item) => key in item && item[key] !== undefined
);
if (!keyExists) {
this.cumulativeResults.push({ [key]: value });
} }
});
});
const mergedResult: Record<string, string>[] = [ if (this.cumulativeResults.length === 0) {
Object.fromEntries( this.cumulativeResults.push({});
Object.entries( }
this.cumulativeResults.reduce((acc, curr) => {
Object.entries(curr).forEach(([key, value]) => { const mergedResult = this.cumulativeResults[0];
// If the key doesn't exist or the current value is not undefined, add/update it const resultToProcess = Array.isArray(scrapeResult) ? scrapeResult[0] : scrapeResult;
Object.entries(resultToProcess).forEach(([key, value]) => {
if (value !== undefined) { if (value !== undefined) {
acc[key] = value; mergedResult[key] = value;
} }
}); });
return acc;
}, {})
)
)
];
// Log cumulative results after each action console.log("Updated merged result:", mergedResult);
console.log("CUMULATIVE results:", this.cumulativeResults); await this.options.serializableCallback([mergedResult]);
console.log("MERGED results:", mergedResult);
await this.options.serializableCallback(mergedResult);
// await this.options.serializableCallback(scrapeResult);
}, },
scrapeList: async (config: { listSelector: string, fields: any, limit?: number, pagination: any }) => { scrapeList: async (config: { listSelector: string, fields: any, limit?: number, pagination: any }) => {