feat: change scrape schema merge logic

This commit is contained in:
Rohit
2025-04-29 00:29:52 +05:30
parent 82d6f70920
commit a00e69e40a

View File

@@ -432,46 +432,30 @@ export default class Interpreter extends EventEmitter {
if (this.options.debugChannel?.setActionType) { if (this.options.debugChannel?.setActionType) {
this.options.debugChannel.setActionType('scrapeSchema'); this.options.debugChannel.setActionType('scrapeSchema');
} }
await this.ensureScriptsLoaded(page); await this.ensureScriptsLoaded(page);
const scrapeResult = await page.evaluate((schemaObj) => window.scrapeSchema(schemaObj), schema); const scrapeResult = await page.evaluate((schemaObj) => window.scrapeSchema(schemaObj), schema);
const newResults = Array.isArray(scrapeResult) ? scrapeResult : [scrapeResult]; if (!this.cumulativeResults || !Array.isArray(this.cumulativeResults)) {
newResults.forEach((result) => { this.cumulativeResults = [];
Object.entries(result).forEach(([key, value]) => { }
const keyExists = this.cumulativeResults.some(
(item) => key in item && item[key] !== undefined if (this.cumulativeResults.length === 0) {
); this.cumulativeResults.push({});
}
if (!keyExists) {
this.cumulativeResults.push({ [key]: value }); const mergedResult = this.cumulativeResults[0];
} const resultToProcess = Array.isArray(scrapeResult) ? scrapeResult[0] : scrapeResult;
});
Object.entries(resultToProcess).forEach(([key, value]) => {
if (value !== undefined) {
mergedResult[key] = value;
}
}); });
const mergedResult: Record<string, string>[] = [ console.log("Updated merged result:", mergedResult);
Object.fromEntries( await this.options.serializableCallback([mergedResult]);
Object.entries(
this.cumulativeResults.reduce((acc, curr) => {
Object.entries(curr).forEach(([key, value]) => {
// If the key doesn't exist or the current value is not undefined, add/update it
if (value !== undefined) {
acc[key] = value;
}
});
return acc;
}, {})
)
)
];
// Log cumulative results after each action
console.log("CUMULATIVE results:", this.cumulativeResults);
console.log("MERGED results:", mergedResult);
await this.options.serializableCallback(mergedResult);
// await this.options.serializableCallback(scrapeResult);
}, },
scrapeList: async (config: { listSelector: string, fields: any, limit?: number, pagination: any }) => { scrapeList: async (config: { listSelector: string, fields: any, limit?: number, pagination: any }) => {