feat: change scrape schema merge logic
This commit is contained in:
@@ -437,41 +437,25 @@ export default class Interpreter extends EventEmitter {
|
|||||||
|
|
||||||
const scrapeResult = await page.evaluate((schemaObj) => window.scrapeSchema(schemaObj), schema);
|
const scrapeResult = await page.evaluate((schemaObj) => window.scrapeSchema(schemaObj), schema);
|
||||||
|
|
||||||
const newResults = Array.isArray(scrapeResult) ? scrapeResult : [scrapeResult];
|
if (!this.cumulativeResults || !Array.isArray(this.cumulativeResults)) {
|
||||||
newResults.forEach((result) => {
|
this.cumulativeResults = [];
|
||||||
Object.entries(result).forEach(([key, value]) => {
|
|
||||||
const keyExists = this.cumulativeResults.some(
|
|
||||||
(item) => key in item && item[key] !== undefined
|
|
||||||
);
|
|
||||||
|
|
||||||
if (!keyExists) {
|
|
||||||
this.cumulativeResults.push({ [key]: value });
|
|
||||||
}
|
}
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
const mergedResult: Record<string, string>[] = [
|
if (this.cumulativeResults.length === 0) {
|
||||||
Object.fromEntries(
|
this.cumulativeResults.push({});
|
||||||
Object.entries(
|
}
|
||||||
this.cumulativeResults.reduce((acc, curr) => {
|
|
||||||
Object.entries(curr).forEach(([key, value]) => {
|
const mergedResult = this.cumulativeResults[0];
|
||||||
// If the key doesn't exist or the current value is not undefined, add/update it
|
const resultToProcess = Array.isArray(scrapeResult) ? scrapeResult[0] : scrapeResult;
|
||||||
|
|
||||||
|
Object.entries(resultToProcess).forEach(([key, value]) => {
|
||||||
if (value !== undefined) {
|
if (value !== undefined) {
|
||||||
acc[key] = value;
|
mergedResult[key] = value;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
return acc;
|
|
||||||
}, {})
|
|
||||||
)
|
|
||||||
)
|
|
||||||
];
|
|
||||||
|
|
||||||
// Log cumulative results after each action
|
console.log("Updated merged result:", mergedResult);
|
||||||
console.log("CUMULATIVE results:", this.cumulativeResults);
|
await this.options.serializableCallback([mergedResult]);
|
||||||
console.log("MERGED results:", mergedResult);
|
|
||||||
|
|
||||||
await this.options.serializableCallback(mergedResult);
|
|
||||||
// await this.options.serializableCallback(scrapeResult);
|
|
||||||
},
|
},
|
||||||
|
|
||||||
scrapeList: async (config: { listSelector: string, fields: any, limit?: number, pagination: any }) => {
|
scrapeList: async (config: { listSelector: string, fields: any, limit?: number, pagination: any }) => {
|
||||||
|
|||||||
Reference in New Issue
Block a user