Merge branch 'develop' into iframe

This commit is contained in:
Rohit
2025-01-04 15:51:51 +05:30
committed by GitHub
56 changed files with 3858 additions and 657 deletions

View File

@@ -1,6 +1,6 @@
{
"name": "maxun-core",
"version": "0.0.6",
"version": "0.0.7",
"description": "Core package for Maxun, responsible for data extraction",
"main": "build/index.js",
"typings": "build/index.d.ts",

View File

@@ -712,7 +712,6 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
return scrapedData;
};
/**
* Gets all children of the elements matching the listSelector,
* returning their CSS selectors and innerText.

View File

@@ -111,13 +111,21 @@ export default class Interpreter extends EventEmitter {
/**
 * Enables ad-blocking on the given page when `this.blocker` is set; does nothing otherwise.
 * A failure of the call inside the `try` is logged at `Level.ERROR` and is not rethrown.
 * @param page Page handed to `this.blocker.enableBlockingInPage`.
 */
private async applyAdBlocker(page: Page): Promise<void> {
if (this.blocker) {
// NOTE(review): this call sits outside the `try` and repeats the guarded call below.
// It looks like the pre-change line of a diff hunk whose +/- markers were stripped — confirm.
await this.blocker.enableBlockingInPage(page);
try {
await this.blocker.enableBlockingInPage(page);
} catch (err) {
// NOTE(review): `err` is not part of the logged message, so the failure cause is not reported.
this.log(`Ad-blocker operation failed:`, Level.ERROR);
}
}
}
/**
 * Disables ad-blocking on the given page when `this.blocker` is set; does nothing otherwise.
 * A failure of the call inside the `try` is logged at `Level.ERROR` and is not rethrown.
 * @param page Page handed to `this.blocker.disableBlockingInPage`.
 */
private async disableAdBlocker(page: Page): Promise<void> {
if (this.blocker) {
// NOTE(review): this call sits outside the `try` and repeats the guarded call below.
// It looks like the pre-change line of a diff hunk whose +/- markers were stripped — confirm.
await this.blocker.disableBlockingInPage(page);
try {
await this.blocker.disableBlockingInPage(page);
} catch (err) {
// NOTE(review): `err` is not part of the logged message, so the failure cause is not reported.
this.log(`Ad-blocker operation failed:`, Level.ERROR);
}
}
}
@@ -678,7 +686,11 @@ export default class Interpreter extends EventEmitter {
workflowCopy = this.removeIframeSelectors(workflowCopy);
// apply ad-blocker to the current page
await this.applyAdBlocker(p);
try {
await this.applyAdBlocker(p);
} catch (error) {
this.log(`Failed to apply ad-blocker: ${error.message}`, Level.ERROR);
}
const usedActions: string[] = [];
let selectors: string[] = [];
let lastAction = null;

View File

@@ -3,36 +3,36 @@
*/
export default class Concurrency {
/**
* Maximum number of workers running in parallel. If set to `null`, there is no limit.
*/
* Maximum number of workers running in parallel. If set to `null`, there is no limit.
*/
maxConcurrency: number = 1;
/**
* Number of currently active workers.
*/
* Number of currently active workers.
*/
activeWorkers: number = 0;
/**
* Queue of jobs waiting to be completed.
*/
* Queue of jobs waiting to be completed.
*/
private jobQueue: Function[] = [];
/**
* "Resolve" callbacks of the waitForCompletion() promises.
*/
* "Resolve" callbacks of the waitForCompletion() promises.
*/
private waiting: Function[] = [];
/**
* Constructs a new instance of concurrency manager.
* @param {number} maxConcurrency Maximum number of workers running in parallel.
*/
* Constructs a new instance of concurrency manager.
* @param {number} maxConcurrency Maximum number of workers running in parallel.
*/
constructor(maxConcurrency: number) {
this.maxConcurrency = maxConcurrency;
}
/**
* Takes a waiting job out of the queue and runs it.
*/
* Takes a waiting job out of the queue and runs it.
*/
private runNextJob(): void {
const job = this.jobQueue.pop();
@@ -53,12 +53,12 @@ export default class Concurrency {
}
/**
* Pass a job (a time-demanding async function) to the concurrency manager. \
* The time of the job's execution depends on the concurrency manager itself
* (given a generous enough `maxConcurrency` value, it might be immediate,
* but this is not guaranteed).
* @param job Async function to be executed (job to be processed).
*/
* Pass a job (a time-demanding async function) to the concurrency manager. \
* The time of the job's execution depends on the concurrency manager itself
* (given a generous enough `maxConcurrency` value, it might be immediate,
* but this is not guaranteed).
* @param job Async function to be executed (job to be processed).
*/
addJob(job: () => Promise<any>): void {
// console.debug("Adding a worker!");
this.jobQueue.push(job);
@@ -72,11 +72,11 @@ export default class Concurrency {
}
/**
* Waits until there is neither a running nor a waiting job. \
* If the concurrency manager is idle at the time of calling this function,
* it waits until at least one job is completed (can be "presubscribed").
* @returns Promise, resolved after there is no running/waiting worker.
*/
* Waits until there is neither a running nor a waiting job. \
* If the concurrency manager is idle at the time of calling this function,
* it waits until at least one job is completed (can be "presubscribed").
* @returns Promise, resolved after there is no running/waiting worker.
*/
waitForCompletion(): Promise<void> {
return new Promise((res) => {
this.waiting.push(res);