feat: add sdk selector validation, pagination detection

This commit is contained in:
Rohit Rajan
2025-12-07 22:13:34 +05:30
parent 63c9d53272
commit d64cc4dec3
6 changed files with 4545 additions and 3 deletions

715
server/src/api/sdk.ts Normal file
View File

@@ -0,0 +1,715 @@
/**
* SDK API Routes
* Separate API endpoints specifically for Maxun SDKs
* All routes require API key authentication
*/
import { Router, Request, Response } from 'express';
import { requireAPIKey } from "../middlewares/api";
import Robot from "../models/Robot";
import Run from "../models/Run";
import { v4 as uuid } from 'uuid';
import { WorkflowFile } from "maxun-core";
import logger from "../logger";
import { capture } from "../utils/analytics";
import { handleRunRecording } from "./record";
import { WorkflowEnricher } from "../sdk/workflowEnricher";
import { cancelScheduledWorkflow, scheduleWorkflow } from '../storage/schedule';
import { computeNextRun } from "../utils/schedule";
import moment from 'moment-timezone';
const router = Router();
interface AuthenticatedRequest extends Request {
user?: any;
}
/**
* Create a new robot programmatically
* POST /api/sdk/robots
*/
router.post("/sdk/robots", requireAPIKey, async (req: AuthenticatedRequest, res: Response) => {
try {
const user = req.user;
const workflowFile: WorkflowFile = req.body;
if (!workflowFile.meta || !workflowFile.workflow) {
return res.status(400).json({
error: "Invalid workflow structure. Expected { meta, workflow }"
});
}
if (!workflowFile.meta.name) {
return res.status(400).json({
error: "Robot name is required in meta.name"
});
}
const type = (workflowFile.meta as any).type || 'extract';
let enrichedWorkflow: any[] = [];
let extractedUrl: string | undefined;
if (type === 'scrape') {
enrichedWorkflow = [];
extractedUrl = (workflowFile.meta as any).url;
if (!extractedUrl) {
return res.status(400).json({
error: "URL is required for scrape robots"
});
}
} else {
const enrichResult = await WorkflowEnricher.enrichWorkflow(workflowFile.workflow);
if (!enrichResult.success) {
return res.status(400).json({
error: "Workflow validation failed",
details: enrichResult.errors
});
}
enrichedWorkflow = enrichResult.workflow!;
extractedUrl = enrichResult.url;
}
const robotId = uuid();
const metaId = uuid();
const robotMeta: any = {
name: workflowFile.meta.name,
id: metaId,
createdAt: new Date().toISOString(),
updatedAt: new Date().toISOString(),
pairs: enrichedWorkflow.length,
params: [],
type,
url: extractedUrl,
formats: (workflowFile.meta as any).formats || [],
};
const robot = await Robot.create({
id: robotId,
userId: user.id,
recording_meta: robotMeta,
recording: {
workflow: enrichedWorkflow
}
});
capture("maxun-oss-robot-created", {
robot_meta: robot.recording_meta,
recording: robot.recording,
});
return res.status(201).json({
data: robot,
message: "Robot created successfully"
});
} catch (error: any) {
logger.error("[SDK] Error creating robot:", error);
return res.status(500).json({
error: "Failed to create robot",
message: error.message
});
}
});
/**
* List all robots for the authenticated user
* GET /api/sdk/robots
*/
router.get("/sdk/robots", requireAPIKey, async (req: AuthenticatedRequest, res: Response) => {
try {
const robots = await Robot.findAll();
return res.status(200).json({
data: robots
});
} catch (error: any) {
logger.error("[SDK] Error listing robots:", error);
return res.status(500).json({
error: "Failed to list robots",
message: error.message
});
}
});
/**
* Get a specific robot by ID
* GET /api/sdk/robots/:id
*/
router.get("/sdk/robots/:id", requireAPIKey, async (req: AuthenticatedRequest, res: Response) => {
try {
const robotId = req.params.id;
const robot = await Robot.findOne({
where: {
'recording_meta.id': robotId
}
});
if (!robot) {
return res.status(404).json({
error: "Robot not found"
});
}
return res.status(200).json({
data: robot
});
} catch (error: any) {
logger.error("[SDK] Error getting robot:", error);
return res.status(500).json({
error: "Failed to get robot",
message: error.message
});
}
});
/**
* Update a robot
* PUT /api/sdk/robots/:id
*/
router.put("/sdk/robots/:id", requireAPIKey, async (req: AuthenticatedRequest, res: Response) => {
try {
const robotId = req.params.id;
const updates = req.body;
const robot = await Robot.findOne({
where: {
'recording_meta.id': robotId
}
});
if (!robot) {
return res.status(404).json({
error: "Robot not found"
});
}
const updateData: any = {};
if (updates.workflow) {
updateData.recording = {
workflow: updates.workflow
};
}
if (updates.meta) {
updateData.recording_meta = {
...robot.recording_meta,
...updates.meta,
updatedAt: new Date().toISOString()
};
}
if (updates.google_sheet_email !== undefined) {
updateData.google_sheet_email = updates.google_sheet_email;
}
if (updates.google_sheet_name !== undefined) {
updateData.google_sheet_name = updates.google_sheet_name;
}
if (updates.airtable_base_id !== undefined) {
updateData.airtable_base_id = updates.airtable_base_id;
}
if (updates.airtable_table_name !== undefined) {
updateData.airtable_table_name = updates.airtable_table_name;
}
if (updates.schedule !== undefined) {
if (updates.schedule === null) {
try {
await cancelScheduledWorkflow(robotId);
} catch (cancelError) {
logger.warn(`[SDK] Failed to cancel existing schedule for robot ${robotId}: ${cancelError}`);
}
updateData.schedule = null;
} else {
const {
runEvery,
runEveryUnit,
timezone,
startFrom = 'SUNDAY',
dayOfMonth = 1,
atTimeStart = '00:00',
atTimeEnd = '23:59'
} = updates.schedule;
if (!runEvery || !runEveryUnit || !timezone) {
return res.status(400).json({
error: "Missing required schedule parameters: runEvery, runEveryUnit, timezone"
});
}
if (!moment.tz.zone(timezone)) {
return res.status(400).json({
error: "Invalid timezone"
});
}
const [startHours, startMinutes] = atTimeStart.split(':').map(Number);
const [endHours, endMinutes] = atTimeEnd.split(':').map(Number);
if (isNaN(startHours) || isNaN(startMinutes) || isNaN(endHours) || isNaN(endMinutes) ||
startHours < 0 || startHours > 23 || startMinutes < 0 || startMinutes > 59 ||
endHours < 0 || endHours > 23 || endMinutes < 0 || endMinutes > 59) {
return res.status(400).json({ error: 'Invalid time format. Expected HH:MM (e.g., 09:30)' });
}
const days = ['SUNDAY', 'MONDAY', 'TUESDAY', 'WEDNESDAY', 'THURSDAY', 'FRIDAY', 'SATURDAY'];
if (!days.includes(startFrom)) {
return res.status(400).json({ error: 'Invalid startFrom day. Must be one of: SUNDAY, MONDAY, TUESDAY, WEDNESDAY, THURSDAY, FRIDAY, SATURDAY' });
}
let cronExpression;
const dayIndex = days.indexOf(startFrom);
switch (runEveryUnit) {
case 'MINUTES':
cronExpression = `*/${runEvery} * * * *`;
break;
case 'HOURS':
cronExpression = `${startMinutes} */${runEvery} * * *`;
break;
case 'DAYS':
cronExpression = `${startMinutes} ${startHours} */${runEvery} * *`;
break;
case 'WEEKS':
cronExpression = `${startMinutes} ${startHours} * * ${dayIndex}`;
break;
case 'MONTHS':
cronExpression = `${startMinutes} ${startHours} ${dayOfMonth} */${runEvery} *`;
if (startFrom !== 'SUNDAY') {
cronExpression += ` ${dayIndex}`;
}
break;
default:
return res.status(400).json({
error: "Invalid runEveryUnit. Must be one of: MINUTES, HOURS, DAYS, WEEKS, MONTHS"
});
}
try {
await cancelScheduledWorkflow(robotId);
} catch (cancelError) {
logger.warn(`[SDK] Failed to cancel existing schedule for robot ${robotId}: ${cancelError}`);
}
try {
await scheduleWorkflow(robotId, cronExpression, timezone);
} catch (scheduleError: any) {
logger.error(`[SDK] Failed to schedule workflow for robot ${robotId}: ${scheduleError.message}`);
return res.status(500).json({
error: "Failed to schedule workflow",
message: scheduleError.message
});
}
const nextRunAt = computeNextRun(cronExpression, timezone);
updateData.schedule = {
runEvery,
runEveryUnit,
timezone,
startFrom,
dayOfMonth,
atTimeStart,
atTimeEnd,
cronExpression,
lastRunAt: undefined,
nextRunAt: nextRunAt || undefined,
};
logger.info(`[SDK] Scheduled robot ${robotId} with cron: ${cronExpression} in timezone: ${timezone}`);
}
}
if (updates.webhooks !== undefined) {
updateData.webhooks = updates.webhooks;
}
if (updates.proxy_url !== undefined) {
updateData.proxy_url = updates.proxy_url;
}
if (updates.proxy_username !== undefined) {
updateData.proxy_username = updates.proxy_username;
}
if (updates.proxy_password !== undefined) {
updateData.proxy_password = updates.proxy_password;
}
await robot.update(updateData);
logger.info(`[SDK] Robot updated: ${robotId}`);
return res.status(200).json({
data: robot,
message: "Robot updated successfully"
});
} catch (error: any) {
logger.error("[SDK] Error updating robot:", error);
return res.status(500).json({
error: "Failed to update robot",
message: error.message
});
}
});
/**
* Delete a robot
* DELETE /api/sdk/robots/:id
*/
router.delete("/sdk/robots/:id", requireAPIKey, async (req: AuthenticatedRequest, res: Response) => {
try {
const robotId = req.params.id;
const robot = await Robot.findOne({
where: {
'recording_meta.id': robotId
}
});
if (!robot) {
return res.status(404).json({
error: "Robot not found"
});
}
await Run.destroy({
where: {
robotMetaId: robot.recording_meta.id
}
});
await robot.destroy();
logger.info(`[SDK] Robot deleted: ${robotId}`);
capture(
'maxun-oss-robot-deleted',
{
robotId: robotId,
user_id: req.user?.id,
deleted_at: new Date().toISOString(),
}
)
return res.status(200).json({
message: "Robot deleted successfully"
});
} catch (error: any) {
logger.error("[SDK] Error deleting robot:", error);
return res.status(500).json({
error: "Failed to delete robot",
message: error.message
});
}
});
/**
* Execute a robot
* POST /api/sdk/robots/:id/execute
*/
router.post("/sdk/robots/:id/execute", requireAPIKey, async (req: AuthenticatedRequest, res: Response) => {
try {
const user = req.user;
const robotId = req.params.id;
logger.info(`[SDK] Starting execution for robot ${robotId}`);
const runId = await handleRunRecording(robotId, user.id.toString());
if (!runId) {
throw new Error('Failed to start robot execution');
}
const run = await waitForRunCompletion(runId, user.id.toString());
let listData: any[] = [];
if (run.serializableOutput?.scrapeList) {
const scrapeList: any = run.serializableOutput.scrapeList;
if (scrapeList.scrapeList && Array.isArray(scrapeList.scrapeList)) {
listData = scrapeList.scrapeList;
}
else if (Array.isArray(scrapeList)) {
listData = scrapeList;
}
else if (typeof scrapeList === 'object') {
const listValues = Object.values(scrapeList);
if (listValues.length > 0 && Array.isArray(listValues[0])) {
listData = listValues[0] as any[];
}
}
}
return res.status(200).json({
data: {
runId: run.runId,
status: run.status,
data: {
textData: run.serializableOutput?.scrapeSchema || {},
listData: listData
},
screenshots: Object.values(run.binaryOutput || {})
}
});
} catch (error: any) {
logger.error("[SDK] Error executing robot:", error);
return res.status(500).json({
error: "Failed to execute robot",
message: error.message
});
}
});
/**
* Wait for run completion
*/
async function waitForRunCompletion(runId: string, interval: number = 2000) {
const MAX_WAIT_TIME = 180 * 60 * 1000;
const startTime = Date.now();
while (true) {
if (Date.now() - startTime > MAX_WAIT_TIME) {
throw new Error('Run completion timeout after 3 hours');
}
const run = await Run.findOne({ where: { runId } });
if (!run) throw new Error('Run not found');
if (run.status === 'success') {
return run.toJSON();
} else if (run.status === 'failed') {
throw new Error('Run failed');
} else if (run.status === 'aborted') {
throw new Error('Run was aborted');
}
await new Promise(resolve => setTimeout(resolve, interval));
}
}
/**
* Get all runs for a robot
* GET /api/sdk/robots/:id/runs
*/
router.get("/sdk/robots/:id/runs", requireAPIKey, async (req: AuthenticatedRequest, res: Response) => {
try {
const robotId = req.params.id;
const robot = await Robot.findOne({
where: {
'recording_meta.id': robotId
}
});
if (!robot) {
return res.status(404).json({
error: "Robot not found"
});
}
const runs = await Run.findAll({
where: {
robotMetaId: robot.recording_meta.id
},
order: [['startedAt', 'DESC']]
});
return res.status(200).json({
data: runs
});
} catch (error: any) {
logger.error("[SDK] Error getting runs:", error);
return res.status(500).json({
error: "Failed to get runs",
message: error.message
});
}
});
/**
* Get a specific run
* GET /api/sdk/robots/:id/runs/:runId
*/
router.get("/sdk/robots/:id/runs/:runId", requireAPIKey, async (req: AuthenticatedRequest, res: Response) => {
try {
const robotId = req.params.id;
const runId = req.params.runId;
const robot = await Robot.findOne({
where: {
'recording_meta.id': robotId
}
});
if (!robot) {
return res.status(404).json({
error: "Robot not found"
});
}
const run = await Run.findOne({
where: {
runId: runId,
robotMetaId: robot.recording_meta.id
}
});
if (!run) {
return res.status(404).json({
error: "Run not found"
});
}
return res.status(200).json({
data: run
});
} catch (error: any) {
logger.error("[SDK] Error getting run:", error);
return res.status(500).json({
error: "Failed to get run",
message: error.message
});
}
});
/**
* Abort a running execution
* POST /api/sdk/robots/:id/runs/:runId/abort
*/
router.post("/sdk/robots/:id/runs/:runId/abort", requireAPIKey, async (req: AuthenticatedRequest, res: Response) => {
try {
const robotId = req.params.id;
const runId = req.params.runId;
const robot = await Robot.findOne({
where: {
'recording_meta.id': robotId
}
});
if (!robot) {
return res.status(404).json({
error: "Robot not found"
});
}
const run = await Run.findOne({
where: {
runId: runId,
robotMetaId: robot.recording_meta.id
}
});
if (!run) {
return res.status(404).json({
error: "Run not found"
});
}
if (run.status !== 'running' && run.status !== 'queued') {
return res.status(400).json({
error: "Run is not in a state that can be aborted",
currentStatus: run.status
});
}
await run.update({ status: 'aborted' });
logger.info(`[SDK] Run ${runId} marked for abortion`);
return res.status(200).json({
message: "Run abortion initiated",
data: run
});
} catch (error: any) {
logger.error("[SDK] Error aborting run:", error);
return res.status(500).json({
error: "Failed to abort run",
message: error.message
});
}
});
/**
* LLM-based extraction - generate workflow from natural language prompt
* POST /api/sdk/extract/llm
*/
router.post("/sdk/extract/llm", requireAPIKey, async (req: AuthenticatedRequest, res: Response) => {
try {
const user = req.user.id
const { url, prompt, llmProvider, llmModel, llmApiKey, llmBaseUrl, robotName } = req.body;
if (!url || !prompt) {
return res.status(400).json({
error: "URL and prompt are required"
});
}
const workflowResult = await WorkflowEnricher.generateWorkflowFromPrompt(url, prompt, {
provider: llmProvider,
model: llmModel,
apiKey: llmApiKey,
baseUrl: llmBaseUrl
});
if (!workflowResult.success || !workflowResult.workflow) {
return res.status(400).json({
error: "Failed to generate workflow from prompt",
details: workflowResult.errors
});
}
const robotId = uuid();
const metaId = uuid();
const robotMeta: any = {
name: robotName || `LLM Extract: ${prompt.substring(0, 50)}`,
id: metaId,
createdAt: new Date().toISOString(),
updatedAt: new Date().toISOString(),
pairs: workflowResult.workflow.length,
params: [],
type: 'extract',
url: workflowResult.url,
};
const robot = await Robot.create({
id: robotId,
userId: user.id,
recording_meta: robotMeta,
recording: {
workflow: workflowResult.workflow
},
});
logger.info(`[SDK] Persistent robot created: ${metaId} for LLM extraction`);
capture("maxun-oss-robot-created", {
robot_meta: robot.recording_meta,
recording: robot.recording,
});
return res.status(200).json({
success: true,
data: {
robotId: metaId,
name: robotMeta.name,
description: prompt,
url: workflowResult.url,
workflow: workflowResult.workflow
}
});
} catch (error: any) {
logger.error("[SDK] Error in LLM extraction:", error);
return res.status(500).json({
error: "Failed to perform LLM extraction",
message: error.message
});
}
});
export default router;

View File

@@ -894,7 +894,7 @@ router.put('/schedule/:id/', requireSignIn, async (req: AuthenticatedRequest, re
logger.log('warn', `Failed to cancel existing schedule for robot ${id}: ${cancelError}`);
}
const jobId = await scheduleWorkflow(id, req.user.id, cronExpression, timezone);
await scheduleWorkflow(id, cronExpression, timezone);
const nextRunAt = computeNextRun(cronExpression, timezone);

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,576 @@
/**
* Selector Validator
* Validates and enriches selectors with metadata using Playwright page instance
*/
import { Page } from 'playwright-core';
import logger from '../logger';
interface SelectorInput {
selector: string;
attribute?: string;
}
interface EnrichedSelector {
tag: string;
isShadow: boolean;
selector: string;
attribute: string;
}
interface ValidationResult {
valid: boolean;
enriched?: EnrichedSelector;
error?: string;
}
export class SelectorValidator {
private page: Page | null = null;
/**
* Initialize with an existing Page instance and navigate to URL
* @param page Page instance from RemoteBrowser
* @param url URL to navigate to
*/
async initialize(page: Page, url: string): Promise<void> {
this.page = page;
await this.page.goto(url, { waitUntil: 'networkidle', timeout: 30000 });
logger.info(`Navigated to ${url} using RemoteBrowser page`);
}
/**
* Validate and enrich a single selector
*/
async validateSelector(input: SelectorInput): Promise<ValidationResult> {
if (!this.page) {
return { valid: false, error: 'Browser not initialized' };
}
const { selector, attribute = 'innerText' } = input;
try {
const isXPath = selector.startsWith('//') || selector.startsWith('(//');
let element;
if (isXPath) {
element = await this.page.locator(`xpath=${selector}`).first();
} else {
element = await this.page.locator(selector).first();
}
const count = await element.count();
if (count === 0) {
return {
valid: false,
error: `Selector "${selector}" did not match any elements`
};
}
const tagName = await element.evaluate((el) => el.tagName);
const isShadow = await element.evaluate((el) => {
let parent = el.parentNode;
while (parent) {
if (parent instanceof ShadowRoot) {
return true;
}
parent = parent.parentNode;
}
return false;
});
return {
valid: true,
enriched: {
tag: tagName,
isShadow,
selector,
attribute
}
};
} catch (error: any) {
logger.error(`Error validating selector "${selector}":`, error.message);
return {
valid: false,
error: `Invalid selector: ${error.message}`
};
}
}
/**
* Validate and enrich multiple selectors
*/
async validateSchemaFields(
fields: Record<string, string | SelectorInput>
): Promise<{ valid: boolean; enriched?: Record<string, EnrichedSelector>; errors?: string[] }> {
const enriched: Record<string, EnrichedSelector> = {};
const errors: string[] = [];
for (const [fieldName, fieldInput] of Object.entries(fields)) {
const input: SelectorInput = typeof fieldInput === 'string'
? { selector: fieldInput }
: fieldInput;
const result = await this.validateSelector(input);
if (result.valid && result.enriched) {
enriched[fieldName] = result.enriched;
} else {
errors.push(`Field "${fieldName}": ${result.error}`);
}
}
if (errors.length > 0) {
return { valid: false, errors };
}
return { valid: true, enriched };
}
/**
* Validate list selector and fields
*/
async validateListFields(config: {
itemSelector: string;
fields: Record<string, string | SelectorInput>;
}): Promise<{
valid: boolean;
enriched?: {
listSelector: string;
listTag: string;
fields: Record<string, EnrichedSelector>;
};
errors?: string[]
}> {
const errors: string[] = [];
const listResult = await this.validateSelector({
selector: config.itemSelector,
attribute: 'innerText'
});
if (!listResult.valid || !listResult.enriched) {
errors.push(`List selector: ${listResult.error}`);
return { valid: false, errors };
}
const fieldsResult = await this.validateSchemaFields(config.fields);
if (!fieldsResult.valid) {
errors.push(...(fieldsResult.errors || []));
return { valid: false, errors };
}
return {
valid: true,
enriched: {
listSelector: config.itemSelector,
listTag: listResult.enriched.tag,
fields: fieldsResult.enriched!
}
};
}
/**
* Detect input type for a given selector
*/
async detectInputType(selector: string): Promise<string> {
if (!this.page) {
throw new Error('Browser not initialized');
}
try {
const isXPath = selector.startsWith('//') || selector.startsWith('(//');
let element;
if (isXPath) {
element = await this.page.locator(`xpath=${selector}`).first();
} else {
element = await this.page.locator(selector).first();
}
const count = await element.count();
if (count === 0) {
throw new Error(`Selector "${selector}" did not match any elements`);
}
const inputType = await element.evaluate((el) => {
if (el instanceof HTMLInputElement) {
return el.type || 'text';
}
if (el instanceof HTMLTextAreaElement) {
return 'textarea';
}
if (el instanceof HTMLSelectElement) {
return 'select';
}
return 'text';
});
return inputType;
} catch (error: any) {
throw new Error(`Failed to detect input type: ${error.message}`);
}
}
/**
* Auto-detect fields from list selector
*/
async autoDetectListFields(listSelector: string): Promise<{
success: boolean;
fields?: Record<string, any>;
listSelector?: string;
error?: string;
}> {
if (!this.page) {
return { success: false, error: 'Browser not initialized' };
}
try {
const fs = require('fs');
const path = require('path');
const scriptPath = path.join(__dirname, 'browserSide/pageAnalyzer.js');
const scriptContent = fs.readFileSync(scriptPath, 'utf8');
await this.page.evaluate((script) => {
eval(script);
}, scriptContent);
const result = await this.page.evaluate((selector) => {
const win = window as any;
if (typeof win.autoDetectListFields === 'function') {
return win.autoDetectListFields(selector);
} else {
return {
fields: {},
error: 'Auto-detection function not loaded'
};
}
}, listSelector);
// Log debug information
if (result.debug) {
logger.info(`Debug info: ${JSON.stringify(result.debug)}`);
}
if (result.error || !result.fields || Object.keys(result.fields).length === 0) {
return {
success: false,
error: result.error || 'No fields detected from list selector'
};
}
const convertedListSelector = result.listSelector || listSelector;
logger.info(`Auto-detected ${Object.keys(result.fields).length} fields from list`);
return {
success: true,
fields: result.fields,
listSelector: convertedListSelector,
};
} catch (error: any) {
logger.error('Field auto-detection error:', error);
return {
success: false,
error: `Field auto-detection failed: ${error.message}`
};
}
}
/**
* Auto-detect pagination type and selector from list selector
*/
async autoDetectPagination(listSelector: string): Promise<{
success: boolean;
type?: string;
selector?: string | null;
error?: string;
}> {
if (!this.page) {
return { success: false, error: 'Browser not initialized' };
}
try {
const fs = require('fs');
const path = require('path');
const scriptPath = path.join(__dirname, 'browserSide/pageAnalyzer.js');
const scriptContent = fs.readFileSync(scriptPath, 'utf8');
await this.page.evaluate((script) => {
eval(script);
}, scriptContent);
const buttonResult = await this.page.evaluate((selector) => {
const win = window as any;
if (typeof win.autoDetectPagination === 'function') {
const result = win.autoDetectPagination(selector);
return result;
} else {
console.error('autoDetectPagination function not found!');
return {
type: '',
selector: null,
error: 'Pagination auto-detection function not loaded'
};
}
}, listSelector);
if (buttonResult.debug) {
logger.info(`Pagination debug info: ${JSON.stringify(buttonResult.debug)}`);
}
if (buttonResult.error) {
logger.error(`Button detection error: ${buttonResult.error}`);
return {
success: false,
error: buttonResult.error
};
}
if (buttonResult.type && buttonResult.type !== '') {
if (buttonResult.type === 'clickLoadMore' && buttonResult.selector) {
logger.info('Testing Load More button by clicking...');
const loadMoreVerified = await this.testLoadMoreButton(buttonResult.selector, listSelector);
if (!loadMoreVerified) {
logger.warn('Load More button did not load content, trying other detection methods');
} else {
logger.info(`Verified Load More button works`);
return {
success: true,
type: buttonResult.type,
selector: buttonResult.selector
};
}
} else {
logger.info(`Detected pagination type: ${buttonResult.type}${buttonResult.selector ? ` with selector: ${buttonResult.selector}` : ''}`);
return {
success: true,
type: buttonResult.type,
selector: buttonResult.selector
};
}
}
const scrollTestResult = await this.testInfiniteScrollByScrolling(listSelector);
if (scrollTestResult.detected) {
return {
success: true,
type: 'scrollDown',
selector: null
};
}
return {
success: true,
type: '',
selector: null
};
} catch (error: any) {
logger.error('Pagination auto-detection error:', error);
return {
success: false,
error: `Pagination auto-detection failed: ${error.message}`
};
}
}
/**
* Test Load More button by clicking it and checking if content loads
*/
private async testLoadMoreButton(buttonSelector: string, listSelector: string): Promise<boolean> {
if (!this.page) {
return false;
}
try {
const initialState = await this.page.evaluate((selector) => {
function evaluateSelector(sel: string, doc: Document) {
const isXPath = sel.startsWith('//') || sel.startsWith('(//');
if (isXPath) {
const result = doc.evaluate(sel, doc, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
const elements = [];
for (let i = 0; i < result.snapshotLength; i++) {
elements.push(result.snapshotItem(i));
}
return elements;
} else {
return Array.from(doc.querySelectorAll(sel));
}
}
const listElements = evaluateSelector(selector, document);
return {
itemCount: listElements.length,
scrollHeight: document.documentElement.scrollHeight
};
}, listSelector);
try {
const selectors = buttonSelector.split(',').map(s => s.trim());
let clicked = false;
for (const sel of selectors) {
try {
await this.page.click(sel, { timeout: 1000 });
clicked = true;
break;
} catch (e) {
continue;
}
}
if (!clicked) {
return false;
}
await this.page.waitForTimeout(2000);
} catch (clickError: any) {
logger.warn(`Failed to click button: ${clickError.message}`);
return false;
}
const afterClickState = await this.page.evaluate((selector) => {
function evaluateSelector(sel: string, doc: Document) {
const isXPath = sel.startsWith('//') || sel.startsWith('(//');
if (isXPath) {
const result = doc.evaluate(sel, doc, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
const elements = [];
for (let i = 0; i < result.snapshotLength; i++) {
elements.push(result.snapshotItem(i));
}
return elements;
} else {
return Array.from(doc.querySelectorAll(sel));
}
}
const listElements = evaluateSelector(selector, document);
return {
itemCount: listElements.length,
scrollHeight: document.documentElement.scrollHeight
};
}, listSelector);
logger.info(`After click: ${afterClickState.itemCount} items, scrollHeight: ${afterClickState.scrollHeight}`);
const itemsAdded = afterClickState.itemCount > initialState.itemCount;
const heightIncreased = afterClickState.scrollHeight > initialState.scrollHeight + 100;
if (itemsAdded || heightIncreased) {
const details = `Items: ${initialState.itemCount}${afterClickState.itemCount}, Height: ${initialState.scrollHeight}${afterClickState.scrollHeight}`;
logger.info(`Content loaded after click: ${details}`);
return true;
}
logger.info('No content change detected after clicking');
return false;
} catch (error: any) {
logger.error('Error during Load More test:', error.message);
return false;
}
}
/**
* Test for infinite scroll by actually scrolling and checking if content loads
*/
private async testInfiniteScrollByScrolling(listSelector: string): Promise<{
detected: boolean;
details?: string;
}> {
if (!this.page) {
return { detected: false };
}
try {
const initialState = await this.page.evaluate((selector) => {
function evaluateSelector(sel: string, doc: Document) {
const isXPath = sel.startsWith('//') || sel.startsWith('(//');
if (isXPath) {
const result = doc.evaluate(sel, doc, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
const elements = [];
for (let i = 0; i < result.snapshotLength; i++) {
elements.push(result.snapshotItem(i));
}
return elements;
} else {
return Array.from(doc.querySelectorAll(sel));
}
}
const listElements = evaluateSelector(selector, document);
return {
itemCount: listElements.length,
scrollHeight: document.documentElement.scrollHeight,
scrollY: window.scrollY
};
}, listSelector);
logger.info(`Initial state: ${initialState.itemCount} items, scrollHeight: ${initialState.scrollHeight}`);
await this.page.evaluate(() => {
window.scrollTo(0, document.documentElement.scrollHeight);
});
await this.page.waitForTimeout(2000);
const afterScrollState = await this.page.evaluate((selector) => {
function evaluateSelector(sel: string, doc: Document) {
const isXPath = sel.startsWith('//') || sel.startsWith('(//');
if (isXPath) {
const result = doc.evaluate(sel, doc, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
const elements = [];
for (let i = 0; i < result.snapshotLength; i++) {
elements.push(result.snapshotItem(i));
}
return elements;
} else {
return Array.from(doc.querySelectorAll(sel));
}
}
const listElements = evaluateSelector(selector, document);
return {
itemCount: listElements.length,
scrollHeight: document.documentElement.scrollHeight,
scrollY: window.scrollY
};
}, listSelector);
await this.page.evaluate((originalY) => {
window.scrollTo(0, originalY);
}, initialState.scrollY);
const itemsAdded = afterScrollState.itemCount > initialState.itemCount;
const heightIncreased = afterScrollState.scrollHeight > initialState.scrollHeight + 100;
if (itemsAdded || heightIncreased) {
const details = `Items: ${initialState.itemCount}${afterScrollState.itemCount}, Height: ${initialState.scrollHeight}${afterScrollState.scrollHeight}`;
logger.info(`Content changed: ${details}`);
return { detected: true, details };
}
logger.info('No content change detected');
return { detected: false };
} catch (error: any) {
logger.error('Error during scroll test:', error.message);
return { detected: false };
}
}
/**
* Clear page reference
*/
async close(): Promise<void> {
this.page = null;
logger.info('Page reference cleared');
}
}

View File

@@ -0,0 +1,711 @@
/**
* Workflow Enricher
* Converts simplified SDK workflow to full format with validation
*/
import { SelectorValidator } from './selectorValidator';
import { createRemoteBrowserForValidation, destroyRemoteBrowser } from '../browser-management/controller';
import logger from '../logger';
import { v4 as uuid } from 'uuid';
import { encrypt } from '../utils/auth';
import Anthropic from '@anthropic-ai/sdk';
interface SimplifiedAction {
action: string | typeof Symbol.asyncDispose;
args?: any[];
name?: string;
actionId?: string;
}
type RegexableString = string | { $regex: string };
interface SimplifiedWorkflowPair {
where: {
url?: RegexableString;
[key: string]: any;
};
what: SimplifiedAction[];
}
export class WorkflowEnricher {
/**
* Enrich a simplified workflow with full metadata
*/
static async enrichWorkflow(
simplifiedWorkflow: SimplifiedWorkflowPair[],
userId: string = 'sdk-validation-user'
): Promise<{ success: boolean; workflow?: any[]; errors?: string[]; url?: string }> {
const errors: string[] = [];
const enrichedWorkflow: any[] = [];
if (simplifiedWorkflow.length === 0) {
return { success: false, errors: ['Workflow is empty'] };
}
let url: string | undefined;
for (const step of simplifiedWorkflow) {
const rawUrl = step.where.url;
if (rawUrl && rawUrl !== 'about:blank') {
url = typeof rawUrl === 'string' ? rawUrl : rawUrl.$regex;
break;
}
}
if (!url) {
return { success: false, errors: ['No valid URL found in workflow'] };
}
let browserId: string | null = null;
const validator = new SelectorValidator();
try {
logger.info('Creating RemoteBrowser for validation');
const { browserId: id, page } = await createRemoteBrowserForValidation(userId);
browserId = id;
await validator.initialize(page, url);
for (const step of simplifiedWorkflow) {
const enrichedStep: any = {
where: { ...step.where },
what: []
};
const selectors: string[] = [];
for (const action of step.what) {
if (typeof action.action !== 'string') {
continue;
}
if (action.action === 'type') {
if (!action.args || action.args.length < 2) {
errors.push('type action missing selector or value');
continue;
}
const selector = action.args[0];
const value = action.args[1];
const providedInputType = action.args[2];
selectors.push(selector);
const encryptedValue = encrypt(value);
if (!providedInputType) {
try {
const inputType = await validator.detectInputType(selector);
enrichedStep.what.push({
...action,
args: [selector, encryptedValue, inputType]
});
} catch (error: any) {
errors.push(`type action: ${error.message}`);
continue;
}
} else {
enrichedStep.what.push({
...action,
args: [selector, encryptedValue, providedInputType]
});
}
enrichedStep.what.push({
action: 'waitForLoadState',
args: ['networkidle']
});
continue;
}
if (action.action !== 'scrapeSchema' && action.action !== 'scrapeList') {
enrichedStep.what.push(action);
continue;
}
if (action.action === 'scrapeSchema') {
if (!action.args || !action.args[0]) {
errors.push('scrapeSchema action missing fields argument');
continue;
}
const fields = action.args[0];
const result = await validator.validateSchemaFields(fields);
if (!result.valid) {
errors.push(...(result.errors || []));
continue;
}
const enrichedFields: Record<string, any> = {};
for (const [fieldName, enrichedData] of Object.entries(result.enriched!)) {
enrichedFields[fieldName] = {
tag: enrichedData.tag,
isShadow: enrichedData.isShadow,
selector: enrichedData.selector,
attribute: enrichedData.attribute
};
selectors.push(enrichedData.selector);
}
const enrichedAction: any = {
action: 'scrapeSchema',
actionId: `text-${uuid()}`,
args: [enrichedFields]
};
if (action.name) {
enrichedAction.name = action.name;
}
enrichedStep.what.push(enrichedAction);
enrichedStep.what.push({
action: 'waitForLoadState',
args: ['networkidle']
});
} else if (action.action === 'scrapeList') {
if (!action.args || !action.args[0]) {
errors.push('scrapeList action missing config argument');
continue;
}
const config = action.args[0];
let enrichedFields: Record<string, any> = {};
let listSelector: string;
try {
const autoDetectResult = await validator.autoDetectListFields(config.itemSelector);
if (!autoDetectResult.success || !autoDetectResult.fields || Object.keys(autoDetectResult.fields).length === 0) {
errors.push(autoDetectResult.error || 'Failed to auto-detect fields from list selector');
continue;
}
enrichedFields = autoDetectResult.fields;
listSelector = autoDetectResult.listSelector!;
logger.info('Auto-detected', Object.keys(enrichedFields).length, 'fields');
} catch (error: any) {
errors.push(`Field auto-detection failed: ${error.message}`);
continue;
}
let paginationType = 'none';
let paginationSelector = '';
if (config.pagination && config.pagination.type) {
paginationType = config.pagination.type;
paginationSelector = config.pagination.selector || '';
} else {
try {
const paginationResult = await validator.autoDetectPagination(config.itemSelector);
if (paginationResult.success && paginationResult.type) {
paginationType = paginationResult.type;
paginationSelector = paginationResult.selector || '';
}
} catch (error: any) {
logger.warn('Pagination auto-detection failed, using default (none):', error.message);
}
}
const enrichedListAction: any = {
action: 'scrapeList',
actionId: `list-${uuid()}`,
args: [{
fields: enrichedFields,
listSelector: listSelector,
pagination: {
type: paginationType,
selector: paginationSelector
},
limit: config.maxItems || 100
}]
};
if (action.name) {
enrichedListAction.name = action.name;
}
enrichedStep.what.push(enrichedListAction);
enrichedStep.what.push({
action: 'waitForLoadState',
args: ['networkidle']
});
}
}
if (selectors.length > 0) {
enrichedStep.where.selectors = selectors;
}
enrichedWorkflow.push(enrichedStep);
}
await validator.close();
if (browserId) {
await destroyRemoteBrowser(browserId, userId);
logger.info('RemoteBrowser cleaned up successfully');
}
if (errors.length > 0) {
return { success: false, errors };
}
return { success: true, workflow: enrichedWorkflow, url };
} catch (error: any) {
await validator.close();
if (browserId) {
try {
await destroyRemoteBrowser(browserId, userId);
logger.info('RemoteBrowser cleaned up after error');
} catch (cleanupError) {
logger.warn('Failed to cleanup RemoteBrowser:', cleanupError);
}
}
logger.error('Error enriching workflow:', error);
return { success: false, errors: [error.message] };
}
}
/**
* Generate workflow from natural language prompt using LLM with vision
*/
static async generateWorkflowFromPrompt(
url: string,
prompt: string,
llmConfig?: {
provider?: 'anthropic' | 'openai' | 'ollama';
model?: string;
apiKey?: string;
baseUrl?: string;
},
userId: string = 'sdk-validation-user',
): Promise<{ success: boolean; workflow?: any[]; url?: string; errors?: string[] }> {
let browserId: string | null = null;
const validator = new SelectorValidator();
try {
logger.info(`Generating workflow from prompt for URL: ${url}`);
logger.info(`Prompt: ${prompt}`);
logger.info('Creating RemoteBrowser for LLM workflow generation');
const { browserId: id, page } = await createRemoteBrowserForValidation(userId);
browserId = id;
await validator.initialize(page as any, url);
const validatorPage = (validator as any).page;
const screenshotBuffer = await page.screenshot({ fullPage: true, type: 'png' });
const screenshotBase64 = screenshotBuffer.toString('base64');
const elementGroups = await this.analyzePageGroups(validator);
logger.info(`Found ${elementGroups.length} element groups`);
const pageHTML = await validatorPage.content();
const llmDecision = await this.getLLMDecisionWithVision(
prompt,
screenshotBase64,
elementGroups,
pageHTML,
llmConfig
);
logger.info(`LLM decided action type: ${llmDecision.actionType}`);
const workflow = await this.buildWorkflowFromLLMDecision(llmDecision, url, validator);
await validator.close();
if (browserId) {
await destroyRemoteBrowser(browserId, userId);
logger.info('RemoteBrowser cleaned up after LLM workflow generation');
}
return { success: true, workflow, url };
} catch (error: any) {
await validator.close();
if (browserId) {
try {
await destroyRemoteBrowser(browserId, userId);
logger.info('RemoteBrowser cleaned up after LLM generation error');
} catch (cleanupError) {
logger.warn('Failed to cleanup RemoteBrowser:', cleanupError);
}
}
logger.error('Error generating workflow from prompt:', error);
return { success: false, errors: [error.message] };
}
}
/**
* Analyze page groups using browser-side script
*/
private static async analyzePageGroups(validator: SelectorValidator): Promise<any[]> {
try {
const page = (validator as any).page;
const fs = require('fs');
const path = require('path');
const scriptPath = path.join(__dirname, 'browserSide/pageAnalyzer.js');
const scriptContent = fs.readFileSync(scriptPath, 'utf8');
await page.evaluate((script: string) => {
eval(script);
}, scriptContent);
const groups = await page.evaluate(() => {
const win = window as any;
if (typeof win.analyzeElementGroups === 'function') {
return win.analyzeElementGroups();
}
return [];
});
return groups;
} catch (error: any) {
logger.error('Error analyzing page groups:', error);
return [];
}
}
/**
* Use LLM (with or without vision) to decide action and select best element/group
*/
private static async getLLMDecisionWithVision(
prompt: string,
screenshotBase64: string,
elementGroups: any[],
pageHTML: string,
llmConfig?: {
provider?: 'anthropic' | 'openai' | 'ollama';
model?: string;
apiKey?: string;
baseUrl?: string;
}
): Promise<any> {
try {
const provider = llmConfig?.provider || 'ollama';
const axios = require('axios');
const groupsDescription = elementGroups.map((group, index) => {
const sampleText = group.sampleTexts.slice(0, 2).filter((t: string) => t && t.trim().length > 0).join(' | ');
const hasContent = sampleText.length > 0;
const contentPreview = hasContent ? sampleText : '(no text content - likely images/icons)';
return `Group ${index}:
- Tag: ${group.fingerprint.tagName}
- Count: ${group.count} similar elements
- Has text content: ${hasContent ? 'YES' : 'NO'}
- Sample content: ${contentPreview.substring(0, 300)}`;
}).join('\n\n');
const systemPrompt = `You are a request classifier for list extraction. Your job is to:
1. Identify that the user wants to extract a list of items
2. Select the BEST element group that matches what they want
3. Extract any numeric limit from their request
CRITICAL GROUP SELECTION RULES:
- Groups with "Has text content: YES" are usually better than groups with NO text content
- Match the sample content to what the user is asking for
- Avoid groups that only show images/icons (Has text content: NO)
- The group with the most relevant sample content should be selected, NOT just the first group
- Analyze the keywords in the user's request and find the group whose sample content contains related text
LIMIT EXTRACTION:
- Look for numbers in the request that indicate quantity (e.g., "50", "25", "100", "first 30", "top 10")
- If no limit specified, use null
Must return valid JSON: {"actionType": "captureList", "reasoning": "...", "selectedGroupIndex": NUMBER, "limit": NUMBER_OR_NULL}`;
const userPrompt = `User's request: "${prompt}"
Available element groups on page:
${groupsDescription}
TASK:
1. Identify the key terms from the user's request
2. Look through ALL the groups above
3. Find the group whose "Sample content" best matches the key terms from the request
4. Prefer groups with "Has text content: YES" over "NO"
5. Extract any numeric limit from the request if present
Return JSON:
{
"actionType": "captureList",
"reasoning": "Brief explanation of why this group was selected",
"selectedGroupIndex": INDEX_NUMBER,
"limit": NUMBER_OR_NULL
}
Note: selectedGroupIndex must be between 0 and ${elementGroups.length - 1}`;
let llmResponse: string;
if (provider === 'ollama') {
const ollamaBaseUrl = llmConfig?.baseUrl || process.env.OLLAMA_BASE_URL || 'http://localhost:11434';
const ollamaModel = llmConfig?.model || 'llama3.2-vision';
const jsonSchema = {
type: 'object',
required: ['actionType', 'reasoning', 'selectedGroupIndex'],
properties: {
actionType: {
type: 'string',
enum: ['captureList']
},
reasoning: {
type: 'string'
},
selectedGroupIndex: {
type: 'integer'
},
limit: {
type: ['integer', 'null']
}
}
};
const response = await axios.post(`${ollamaBaseUrl}/api/chat`, {
model: ollamaModel,
messages: [
{
role: 'system',
content: systemPrompt
},
{
role: 'user',
content: userPrompt,
images: [screenshotBase64]
}
],
stream: false,
format: jsonSchema,
options: {
temperature: 0.1
}
});
llmResponse = response.data.message.content;
} else if (provider === 'anthropic') {
const anthropic = new Anthropic({
apiKey: llmConfig?.apiKey || process.env.ANTHROPIC_API_KEY
});
const anthropicModel = llmConfig?.model || 'claude-3-5-sonnet-20241022';
const response = await anthropic.messages.create({
model: anthropicModel,
max_tokens: 1024,
messages: [{
role: 'user',
content: [
{
type: 'image',
source: {
type: 'base64',
media_type: 'image/png',
data: screenshotBase64
}
},
{
type: 'text',
text: userPrompt
}
]
}],
system: systemPrompt
});
const textContent = response.content.find((c: any) => c.type === 'text');
llmResponse = textContent?.type === 'text' ? textContent.text : '';
} else if (provider === 'openai') {
const openaiBaseUrl = llmConfig?.baseUrl || 'https://api.openai.com/v1';
const openaiModel = llmConfig?.model || 'gpt-4-vision-preview';
const response = await axios.post(`${openaiBaseUrl}/chat/completions`, {
model: openaiModel,
messages: [
{
role: 'system',
content: systemPrompt
},
{
role: 'user',
content: [
{
type: 'text',
text: userPrompt
},
{
type: 'image_url',
image_url: {
url: `data:image/png;base64,${screenshotBase64}`
}
}
]
}
],
max_tokens: 1024,
temperature: 0.1
}, {
headers: {
'Authorization': `Bearer ${llmConfig?.apiKey || process.env.OPENAI_API_KEY}`,
'Content-Type': 'application/json'
}
});
llmResponse = response.data.choices[0].message.content;
} else {
throw new Error(`Unsupported LLM provider: ${provider}`);
}
logger.info(`LLM Response: ${llmResponse}`);
let jsonStr = llmResponse.trim();
const jsonMatch = jsonStr.match(/```json\s*([\s\S]*?)\s*```/) || jsonStr.match(/```\s*([\s\S]*?)\s*```/);
if (jsonMatch) {
jsonStr = jsonMatch[1].trim();
}
const objectMatch = jsonStr.match(/\{[\s\S]*"actionType"[\s\S]*\}/);
if (objectMatch) {
jsonStr = objectMatch[0];
}
const decision = JSON.parse(jsonStr);
if (!decision.actionType || decision.actionType !== 'captureList') {
throw new Error('LLM response must have actionType: "captureList"');
}
if (decision.selectedGroupIndex === undefined || decision.selectedGroupIndex < 0 || decision.selectedGroupIndex >= elementGroups.length) {
throw new Error(`Invalid selectedGroupIndex: ${decision.selectedGroupIndex}. Must be between 0 and ${elementGroups.length - 1}`);
}
const selectedGroup = elementGroups[decision.selectedGroupIndex];
return {
actionType: 'captureList',
selectedGroup,
itemSelector: selectedGroup.xpath,
reasoning: decision.reasoning,
limit: decision.limit || null
};
} catch (error: any) {
logger.error('LLM decision error:', error);
return this.fallbackHeuristicDecision(prompt, elementGroups);
}
}
/**
* Fallback heuristic decision when LLM fails
*/
private static fallbackHeuristicDecision(prompt: string, elementGroups: any[]): any {
const promptLower = prompt.toLowerCase();
if (elementGroups.length === 0) {
throw new Error('No element groups found on page for list extraction');
}
const scoredGroups = elementGroups.map((group, index) => {
let score = 0;
for (const sampleText of group.sampleTexts) {
const keywords = promptLower.split(' ').filter((w: string) => w.length > 3);
for (const keyword of keywords) {
if (sampleText.toLowerCase().includes(keyword)) score += 2;
}
}
score += Math.min(group.count / 10, 5);
return { group, score, index };
});
scoredGroups.sort((a, b) => b.score - a.score);
const best = scoredGroups[0];
return {
actionType: 'captureList',
selectedGroup: best.group,
itemSelector: best.group.xpath
};
}
/**
* Build workflow from LLM decision
*/
private static async buildWorkflowFromLLMDecision(
llmDecision: any,
url: string,
validator: SelectorValidator
): Promise<any[]> {
const workflow: any[] = [];
workflow.push({
where: { url, selectors: [] },
what: [
{ action: 'goto', args: [url] },
{ action: 'waitForLoadState', args: ['networkidle'] }
]
});
if (llmDecision.actionType === 'captureList') {
logger.info(`Auto-detecting fields for: ${llmDecision.itemSelector}`);
const autoDetectResult = await validator.autoDetectListFields(llmDecision.itemSelector);
if (!autoDetectResult.success || !autoDetectResult.fields || Object.keys(autoDetectResult.fields).length === 0) {
throw new Error('Failed to auto-detect fields from selected group');
}
logger.info(`Auto-detected ${Object.keys(autoDetectResult.fields).length} fields`);
let paginationType = 'none';
let paginationSelector = '';
try {
const paginationResult = await validator.autoDetectPagination(llmDecision.itemSelector);
if (paginationResult.success && paginationResult.type) {
paginationType = paginationResult.type;
paginationSelector = paginationResult.selector || '';
}
} catch (error: any) {
logger.warn('Pagination auto-detection failed:', error.message);
}
const limit = llmDecision.limit || 100;
logger.info(`Using limit: ${limit}`);
workflow[0].what.push({
action: 'scrapeList',
actionId: `list-${uuid()}`,
name: 'List 1',
args: [{
fields: autoDetectResult.fields,
listSelector: autoDetectResult.listSelector,
pagination: {
type: paginationType,
selector: paginationSelector
},
limit: limit
}]
});
workflow[0].what.push({
action: 'waitForLoadState',
args: ['networkidle']
});
} else {
throw new Error(`Unsupported action type: ${llmDecision.actionType}. Only captureList is supported.`);
}
return workflow;
}
}

View File

@@ -13,7 +13,7 @@ import { pgBossClient } from './pgboss';
* @param cronExpression The cron expression for scheduling
* @param timezone The timezone for the cron expression
*/
export async function scheduleWorkflow(id: string, userId: string, cronExpression: string, timezone: string): Promise<void> {
export async function scheduleWorkflow(id: string, cronExpression: string, timezone: string): Promise<void> {
try {
const runId = uuid();
@@ -24,7 +24,7 @@ export async function scheduleWorkflow(id: string, userId: string, cronExpressio
await pgBossClient.createQueue(queueName);
await pgBossClient.schedule(queueName, cronExpression,
{ id, runId, userId },
{ id, runId },
{ tz: timezone }
);