feat: add sdk selector validation, pagination detection

2025-12-07 22:13:34 +05:30
parent 63c9d53272
commit d64cc4dec3
6 changed files with 4545 additions and 3 deletions
--- a/server/src/api/sdk.ts
+++ b/server/src/api/sdk.ts
@@ -0,0 +1,715 @@
+/**
+ * SDK API Routes
+ * Separate API endpoints specifically for Maxun SDKs
+ * All routes require API key authentication
+ */
+
+import { Router, Request, Response } from 'express';
+import { requireAPIKey } from "../middlewares/api";
+import Robot from "../models/Robot";
+import Run from "../models/Run";
+import { v4 as uuid } from 'uuid';
+import { WorkflowFile } from "maxun-core";
+import logger from "../logger";
+import { capture } from "../utils/analytics";
+import { handleRunRecording } from "./record";
+import { WorkflowEnricher } from "../sdk/workflowEnricher";
+import { cancelScheduledWorkflow, scheduleWorkflow } from '../storage/schedule';
+import { computeNextRun } from "../utils/schedule";
+import moment from 'moment-timezone';
+
+const router = Router();
+
+interface AuthenticatedRequest extends Request {
+    user?: any;
+}
+
+/**
+ * Create a new robot programmatically
+ * POST /api/sdk/robots
+ */
+router.post("/sdk/robots", requireAPIKey, async (req: AuthenticatedRequest, res: Response) => {
+    try {
+        const user = req.user;
+        const workflowFile: WorkflowFile = req.body;
+
+        if (!workflowFile.meta || !workflowFile.workflow) {
+            return res.status(400).json({
+                error: "Invalid workflow structure. Expected { meta, workflow }"
+            });
+        }
+
+        if (!workflowFile.meta.name) {
+            return res.status(400).json({
+                error: "Robot name is required in meta.name"
+            });
+        }
+
+        const type = (workflowFile.meta as any).type || 'extract';
+
+        let enrichedWorkflow: any[] = [];
+        let extractedUrl: string | undefined;
+
+        if (type === 'scrape') {
+            enrichedWorkflow = [];
+            extractedUrl = (workflowFile.meta as any).url;
+
+            if (!extractedUrl) {
+                return res.status(400).json({
+                    error: "URL is required for scrape robots"
+                });
+            }
+        } else {
+            const enrichResult = await WorkflowEnricher.enrichWorkflow(workflowFile.workflow);
+
+            if (!enrichResult.success) {
+                return res.status(400).json({
+                    error: "Workflow validation failed",
+                    details: enrichResult.errors
+                });
+            }
+
+            enrichedWorkflow = enrichResult.workflow!;
+            extractedUrl = enrichResult.url;
+        }
+
+        const robotId = uuid();
+        const metaId = uuid();
+
+        const robotMeta: any = {
+            name: workflowFile.meta.name,
+            id: metaId,
+            createdAt: new Date().toISOString(),
+            updatedAt: new Date().toISOString(),
+            pairs: enrichedWorkflow.length,
+            params: [],
+            type,
+            url: extractedUrl,
+            formats: (workflowFile.meta as any).formats || [],
+        };
+
+        const robot = await Robot.create({
+            id: robotId,
+            userId: user.id,
+            recording_meta: robotMeta,
+            recording: {
+                workflow: enrichedWorkflow
+            }
+        });
+
+        capture("maxun-oss-robot-created", {
+            robot_meta: robot.recording_meta,
+            recording: robot.recording,
+        });
+
+        return res.status(201).json({
+            data: robot,
+            message: "Robot created successfully"
+        });
+
+    } catch (error: any) {
+        logger.error("[SDK] Error creating robot:", error);
+        return res.status(500).json({
+            error: "Failed to create robot",
+            message: error.message
+        });
+    }
+});
+
+/**
+ * List all robots for the authenticated user
+ * GET /api/sdk/robots
+ */
+router.get("/sdk/robots", requireAPIKey, async (req: AuthenticatedRequest, res: Response) => {
+    try {
+        const robots = await Robot.findAll();
+
+        return res.status(200).json({
+            data: robots
+        });
+    } catch (error: any) {
+        logger.error("[SDK] Error listing robots:", error);
+        return res.status(500).json({
+            error: "Failed to list robots",
+            message: error.message
+        });
+    }
+});
+
+/**
+ * Get a specific robot by ID
+ * GET /api/sdk/robots/:id
+ */
+router.get("/sdk/robots/:id", requireAPIKey, async (req: AuthenticatedRequest, res: Response) => {
+    try {
+        const robotId = req.params.id;
+
+        const robot = await Robot.findOne({
+            where: {
+                'recording_meta.id': robotId
+            }
+        });
+
+        if (!robot) {
+            return res.status(404).json({
+                error: "Robot not found"
+            });
+        }
+
+        return res.status(200).json({
+            data: robot
+        });
+    } catch (error: any) {
+        logger.error("[SDK] Error getting robot:", error);
+        return res.status(500).json({
+            error: "Failed to get robot",
+            message: error.message
+        });
+    }
+});
+
+/**
+ * Update a robot
+ * PUT /api/sdk/robots/:id
+ */
+router.put("/sdk/robots/:id", requireAPIKey, async (req: AuthenticatedRequest, res: Response) => {
+    try {
+        const robotId = req.params.id;
+        const updates = req.body;
+
+        const robot = await Robot.findOne({
+            where: {
+                'recording_meta.id': robotId
+            }
+        });
+
+        if (!robot) {
+            return res.status(404).json({
+                error: "Robot not found"
+            });
+        }
+
+        const updateData: any = {};
+
+        if (updates.workflow) {
+            updateData.recording = {
+                workflow: updates.workflow
+            };
+        }
+
+        if (updates.meta) {
+            updateData.recording_meta = {
+                ...robot.recording_meta,
+                ...updates.meta,
+                updatedAt: new Date().toISOString()
+            };
+        }
+
+        if (updates.google_sheet_email !== undefined) {
+            updateData.google_sheet_email = updates.google_sheet_email;
+        }
+        if (updates.google_sheet_name !== undefined) {
+            updateData.google_sheet_name = updates.google_sheet_name;
+        }
+        if (updates.airtable_base_id !== undefined) {
+            updateData.airtable_base_id = updates.airtable_base_id;
+        }
+        if (updates.airtable_table_name !== undefined) {
+            updateData.airtable_table_name = updates.airtable_table_name;
+        }
+
+        if (updates.schedule !== undefined) {
+            if (updates.schedule === null) {
+                try {
+                    await cancelScheduledWorkflow(robotId);
+                } catch (cancelError) {
+                    logger.warn(`[SDK] Failed to cancel existing schedule for robot ${robotId}: ${cancelError}`);
+                }
+                updateData.schedule = null;
+            } else {
+                const {
+                    runEvery,
+                    runEveryUnit,
+                    timezone,
+                    startFrom = 'SUNDAY',
+                    dayOfMonth = 1,
+                    atTimeStart = '00:00',
+                    atTimeEnd = '23:59'
+                } = updates.schedule;
+
+                if (!runEvery || !runEveryUnit || !timezone) {
+                    return res.status(400).json({
+                        error: "Missing required schedule parameters: runEvery, runEveryUnit, timezone"
+                    });
+                }
+
+                if (!moment.tz.zone(timezone)) {
+                    return res.status(400).json({
+                        error: "Invalid timezone"
+                    });
+                }
+
+                const [startHours, startMinutes] = atTimeStart.split(':').map(Number);
+                const [endHours, endMinutes] = atTimeEnd.split(':').map(Number);
+
+                if (isNaN(startHours) || isNaN(startMinutes) || isNaN(endHours) || isNaN(endMinutes) ||
+                    startHours < 0 || startHours > 23 || startMinutes < 0 || startMinutes > 59 ||
+                    endHours < 0 || endHours > 23 || endMinutes < 0 || endMinutes > 59) {
+                    return res.status(400).json({ error: 'Invalid time format. Expected HH:MM (e.g., 09:30)' });
+                }
+
+                const days = ['SUNDAY', 'MONDAY', 'TUESDAY', 'WEDNESDAY', 'THURSDAY', 'FRIDAY', 'SATURDAY'];
+                if (!days.includes(startFrom)) {
+                    return res.status(400).json({ error: 'Invalid startFrom day. Must be one of: SUNDAY, MONDAY, TUESDAY, WEDNESDAY, THURSDAY, FRIDAY, SATURDAY' });
+                }
+
+                let cronExpression;
+                const dayIndex = days.indexOf(startFrom);
+
+                switch (runEveryUnit) {
+                    case 'MINUTES':
+                        cronExpression = `*/${runEvery} * * * *`;
+                        break;
+                    case 'HOURS':
+                        cronExpression = `${startMinutes} */${runEvery} * * *`;
+                        break;
+                    case 'DAYS':
+                        cronExpression = `${startMinutes} ${startHours} */${runEvery} * *`;
+                        break;
+                    case 'WEEKS':
+                        cronExpression = `${startMinutes} ${startHours} * * ${dayIndex}`;
+                        break;
+                    case 'MONTHS':
+                        cronExpression = `${startMinutes} ${startHours} ${dayOfMonth} */${runEvery} *`;
+                        if (startFrom !== 'SUNDAY') {
+                            cronExpression += ` ${dayIndex}`;
+                        }
+                        break;
+                    default:
+                        return res.status(400).json({
+                            error: "Invalid runEveryUnit. Must be one of: MINUTES, HOURS, DAYS, WEEKS, MONTHS"
+                        });
+                }
+
+                try {
+                    await cancelScheduledWorkflow(robotId);
+                } catch (cancelError) {
+                    logger.warn(`[SDK] Failed to cancel existing schedule for robot ${robotId}: ${cancelError}`);
+                }
+
+                try {
+                    await scheduleWorkflow(robotId, cronExpression, timezone);
+                } catch (scheduleError: any) {
+                    logger.error(`[SDK] Failed to schedule workflow for robot ${robotId}: ${scheduleError.message}`);
+                    return res.status(500).json({
+                        error: "Failed to schedule workflow",
+                        message: scheduleError.message
+                    });
+                }
+
+                const nextRunAt = computeNextRun(cronExpression, timezone);
+
+                updateData.schedule = {
+                    runEvery,
+                    runEveryUnit,
+                    timezone,
+                    startFrom,
+                    dayOfMonth,
+                    atTimeStart,
+                    atTimeEnd,
+                    cronExpression,
+                    lastRunAt: undefined,
+                    nextRunAt: nextRunAt || undefined,
+                };
+
+                logger.info(`[SDK] Scheduled robot ${robotId} with cron: ${cronExpression} in timezone: ${timezone}`);
+            }
+        }
+
+        if (updates.webhooks !== undefined) {
+            updateData.webhooks = updates.webhooks;
+        }
+
+        if (updates.proxy_url !== undefined) {
+            updateData.proxy_url = updates.proxy_url;
+        }
+        if (updates.proxy_username !== undefined) {
+            updateData.proxy_username = updates.proxy_username;
+        }
+        if (updates.proxy_password !== undefined) {
+            updateData.proxy_password = updates.proxy_password;
+        }
+
+        await robot.update(updateData);
+
+        logger.info(`[SDK] Robot updated: ${robotId}`);
+
+        return res.status(200).json({
+            data: robot,
+            message: "Robot updated successfully"
+        });
+    } catch (error: any) {
+        logger.error("[SDK] Error updating robot:", error);
+        return res.status(500).json({
+            error: "Failed to update robot",
+            message: error.message
+        });
+    }
+});
+
+/**
+ * Delete a robot
+ * DELETE /api/sdk/robots/:id
+ */
+router.delete("/sdk/robots/:id", requireAPIKey, async (req: AuthenticatedRequest, res: Response) => {
+    try {
+        const robotId = req.params.id;
+
+        const robot = await Robot.findOne({
+            where: {
+                'recording_meta.id': robotId
+            }
+        });
+
+        if (!robot) {
+            return res.status(404).json({
+                error: "Robot not found"
+            });
+        }
+
+        await Run.destroy({
+            where: {
+                robotMetaId: robot.recording_meta.id
+            }
+        });
+
+        await robot.destroy();
+
+        logger.info(`[SDK] Robot deleted: ${robotId}`);
+
+        capture(
+            'maxun-oss-robot-deleted',
+            {
+                robotId: robotId,
+                user_id: req.user?.id,
+                deleted_at: new Date().toISOString(),
+            }
+        )
+
+        return res.status(200).json({
+            message: "Robot deleted successfully"
+        });
+    } catch (error: any) {
+        logger.error("[SDK] Error deleting robot:", error);
+        return res.status(500).json({
+            error: "Failed to delete robot",
+            message: error.message
+        });
+    }
+});
+
+/**
+ * Execute a robot
+ * POST /api/sdk/robots/:id/execute
+ */
+router.post("/sdk/robots/:id/execute", requireAPIKey, async (req: AuthenticatedRequest, res: Response) => {
+    try {
+        const user = req.user;
+        const robotId = req.params.id;
+
+        logger.info(`[SDK] Starting execution for robot ${robotId}`);
+
+        const runId = await handleRunRecording(robotId, user.id.toString());
+        if (!runId) {
+            throw new Error('Failed to start robot execution');
+        }
+
+        const run = await waitForRunCompletion(runId, user.id.toString());
+
+        let listData: any[] = [];
+        if (run.serializableOutput?.scrapeList) {
+            const scrapeList: any = run.serializableOutput.scrapeList;
+            
+            if (scrapeList.scrapeList && Array.isArray(scrapeList.scrapeList)) {
+                listData = scrapeList.scrapeList;
+            }
+            else if (Array.isArray(scrapeList)) {
+                listData = scrapeList;
+            }
+            else if (typeof scrapeList === 'object') {
+                const listValues = Object.values(scrapeList);
+                if (listValues.length > 0 && Array.isArray(listValues[0])) {
+                    listData = listValues[0] as any[];
+                }
+            }
+        }
+
+        return res.status(200).json({
+            data: {
+                runId: run.runId,
+                status: run.status,
+                data: {
+                    textData: run.serializableOutput?.scrapeSchema || {},
+                    listData: listData
+                },
+                screenshots: Object.values(run.binaryOutput || {})
+            }
+        });
+    } catch (error: any) {
+        logger.error("[SDK] Error executing robot:", error);
+        return res.status(500).json({
+            error: "Failed to execute robot",
+            message: error.message
+        });
+    }
+});
+
+/**
+ * Wait for run completion
+ */
+async function waitForRunCompletion(runId: string, interval: number = 2000) {
+    const MAX_WAIT_TIME = 180 * 60 * 1000;
+    const startTime = Date.now();
+
+    while (true) {
+        if (Date.now() - startTime > MAX_WAIT_TIME) {
+            throw new Error('Run completion timeout after 3 hours');
+        }
+
+        const run = await Run.findOne({ where: { runId } });
+        if (!run) throw new Error('Run not found');
+
+        if (run.status === 'success') {
+            return run.toJSON();
+        } else if (run.status === 'failed') {
+            throw new Error('Run failed');
+        } else if (run.status === 'aborted') {
+            throw new Error('Run was aborted');
+        }
+
+        await new Promise(resolve => setTimeout(resolve, interval));
+    }
+}
+
+/**
+ * Get all runs for a robot
+ * GET /api/sdk/robots/:id/runs
+ */
+router.get("/sdk/robots/:id/runs", requireAPIKey, async (req: AuthenticatedRequest, res: Response) => {
+    try {
+        const robotId = req.params.id;
+
+        const robot = await Robot.findOne({
+            where: {
+                'recording_meta.id': robotId
+            }
+        });
+
+        if (!robot) {
+            return res.status(404).json({
+                error: "Robot not found"
+            });
+        }
+
+        const runs = await Run.findAll({
+            where: {
+                robotMetaId: robot.recording_meta.id
+            },
+            order: [['startedAt', 'DESC']]
+        });
+
+        return res.status(200).json({
+            data: runs
+        });
+    } catch (error: any) {
+        logger.error("[SDK] Error getting runs:", error);
+        return res.status(500).json({
+            error: "Failed to get runs",
+            message: error.message
+        });
+    }
+});
+
+/**
+ * Get a specific run
+ * GET /api/sdk/robots/:id/runs/:runId
+ */
+router.get("/sdk/robots/:id/runs/:runId", requireAPIKey, async (req: AuthenticatedRequest, res: Response) => {
+    try {
+        const robotId = req.params.id;
+        const runId = req.params.runId;
+
+        const robot = await Robot.findOne({
+            where: {
+                'recording_meta.id': robotId
+            }
+        });
+
+        if (!robot) {
+            return res.status(404).json({
+                error: "Robot not found"
+            });
+        }
+
+        const run = await Run.findOne({
+            where: {
+                runId: runId,
+                robotMetaId: robot.recording_meta.id
+            }
+        });
+
+        if (!run) {
+            return res.status(404).json({
+                error: "Run not found"
+            });
+        }
+
+        return res.status(200).json({
+            data: run
+        });
+    } catch (error: any) {
+        logger.error("[SDK] Error getting run:", error);
+        return res.status(500).json({
+            error: "Failed to get run",
+            message: error.message
+        });
+    }
+});
+
+/**
+ * Abort a running execution
+ * POST /api/sdk/robots/:id/runs/:runId/abort
+ */
+router.post("/sdk/robots/:id/runs/:runId/abort", requireAPIKey, async (req: AuthenticatedRequest, res: Response) => {
+    try {
+        const robotId = req.params.id;
+        const runId = req.params.runId;
+
+        const robot = await Robot.findOne({
+            where: {
+                'recording_meta.id': robotId
+            }
+        });
+
+        if (!robot) {
+            return res.status(404).json({
+                error: "Robot not found"
+            });
+        }
+
+        const run = await Run.findOne({
+            where: {
+                runId: runId,
+                robotMetaId: robot.recording_meta.id
+            }
+        });
+
+        if (!run) {
+            return res.status(404).json({
+                error: "Run not found"
+            });
+        }
+
+        if (run.status !== 'running' && run.status !== 'queued') {
+            return res.status(400).json({
+                error: "Run is not in a state that can be aborted",
+                currentStatus: run.status
+            });
+        }
+
+        await run.update({ status: 'aborted' });
+
+        logger.info(`[SDK] Run ${runId} marked for abortion`);
+
+        return res.status(200).json({
+            message: "Run abortion initiated",
+            data: run
+        });
+    } catch (error: any) {
+        logger.error("[SDK] Error aborting run:", error);
+        return res.status(500).json({
+            error: "Failed to abort run",
+            message: error.message
+        });
+    }
+});
+
+/**
+ * LLM-based extraction - generate workflow from natural language prompt
+ * POST /api/sdk/extract/llm
+ */
+router.post("/sdk/extract/llm", requireAPIKey, async (req: AuthenticatedRequest, res: Response) => {
+    try {
+        const user = req.user.id
+        const { url, prompt, llmProvider, llmModel, llmApiKey, llmBaseUrl, robotName } = req.body;
+
+        if (!url || !prompt) {
+            return res.status(400).json({
+                error: "URL and prompt are required"
+            });
+        }
+
+        const workflowResult = await WorkflowEnricher.generateWorkflowFromPrompt(url, prompt, {
+            provider: llmProvider,
+            model: llmModel,
+            apiKey: llmApiKey,
+            baseUrl: llmBaseUrl
+        });
+
+        if (!workflowResult.success || !workflowResult.workflow) {
+            return res.status(400).json({
+                error: "Failed to generate workflow from prompt",
+                details: workflowResult.errors
+            });
+        }
+
+        const robotId = uuid();
+        const metaId = uuid();
+
+        const robotMeta: any = {
+            name: robotName || `LLM Extract: ${prompt.substring(0, 50)}`,
+            id: metaId,
+            createdAt: new Date().toISOString(),
+            updatedAt: new Date().toISOString(),
+            pairs: workflowResult.workflow.length,
+            params: [],
+            type: 'extract',
+            url: workflowResult.url,
+        };
+
+        const robot = await Robot.create({
+            id: robotId,
+            userId: user.id,
+            recording_meta: robotMeta,
+            recording: {
+                workflow: workflowResult.workflow
+            },
+        });
+
+        logger.info(`[SDK] Persistent robot created: ${metaId} for LLM extraction`);
+
+        capture("maxun-oss-robot-created", {
+            robot_meta: robot.recording_meta,
+            recording: robot.recording,
+        });
+
+        return res.status(200).json({
+            success: true,
+            data: {
+                robotId: metaId,
+                name: robotMeta.name,
+                description: prompt,
+                url: workflowResult.url,
+                workflow: workflowResult.workflow
+            }
+        });
+    } catch (error: any) {
+        logger.error("[SDK] Error in LLM extraction:", error);
+        return res.status(500).json({
+            error: "Failed to perform LLM extraction",
+            message: error.message
+        });
+    }
+});
+
+export default router;
--- a/server/src/routes/storage.ts
+++ b/server/src/routes/storage.ts
@@ -894,7 +894,7 @@ router.put('/schedule/:id/', requireSignIn, async (req: AuthenticatedRequest, re
      logger.log('warn', `Failed to cancel existing schedule for robot ${id}: ${cancelError}`);
    }

-    const jobId = await scheduleWorkflow(id, req.user.id, cronExpression, timezone);
+    await scheduleWorkflow(id, cronExpression, timezone);

    const nextRunAt = computeNextRun(cronExpression, timezone);

--- a/server/src/sdk/browserSide/pageAnalyzer.js
+++ b/server/src/sdk/browserSide/pageAnalyzer.js
--- a/server/src/sdk/selectorValidator.ts
+++ b/server/src/sdk/selectorValidator.ts
@@ -0,0 +1,576 @@
+/**
+ * Selector Validator
+ * Validates and enriches selectors with metadata using Playwright page instance
+ */
+
+import { Page } from 'playwright-core';
+import logger from '../logger';
+
+interface SelectorInput {
+  selector: string;
+  attribute?: string;
+}
+
+interface EnrichedSelector {
+  tag: string;
+  isShadow: boolean;
+  selector: string;
+  attribute: string;
+}
+
+interface ValidationResult {
+  valid: boolean;
+  enriched?: EnrichedSelector;
+  error?: string;
+}
+
+export class SelectorValidator {
+  private page: Page | null = null;
+
+  /**
+   * Initialize with an existing Page instance and navigate to URL
+   * @param page Page instance from RemoteBrowser
+   * @param url URL to navigate to
+   */
+  async initialize(page: Page, url: string): Promise<void> {
+    this.page = page;
+    await this.page.goto(url, { waitUntil: 'networkidle', timeout: 30000 });
+    logger.info(`Navigated to ${url} using RemoteBrowser page`);
+  }
+
+  /**
+   * Validate and enrich a single selector
+   */
+  async validateSelector(input: SelectorInput): Promise<ValidationResult> {
+    if (!this.page) {
+      return { valid: false, error: 'Browser not initialized' };
+    }
+
+    const { selector, attribute = 'innerText' } = input;
+
+    try {
+      const isXPath = selector.startsWith('//') || selector.startsWith('(//');
+
+      let element;
+      if (isXPath) {
+        element = await this.page.locator(`xpath=${selector}`).first();
+      } else {
+        element = await this.page.locator(selector).first();
+      }
+
+      const count = await element.count();
+      if (count === 0) {
+        return {
+          valid: false,
+          error: `Selector "${selector}" did not match any elements`
+        };
+      }
+
+      const tagName = await element.evaluate((el) => el.tagName);
+
+      const isShadow = await element.evaluate((el) => {
+        let parent = el.parentNode;
+        while (parent) {
+          if (parent instanceof ShadowRoot) {
+            return true;
+          }
+          parent = parent.parentNode;
+        }
+        return false;
+      });
+
+      return {
+        valid: true,
+        enriched: {
+          tag: tagName,
+          isShadow,
+          selector,
+          attribute
+        }
+      };
+    } catch (error: any) {
+      logger.error(`Error validating selector "${selector}":`, error.message);
+      return {
+        valid: false,
+        error: `Invalid selector: ${error.message}`
+      };
+    }
+  }
+
+  /**
+   * Validate and enrich multiple selectors
+   */
+  async validateSchemaFields(
+    fields: Record<string, string | SelectorInput>
+  ): Promise<{ valid: boolean; enriched?: Record<string, EnrichedSelector>; errors?: string[] }> {
+    const enriched: Record<string, EnrichedSelector> = {};
+    const errors: string[] = [];
+
+    for (const [fieldName, fieldInput] of Object.entries(fields)) {
+      const input: SelectorInput = typeof fieldInput === 'string'
+        ? { selector: fieldInput }
+        : fieldInput;
+
+      const result = await this.validateSelector(input);
+
+      if (result.valid && result.enriched) {
+        enriched[fieldName] = result.enriched;
+      } else {
+        errors.push(`Field "${fieldName}": ${result.error}`);
+      }
+    }
+
+    if (errors.length > 0) {
+      return { valid: false, errors };
+    }
+
+    return { valid: true, enriched };
+  }
+
+  /**
+   * Validate list selector and fields
+   */
+  async validateListFields(config: {
+    itemSelector: string;
+    fields: Record<string, string | SelectorInput>;
+  }): Promise<{
+    valid: boolean;
+    enriched?: {
+      listSelector: string;
+      listTag: string;
+      fields: Record<string, EnrichedSelector>;
+    };
+    errors?: string[]
+  }> {
+    const errors: string[] = [];
+
+    const listResult = await this.validateSelector({
+      selector: config.itemSelector,
+      attribute: 'innerText'
+    });
+
+    if (!listResult.valid || !listResult.enriched) {
+      errors.push(`List selector: ${listResult.error}`);
+      return { valid: false, errors };
+    }
+
+    const fieldsResult = await this.validateSchemaFields(config.fields);
+
+    if (!fieldsResult.valid) {
+      errors.push(...(fieldsResult.errors || []));
+      return { valid: false, errors };
+    }
+
+    return {
+      valid: true,
+      enriched: {
+        listSelector: config.itemSelector,
+        listTag: listResult.enriched.tag,
+        fields: fieldsResult.enriched!
+      }
+    };
+  }
+
+  /**
+   * Detect input type for a given selector
+   */
+  async detectInputType(selector: string): Promise<string> {
+    if (!this.page) {
+      throw new Error('Browser not initialized');
+    }
+
+    try {
+      const isXPath = selector.startsWith('//') || selector.startsWith('(//');
+
+      let element;
+      if (isXPath) {
+        element = await this.page.locator(`xpath=${selector}`).first();
+      } else {
+        element = await this.page.locator(selector).first();
+      }
+
+      const count = await element.count();
+      if (count === 0) {
+        throw new Error(`Selector "${selector}" did not match any elements`);
+      }
+
+      const inputType = await element.evaluate((el) => {
+        if (el instanceof HTMLInputElement) {
+          return el.type || 'text';
+        }
+        if (el instanceof HTMLTextAreaElement) {
+          return 'textarea';
+        }
+        if (el instanceof HTMLSelectElement) {
+          return 'select';
+        }
+        return 'text';
+      });
+
+      return inputType;
+    } catch (error: any) {
+      throw new Error(`Failed to detect input type: ${error.message}`);
+    }
+  }
+
+  /**
+   * Auto-detect fields from list selector
+   */
+  async autoDetectListFields(listSelector: string): Promise<{
+    success: boolean;
+    fields?: Record<string, any>;
+    listSelector?: string;
+    error?: string;
+  }> {
+    if (!this.page) {
+      return { success: false, error: 'Browser not initialized' };
+    }
+
+    try {
+      const fs = require('fs');
+      const path = require('path');
+      const scriptPath = path.join(__dirname, 'browserSide/pageAnalyzer.js');
+      const scriptContent = fs.readFileSync(scriptPath, 'utf8');
+
+      await this.page.evaluate((script) => {
+        eval(script);
+      }, scriptContent);
+
+      const result = await this.page.evaluate((selector) => {
+        const win = window as any;
+        if (typeof win.autoDetectListFields === 'function') {
+          return win.autoDetectListFields(selector);
+        } else {
+          return {
+            fields: {},
+            error: 'Auto-detection function not loaded'
+          };
+        }
+      }, listSelector);
+
+      // Log debug information
+      if (result.debug) {
+        logger.info(`Debug info: ${JSON.stringify(result.debug)}`);
+      }
+
+      if (result.error || !result.fields || Object.keys(result.fields).length === 0) {
+        return {
+          success: false,
+          error: result.error || 'No fields detected from list selector'
+        };
+      }
+
+      const convertedListSelector = result.listSelector || listSelector;
+
+      logger.info(`Auto-detected ${Object.keys(result.fields).length} fields from list`);
+
+      return {
+        success: true,
+        fields: result.fields,
+        listSelector: convertedListSelector,
+      };
+    } catch (error: any) {
+      logger.error('Field auto-detection error:', error);
+      return {
+        success: false,
+        error: `Field auto-detection failed: ${error.message}`
+      };
+    }
+  }
+
+  /**
+   * Auto-detect pagination type and selector from list selector
+   */
+  async autoDetectPagination(listSelector: string): Promise<{
+    success: boolean;
+    type?: string;
+    selector?: string | null;
+    error?: string;
+  }> {
+    if (!this.page) {
+      return { success: false, error: 'Browser not initialized' };
+    }
+
+    try {
+      const fs = require('fs');
+      const path = require('path');
+      const scriptPath = path.join(__dirname, 'browserSide/pageAnalyzer.js');
+      const scriptContent = fs.readFileSync(scriptPath, 'utf8');
+
+      await this.page.evaluate((script) => {
+        eval(script);
+      }, scriptContent);
+
+      const buttonResult = await this.page.evaluate((selector) => {
+        const win = window as any;
+
+        if (typeof win.autoDetectPagination === 'function') {
+          const result = win.autoDetectPagination(selector);
+          return result;
+        } else {
+          console.error('autoDetectPagination function not found!');
+          return {
+            type: '',
+            selector: null,
+            error: 'Pagination auto-detection function not loaded'
+          };
+        }
+      }, listSelector);
+
+      if (buttonResult.debug) {
+        logger.info(`Pagination debug info: ${JSON.stringify(buttonResult.debug)}`);
+      }
+
+      if (buttonResult.error) {
+        logger.error(`Button detection error: ${buttonResult.error}`);
+        return {
+          success: false,
+          error: buttonResult.error
+        };
+      }
+
+      if (buttonResult.type && buttonResult.type !== '') {
+        if (buttonResult.type === 'clickLoadMore' && buttonResult.selector) {
+          logger.info('Testing Load More button by clicking...');
+          const loadMoreVerified = await this.testLoadMoreButton(buttonResult.selector, listSelector);
+
+          if (!loadMoreVerified) {
+            logger.warn('Load More button did not load content, trying other detection methods');
+          } else {
+            logger.info(`Verified Load More button works`);
+            return {
+              success: true,
+              type: buttonResult.type,
+              selector: buttonResult.selector
+            };
+          }
+        } else {
+          logger.info(`Detected pagination type: ${buttonResult.type}${buttonResult.selector ? ` with selector: ${buttonResult.selector}` : ''}`);
+          return {
+            success: true,
+            type: buttonResult.type,
+            selector: buttonResult.selector
+          };
+        }
+      }
+
+      const scrollTestResult = await this.testInfiniteScrollByScrolling(listSelector);
+
+      if (scrollTestResult.detected) {
+        return {
+          success: true,
+          type: 'scrollDown',
+          selector: null
+        };
+      }
+
+      return {
+        success: true,
+        type: '',
+        selector: null
+      };
+
+    } catch (error: any) {
+      logger.error('Pagination auto-detection error:', error);
+      return {
+        success: false,
+        error: `Pagination auto-detection failed: ${error.message}`
+      };
+    }
+  }
+
+  /**
+   * Test Load More button by clicking it and checking if content loads
+   */
+  private async testLoadMoreButton(buttonSelector: string, listSelector: string): Promise<boolean> {
+    if (!this.page) {
+      return false;
+    }
+
+    try {
+      const initialState = await this.page.evaluate((selector) => {
+        function evaluateSelector(sel: string, doc: Document) {
+          const isXPath = sel.startsWith('//') || sel.startsWith('(//');
+          if (isXPath) {
+            const result = doc.evaluate(sel, doc, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
+            const elements = [];
+            for (let i = 0; i < result.snapshotLength; i++) {
+              elements.push(result.snapshotItem(i));
+            }
+            return elements;
+          } else {
+            return Array.from(doc.querySelectorAll(sel));
+          }
+        }
+
+        const listElements = evaluateSelector(selector, document);
+        return {
+          itemCount: listElements.length,
+          scrollHeight: document.documentElement.scrollHeight
+        };
+      }, listSelector);
+
+      try {
+        const selectors = buttonSelector.split(',').map(s => s.trim());
+        let clicked = false;
+
+        for (const sel of selectors) {
+          try {
+            await this.page.click(sel, { timeout: 1000 });
+            clicked = true;
+            break;
+          } catch (e) {
+            continue;
+          }
+        }
+
+        if (!clicked) {
+          return false;
+        }
+
+        await this.page.waitForTimeout(2000);
+
+      } catch (clickError: any) {
+        logger.warn(`Failed to click button: ${clickError.message}`);
+        return false;
+      }
+
+      const afterClickState = await this.page.evaluate((selector) => {
+        function evaluateSelector(sel: string, doc: Document) {
+          const isXPath = sel.startsWith('//') || sel.startsWith('(//');
+          if (isXPath) {
+            const result = doc.evaluate(sel, doc, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
+            const elements = [];
+            for (let i = 0; i < result.snapshotLength; i++) {
+              elements.push(result.snapshotItem(i));
+            }
+            return elements;
+          } else {
+            return Array.from(doc.querySelectorAll(sel));
+          }
+        }
+
+        const listElements = evaluateSelector(selector, document);
+        return {
+          itemCount: listElements.length,
+          scrollHeight: document.documentElement.scrollHeight
+        };
+      }, listSelector);
+
+      logger.info(`After click: ${afterClickState.itemCount} items, scrollHeight: ${afterClickState.scrollHeight}`);
+
+      const itemsAdded = afterClickState.itemCount > initialState.itemCount;
+      const heightIncreased = afterClickState.scrollHeight > initialState.scrollHeight + 100;
+
+      if (itemsAdded || heightIncreased) {
+        const details = `Items: ${initialState.itemCount} → ${afterClickState.itemCount}, Height: ${initialState.scrollHeight} → ${afterClickState.scrollHeight}`;
+        logger.info(`Content loaded after click: ${details}`);
+        return true;
+      }
+
+      logger.info('No content change detected after clicking');
+      return false;
+
+    } catch (error: any) {
+      logger.error('Error during Load More test:', error.message);
+      return false;
+    }
+  }
+
+  /**
+   * Test for infinite scroll by actually scrolling and checking if content loads
+   */
+  private async testInfiniteScrollByScrolling(listSelector: string): Promise<{
+    detected: boolean;
+    details?: string;
+  }> {
+    if (!this.page) {
+      return { detected: false };
+    }
+
+    try {
+      const initialState = await this.page.evaluate((selector) => {
+        function evaluateSelector(sel: string, doc: Document) {
+          const isXPath = sel.startsWith('//') || sel.startsWith('(//');
+          if (isXPath) {
+            const result = doc.evaluate(sel, doc, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
+            const elements = [];
+            for (let i = 0; i < result.snapshotLength; i++) {
+              elements.push(result.snapshotItem(i));
+            }
+            return elements;
+          } else {
+            return Array.from(doc.querySelectorAll(sel));
+          }
+        }
+
+        const listElements = evaluateSelector(selector, document);
+        return {
+          itemCount: listElements.length,
+          scrollHeight: document.documentElement.scrollHeight,
+          scrollY: window.scrollY
+        };
+      }, listSelector);
+
+      logger.info(`Initial state: ${initialState.itemCount} items, scrollHeight: ${initialState.scrollHeight}`);
+
+      await this.page.evaluate(() => {
+        window.scrollTo(0, document.documentElement.scrollHeight);
+      });
+
+      await this.page.waitForTimeout(2000);
+
+      const afterScrollState = await this.page.evaluate((selector) => {
+        function evaluateSelector(sel: string, doc: Document) {
+          const isXPath = sel.startsWith('//') || sel.startsWith('(//');
+          if (isXPath) {
+            const result = doc.evaluate(sel, doc, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
+            const elements = [];
+            for (let i = 0; i < result.snapshotLength; i++) {
+              elements.push(result.snapshotItem(i));
+            }
+            return elements;
+          } else {
+            return Array.from(doc.querySelectorAll(sel));
+          }
+        }
+
+        const listElements = evaluateSelector(selector, document);
+        return {
+          itemCount: listElements.length,
+          scrollHeight: document.documentElement.scrollHeight,
+          scrollY: window.scrollY
+        };
+      }, listSelector);
+
+      await this.page.evaluate((originalY) => {
+        window.scrollTo(0, originalY);
+      }, initialState.scrollY);
+
+  
+      const itemsAdded = afterScrollState.itemCount > initialState.itemCount;
+      const heightIncreased = afterScrollState.scrollHeight > initialState.scrollHeight + 100;
+
+      if (itemsAdded || heightIncreased) {
+        const details = `Items: ${initialState.itemCount} → ${afterScrollState.itemCount}, Height: ${initialState.scrollHeight} → ${afterScrollState.scrollHeight}`;
+        logger.info(`Content changed: ${details}`);
+        return { detected: true, details };
+      }
+
+      logger.info('No content change detected');
+      return { detected: false };
+
+    } catch (error: any) {
+      logger.error('Error during scroll test:', error.message);
+      return { detected: false };
+    }
+  }
+
+  /**
+   * Clear page reference
+   */
+  async close(): Promise<void> {
+    this.page = null;
+    logger.info('Page reference cleared');
+  }
+}
--- a/server/src/sdk/workflowEnricher.ts
+++ b/server/src/sdk/workflowEnricher.ts
@@ -0,0 +1,711 @@
+/**
+ * Workflow Enricher
+ * Converts simplified SDK workflow to full format with validation
+ */
+
+import { SelectorValidator } from './selectorValidator';
+import { createRemoteBrowserForValidation, destroyRemoteBrowser } from '../browser-management/controller';
+import logger from '../logger';
+import { v4 as uuid } from 'uuid';
+import { encrypt } from '../utils/auth';
+import Anthropic from '@anthropic-ai/sdk';
+
+interface SimplifiedAction {
+  action: string | typeof Symbol.asyncDispose;
+  args?: any[];
+  name?: string;
+  actionId?: string;
+}
+
+type RegexableString = string | { $regex: string };
+
+interface SimplifiedWorkflowPair {
+  where: {
+    url?: RegexableString;
+    [key: string]: any;
+  };
+  what: SimplifiedAction[];
+}
+
+export class WorkflowEnricher {
+  /**
+   * Enrich a simplified workflow with full metadata
+   */
+  static async enrichWorkflow(
+    simplifiedWorkflow: SimplifiedWorkflowPair[],
+    userId: string = 'sdk-validation-user'
+  ): Promise<{ success: boolean; workflow?: any[]; errors?: string[]; url?: string }> {
+    const errors: string[] = [];
+    const enrichedWorkflow: any[] = [];
+
+    if (simplifiedWorkflow.length === 0) {
+      return { success: false, errors: ['Workflow is empty'] };
+    }
+
+    let url: string | undefined;
+    for (const step of simplifiedWorkflow) {
+      const rawUrl = step.where.url;
+      if (rawUrl && rawUrl !== 'about:blank') {
+        url = typeof rawUrl === 'string' ? rawUrl : rawUrl.$regex;
+        break;
+      }
+    }
+
+    if (!url) {
+      return { success: false, errors: ['No valid URL found in workflow'] };
+    }
+
+    let browserId: string | null = null;
+    const validator = new SelectorValidator();
+
+    try {
+      logger.info('Creating RemoteBrowser for validation');
+      const { browserId: id, page } = await createRemoteBrowserForValidation(userId);
+      browserId = id;
+
+      await validator.initialize(page, url);
+
+      for (const step of simplifiedWorkflow) {
+        const enrichedStep: any = {
+          where: { ...step.where },
+          what: []
+        };
+
+        const selectors: string[] = [];
+
+        for (const action of step.what) {
+          if (typeof action.action !== 'string') {
+            continue;
+          }
+
+          if (action.action === 'type') {
+            if (!action.args || action.args.length < 2) {
+              errors.push('type action missing selector or value');
+              continue;
+            }
+
+            const selector = action.args[0];
+            const value = action.args[1];
+            const providedInputType = action.args[2];
+
+            selectors.push(selector);
+
+            const encryptedValue = encrypt(value);
+
+            if (!providedInputType) {
+              try {
+                const inputType = await validator.detectInputType(selector);
+                enrichedStep.what.push({
+                  ...action,
+                  args: [selector, encryptedValue, inputType]
+                });
+              } catch (error: any) {
+                errors.push(`type action: ${error.message}`);
+                continue;
+              }
+            } else {
+              enrichedStep.what.push({
+                ...action,
+                args: [selector, encryptedValue, providedInputType]
+              });
+            }
+
+            enrichedStep.what.push({
+              action: 'waitForLoadState',
+              args: ['networkidle']
+            });
+
+            continue;
+          }
+
+          if (action.action !== 'scrapeSchema' && action.action !== 'scrapeList') {
+            enrichedStep.what.push(action);
+            continue;
+          }
+
+          if (action.action === 'scrapeSchema') {
+            if (!action.args || !action.args[0]) {
+              errors.push('scrapeSchema action missing fields argument');
+              continue;
+            }
+            const fields = action.args[0];
+            const result = await validator.validateSchemaFields(fields);
+
+            if (!result.valid) {
+              errors.push(...(result.errors || []));
+              continue;
+            }
+
+            const enrichedFields: Record<string, any> = {};
+            for (const [fieldName, enrichedData] of Object.entries(result.enriched!)) {
+              enrichedFields[fieldName] = {
+                tag: enrichedData.tag,
+                isShadow: enrichedData.isShadow,
+                selector: enrichedData.selector,
+                attribute: enrichedData.attribute
+              };
+
+              selectors.push(enrichedData.selector);
+            }
+
+            const enrichedAction: any = {
+              action: 'scrapeSchema',
+              actionId: `text-${uuid()}`,
+              args: [enrichedFields]
+            };
+            if (action.name) {
+              enrichedAction.name = action.name;
+            }
+            enrichedStep.what.push(enrichedAction);
+
+            enrichedStep.what.push({
+              action: 'waitForLoadState',
+              args: ['networkidle']
+            });
+
+          } else if (action.action === 'scrapeList') {
+            if (!action.args || !action.args[0]) {
+              errors.push('scrapeList action missing config argument');
+              continue;
+            }
+            const config = action.args[0];
+
+            let enrichedFields: Record<string, any> = {};
+            let listSelector: string;
+
+            try {
+              const autoDetectResult = await validator.autoDetectListFields(config.itemSelector);
+
+              if (!autoDetectResult.success || !autoDetectResult.fields || Object.keys(autoDetectResult.fields).length === 0) {
+                errors.push(autoDetectResult.error || 'Failed to auto-detect fields from list selector');
+                continue;
+              }
+
+              enrichedFields = autoDetectResult.fields;
+              listSelector = autoDetectResult.listSelector!;
+              logger.info('Auto-detected', Object.keys(enrichedFields).length, 'fields');
+            } catch (error: any) {
+              errors.push(`Field auto-detection failed: ${error.message}`);
+              continue;
+            }
+
+            let paginationType = 'none';
+            let paginationSelector = '';
+
+            if (config.pagination && config.pagination.type) {
+              paginationType = config.pagination.type;
+              paginationSelector = config.pagination.selector || '';
+            } else {
+              try {
+                const paginationResult = await validator.autoDetectPagination(config.itemSelector);
+
+                if (paginationResult.success && paginationResult.type) {
+                  paginationType = paginationResult.type;
+                  paginationSelector = paginationResult.selector || '';
+                } 
+              } catch (error: any) {
+                logger.warn('Pagination auto-detection failed, using default (none):', error.message);
+              }
+            }
+
+            const enrichedListAction: any = {
+              action: 'scrapeList',
+              actionId: `list-${uuid()}`,
+              args: [{
+                fields: enrichedFields,
+                listSelector: listSelector,
+                pagination: {
+                  type: paginationType,
+                  selector: paginationSelector
+                },
+                limit: config.maxItems || 100
+              }]
+            };
+            if (action.name) {
+              enrichedListAction.name = action.name;
+            }
+            enrichedStep.what.push(enrichedListAction);
+
+            enrichedStep.what.push({
+              action: 'waitForLoadState',
+              args: ['networkidle']
+            });
+          }
+        }
+
+        if (selectors.length > 0) {
+          enrichedStep.where.selectors = selectors;
+        }
+
+        enrichedWorkflow.push(enrichedStep);
+      }
+
+      await validator.close();
+
+      if (browserId) {
+        await destroyRemoteBrowser(browserId, userId);
+        logger.info('RemoteBrowser cleaned up successfully');
+      }
+
+      if (errors.length > 0) {
+        return { success: false, errors };
+      }
+
+      return { success: true, workflow: enrichedWorkflow, url };
+
+    } catch (error: any) {
+      await validator.close();
+
+      if (browserId) {
+        try {
+          await destroyRemoteBrowser(browserId, userId);
+          logger.info('RemoteBrowser cleaned up after error');
+        } catch (cleanupError) {
+          logger.warn('Failed to cleanup RemoteBrowser:', cleanupError);
+        }
+      }
+
+      logger.error('Error enriching workflow:', error);
+      return { success: false, errors: [error.message] };
+    }
+  }
+
+
+  /**
+   * Generate workflow from natural language prompt using LLM with vision
+   */
+  static async generateWorkflowFromPrompt(
+    url: string,
+    prompt: string,
+    llmConfig?: {
+      provider?: 'anthropic' | 'openai' | 'ollama';
+      model?: string;
+      apiKey?: string;
+      baseUrl?: string;
+    },
+    userId: string = 'sdk-validation-user',
+  ): Promise<{ success: boolean; workflow?: any[]; url?: string; errors?: string[] }> {
+    let browserId: string | null = null;
+    const validator = new SelectorValidator();
+
+    try {
+      logger.info(`Generating workflow from prompt for URL: ${url}`);
+      logger.info(`Prompt: ${prompt}`);
+
+      logger.info('Creating RemoteBrowser for LLM workflow generation');
+      const { browserId: id, page } = await createRemoteBrowserForValidation(userId);
+      browserId = id;
+
+      await validator.initialize(page as any, url);
+
+      const validatorPage = (validator as any).page;
+      const screenshotBuffer = await page.screenshot({ fullPage: true, type: 'png' });
+      const screenshotBase64 = screenshotBuffer.toString('base64');
+
+      const elementGroups = await this.analyzePageGroups(validator);
+      logger.info(`Found ${elementGroups.length} element groups`);
+
+      const pageHTML = await validatorPage.content();
+
+      const llmDecision = await this.getLLMDecisionWithVision(
+        prompt,
+        screenshotBase64,
+        elementGroups,
+        pageHTML,
+        llmConfig
+      );
+      logger.info(`LLM decided action type: ${llmDecision.actionType}`);
+
+      const workflow = await this.buildWorkflowFromLLMDecision(llmDecision, url, validator);
+
+      await validator.close();
+
+      if (browserId) {
+        await destroyRemoteBrowser(browserId, userId);
+        logger.info('RemoteBrowser cleaned up after LLM workflow generation');
+      }
+
+      return { success: true, workflow, url };
+    } catch (error: any) {
+      await validator.close();
+
+      if (browserId) {
+        try {
+          await destroyRemoteBrowser(browserId, userId);
+          logger.info('RemoteBrowser cleaned up after LLM generation error');
+        } catch (cleanupError) {
+          logger.warn('Failed to cleanup RemoteBrowser:', cleanupError);
+        }
+      }
+
+      logger.error('Error generating workflow from prompt:', error);
+      return { success: false, errors: [error.message] };
+    }
+  }
+
+  /**
+   * Analyze page groups using browser-side script
+   */
+  private static async analyzePageGroups(validator: SelectorValidator): Promise<any[]> {
+    try {
+      const page = (validator as any).page;
+      const fs = require('fs');
+      const path = require('path');
+      const scriptPath = path.join(__dirname, 'browserSide/pageAnalyzer.js');
+      const scriptContent = fs.readFileSync(scriptPath, 'utf8');
+
+      await page.evaluate((script: string) => {
+        eval(script);
+      }, scriptContent);
+
+      const groups = await page.evaluate(() => {
+        const win = window as any;
+        if (typeof win.analyzeElementGroups === 'function') {
+          return win.analyzeElementGroups();
+        }
+        return [];
+      });
+
+      return groups;
+    } catch (error: any) {
+      logger.error('Error analyzing page groups:', error);
+      return [];
+    }
+  }
+
+  /**
+   * Use LLM (with or without vision) to decide action and select best element/group
+   */
+  private static async getLLMDecisionWithVision(
+    prompt: string,
+    screenshotBase64: string,
+    elementGroups: any[],
+    pageHTML: string,
+    llmConfig?: {
+      provider?: 'anthropic' | 'openai' | 'ollama';
+      model?: string;
+      apiKey?: string;
+      baseUrl?: string;
+    }
+  ): Promise<any> {
+    try {
+      const provider = llmConfig?.provider || 'ollama';
+      const axios = require('axios');
+
+      const groupsDescription = elementGroups.map((group, index) => {
+        const sampleText = group.sampleTexts.slice(0, 2).filter((t: string) => t && t.trim().length > 0).join(' | ');
+        const hasContent = sampleText.length > 0;
+        const contentPreview = hasContent ? sampleText : '(no text content - likely images/icons)';
+
+        return `Group ${index}:
+- Tag: ${group.fingerprint.tagName}
+- Count: ${group.count} similar elements
+- Has text content: ${hasContent ? 'YES' : 'NO'}
+- Sample content: ${contentPreview.substring(0, 300)}`;
+      }).join('\n\n');
+
+      const systemPrompt = `You are a request classifier for list extraction. Your job is to:
+1. Identify that the user wants to extract a list of items
+2. Select the BEST element group that matches what they want
+3. Extract any numeric limit from their request
+
+CRITICAL GROUP SELECTION RULES:
+- Groups with "Has text content: YES" are usually better than groups with NO text content
+- Match the sample content to what the user is asking for
+- Avoid groups that only show images/icons (Has text content: NO)
+- The group with the most relevant sample content should be selected, NOT just the first group
+- Analyze the keywords in the user's request and find the group whose sample content contains related text
+
+LIMIT EXTRACTION:
+- Look for numbers in the request that indicate quantity (e.g., "50", "25", "100", "first 30", "top 10")
+- If no limit specified, use null
+
+Must return valid JSON: {"actionType": "captureList", "reasoning": "...", "selectedGroupIndex": NUMBER, "limit": NUMBER_OR_NULL}`;
+
+      const userPrompt = `User's request: "${prompt}"
+
+Available element groups on page:
+${groupsDescription}
+
+TASK:
+1. Identify the key terms from the user's request
+2. Look through ALL the groups above
+3. Find the group whose "Sample content" best matches the key terms from the request
+4. Prefer groups with "Has text content: YES" over "NO"
+5. Extract any numeric limit from the request if present
+
+Return JSON:
+{
+  "actionType": "captureList",
+  "reasoning": "Brief explanation of why this group was selected",
+  "selectedGroupIndex": INDEX_NUMBER,
+  "limit": NUMBER_OR_NULL
+}
+
+Note: selectedGroupIndex must be between 0 and ${elementGroups.length - 1}`;
+
+
+      let llmResponse: string;
+
+      if (provider === 'ollama') {
+        const ollamaBaseUrl = llmConfig?.baseUrl || process.env.OLLAMA_BASE_URL || 'http://localhost:11434';
+        const ollamaModel = llmConfig?.model || 'llama3.2-vision';
+
+        const jsonSchema = {
+          type: 'object',
+          required: ['actionType', 'reasoning', 'selectedGroupIndex'],
+          properties: {
+            actionType: {
+              type: 'string',
+              enum: ['captureList']
+            },
+            reasoning: {
+              type: 'string'
+            },
+            selectedGroupIndex: {
+              type: 'integer'
+            },
+            limit: {
+              type: ['integer', 'null']
+            }
+          }
+        };
+
+        const response = await axios.post(`${ollamaBaseUrl}/api/chat`, {
+          model: ollamaModel,
+          messages: [
+            {
+              role: 'system',
+              content: systemPrompt
+            },
+            {
+              role: 'user',
+              content: userPrompt,
+              images: [screenshotBase64]
+            }
+          ],
+          stream: false,
+          format: jsonSchema,
+          options: {
+            temperature: 0.1
+          }
+        });
+
+        llmResponse = response.data.message.content;
+
+      } else if (provider === 'anthropic') {
+        const anthropic = new Anthropic({
+          apiKey: llmConfig?.apiKey || process.env.ANTHROPIC_API_KEY
+        });
+        const anthropicModel = llmConfig?.model || 'claude-3-5-sonnet-20241022';
+
+        const response = await anthropic.messages.create({
+          model: anthropicModel,
+          max_tokens: 1024,
+          messages: [{
+            role: 'user',
+            content: [
+              {
+                type: 'image',
+                source: {
+                  type: 'base64',
+                  media_type: 'image/png',
+                  data: screenshotBase64
+                }
+              },
+              {
+                type: 'text',
+                text: userPrompt
+              }
+            ]
+          }],
+          system: systemPrompt
+        });
+
+        const textContent = response.content.find((c: any) => c.type === 'text');
+        llmResponse = textContent?.type === 'text' ? textContent.text : '';
+
+      } else if (provider === 'openai') {
+        const openaiBaseUrl = llmConfig?.baseUrl || 'https://api.openai.com/v1';
+        const openaiModel = llmConfig?.model || 'gpt-4-vision-preview';
+
+        const response = await axios.post(`${openaiBaseUrl}/chat/completions`, {
+          model: openaiModel,
+          messages: [
+            {
+              role: 'system',
+              content: systemPrompt
+            },
+            {
+              role: 'user',
+              content: [
+                {
+                  type: 'text',
+                  text: userPrompt
+                },
+                {
+                  type: 'image_url',
+                  image_url: {
+                    url: `data:image/png;base64,${screenshotBase64}`
+                  }
+                }
+              ]
+            }
+          ],
+          max_tokens: 1024,
+          temperature: 0.1
+        }, {
+          headers: {
+            'Authorization': `Bearer ${llmConfig?.apiKey || process.env.OPENAI_API_KEY}`,
+            'Content-Type': 'application/json'
+          }
+        });
+
+        llmResponse = response.data.choices[0].message.content;
+
+      } else {
+        throw new Error(`Unsupported LLM provider: ${provider}`);
+      }
+
+      logger.info(`LLM Response: ${llmResponse}`);
+
+      let jsonStr = llmResponse.trim();
+
+      const jsonMatch = jsonStr.match(/```json\s*([\s\S]*?)\s*```/) || jsonStr.match(/```\s*([\s\S]*?)\s*```/);
+      if (jsonMatch) {
+        jsonStr = jsonMatch[1].trim();
+      }
+
+      const objectMatch = jsonStr.match(/\{[\s\S]*"actionType"[\s\S]*\}/);
+      if (objectMatch) {
+        jsonStr = objectMatch[0];
+      }
+
+      const decision = JSON.parse(jsonStr);
+
+      if (!decision.actionType || decision.actionType !== 'captureList') {
+        throw new Error('LLM response must have actionType: "captureList"');
+      }
+
+      if (decision.selectedGroupIndex === undefined || decision.selectedGroupIndex < 0 || decision.selectedGroupIndex >= elementGroups.length) {
+        throw new Error(`Invalid selectedGroupIndex: ${decision.selectedGroupIndex}. Must be between 0 and ${elementGroups.length - 1}`);
+      }
+
+      const selectedGroup = elementGroups[decision.selectedGroupIndex];
+      return {
+        actionType: 'captureList',
+        selectedGroup,
+        itemSelector: selectedGroup.xpath,
+        reasoning: decision.reasoning,
+        limit: decision.limit || null
+      };
+
+    } catch (error: any) {
+      logger.error('LLM decision error:', error);
+      return this.fallbackHeuristicDecision(prompt, elementGroups);
+    }
+  }
+
+  /**
+   * Fallback heuristic decision when LLM fails
+   */
+  private static fallbackHeuristicDecision(prompt: string, elementGroups: any[]): any {
+    const promptLower = prompt.toLowerCase();
+
+    if (elementGroups.length === 0) {
+      throw new Error('No element groups found on page for list extraction');
+    }
+
+    const scoredGroups = elementGroups.map((group, index) => {
+      let score = 0;
+      for (const sampleText of group.sampleTexts) {
+        const keywords = promptLower.split(' ').filter((w: string) => w.length > 3);
+        for (const keyword of keywords) {
+          if (sampleText.toLowerCase().includes(keyword)) score += 2;
+        }
+      }
+      score += Math.min(group.count / 10, 5);
+      return { group, score, index };
+    });
+
+    scoredGroups.sort((a, b) => b.score - a.score);
+    const best = scoredGroups[0];
+
+    return {
+      actionType: 'captureList',
+      selectedGroup: best.group,
+      itemSelector: best.group.xpath
+    };
+  }
+
+  /**
+   * Build workflow from LLM decision
+   */
+  private static async buildWorkflowFromLLMDecision(
+    llmDecision: any,
+    url: string,
+    validator: SelectorValidator
+  ): Promise<any[]> {
+    const workflow: any[] = [];
+
+    workflow.push({
+      where: { url, selectors: [] },
+      what: [
+        { action: 'goto', args: [url] },
+        { action: 'waitForLoadState', args: ['networkidle'] }
+      ]
+    });
+
+    if (llmDecision.actionType === 'captureList') {
+      logger.info(`Auto-detecting fields for: ${llmDecision.itemSelector}`);
+
+      const autoDetectResult = await validator.autoDetectListFields(llmDecision.itemSelector);
+
+      if (!autoDetectResult.success || !autoDetectResult.fields || Object.keys(autoDetectResult.fields).length === 0) {
+        throw new Error('Failed to auto-detect fields from selected group');
+      }
+
+      logger.info(`Auto-detected ${Object.keys(autoDetectResult.fields).length} fields`);
+
+      let paginationType = 'none';
+      let paginationSelector = '';
+
+      try {
+        const paginationResult = await validator.autoDetectPagination(llmDecision.itemSelector);
+        if (paginationResult.success && paginationResult.type) {
+          paginationType = paginationResult.type;
+          paginationSelector = paginationResult.selector || '';
+        }
+      } catch (error: any) {
+        logger.warn('Pagination auto-detection failed:', error.message);
+      }
+
+      const limit = llmDecision.limit || 100;
+      logger.info(`Using limit: ${limit}`);
+
+      workflow[0].what.push({
+        action: 'scrapeList',
+        actionId: `list-${uuid()}`,
+        name: 'List 1',
+        args: [{
+          fields: autoDetectResult.fields,
+          listSelector: autoDetectResult.listSelector,
+          pagination: {
+            type: paginationType,
+            selector: paginationSelector
+          },
+          limit: limit
+        }]
+      });
+
+      workflow[0].what.push({
+        action: 'waitForLoadState',
+        args: ['networkidle']
+      });
+    } else {
+      throw new Error(`Unsupported action type: ${llmDecision.actionType}. Only captureList is supported.`);
+    }
+
+    return workflow;
+  }
+}
--- a/server/src/storage/schedule.ts
+++ b/server/src/storage/schedule.ts
@@ -13,7 +13,7 @@ import { pgBossClient } from './pgboss';
 * @param cronExpression The cron expression for scheduling
 * @param timezone The timezone for the cron expression
 */
-export async function scheduleWorkflow(id: string, userId: string, cronExpression: string, timezone: string): Promise<void> {
+export async function scheduleWorkflow(id: string, cronExpression: string, timezone: string): Promise<void> {
  try {
    const runId = uuid();

@@ -24,7 +24,7 @@ export async function scheduleWorkflow(id: string, userId: string, cronExpressio
    await pgBossClient.createQueue(queueName);

    await pgBossClient.schedule(queueName, cronExpression,
-      { id, runId, userId },
+      { id, runId },
      { tz: timezone }
    );