Merge branch 'develop' into crawl-search

2026-01-04 18:21:47 +05:30
parent 9898dc410d f0b37a88e7
commit 8ab2defdeb
17 changed files with 462 additions and 53 deletions
--- a/server/src/api/sdk.ts
+++ b/server/src/api/sdk.ts
@@ -88,6 +88,7 @@ router.post("/sdk/robots", requireAPIKey, async (req: AuthenticatedRequest, res:
            type,
            url: extractedUrl,
            formats: (workflowFile.meta as any).formats || [],
+            isLLM: (workflowFile.meta as any).isLLM,
        };

        const robot = await Robot.create({
@@ -102,10 +103,14 @@ router.post("/sdk/robots", requireAPIKey, async (req: AuthenticatedRequest, res:
        const eventName = robotMeta.isLLM
            ? "maxun-oss-llm-robot-created"
            : "maxun-oss-robot-created";
-        capture(eventName, {
+        const telemetryData: any = {
            robot_meta: robot.recording_meta,
            recording: robot.recording,
-        });
+        };
+        if (robotMeta.isLLM && (workflowFile.meta as any).prompt) {
+            telemetryData.prompt = (workflowFile.meta as any).prompt;
+        }
+        capture(eventName, telemetryData);

        return res.status(201).json({
            data: robot,
@@ -916,6 +921,7 @@ router.post("/sdk/extract/llm", requireAPIKey, async (req: AuthenticatedRequest,
        capture("maxun-oss-llm-robot-created", {
            robot_meta: robot.recording_meta,
            recording: robot.recording,
+            prompt: prompt,
        });

        return res.status(200).json({
--- a/server/src/models/User.ts
+++ b/server/src/models/User.ts
@@ -7,6 +7,7 @@ interface UserAttributes {
    password: string;
    api_key_name?: string | null;
    api_key?: string | null;
+    api_key_created_at?: Date | null;
    proxy_url?: string | null;
    proxy_username?: string | null;
    proxy_password?: string | null;
@@ -20,6 +21,7 @@ class User extends Model<UserAttributes, UserCreationAttributes> implements User
    public password!: string;
    public api_key_name!: string | null;
    public api_key!: string | null;
+    public api_key_created_at!: Date | null;
    public proxy_url!: string | null;
    public proxy_username!: string | null;
    public proxy_password!: string | null;
@@ -53,6 +55,10 @@ User.init(
            type: DataTypes.STRING,
            allowNull: true,
        },
+        api_key_created_at: {
+            type: DataTypes.DATE,
+            allowNull: true,
+        },
        proxy_url: {
            type: DataTypes.STRING,
            allowNull: true,
--- a/server/src/routes/auth.ts
+++ b/server/src/routes/auth.ts
@@ -255,8 +255,9 @@ router.post(
        return res.status(400).json({ message: "API key already exists" });
      }
      const apiKey = genAPIKey();
+      const createdAt = new Date();

-      await user.update({ api_key: apiKey });
+      await user.update({ api_key: apiKey, api_key_created_at: createdAt })

      capture("maxun-oss-api-key-created", {
        user_id: user.id,
@@ -266,6 +267,7 @@ router.post(
      return res.status(200).json({
        message: "API key generated successfully",
        api_key: apiKey,
+        api_key_created_at: createdAt,
      });
    } catch (error) {
      return res
@@ -290,7 +292,7 @@ router.get(

      const user = await User.findByPk(req.user.id, {
        raw: true,
-        attributes: ["api_key"],
+        attributes: ["api_key", "api_key_created_at"]
      });

      if (!user) {
@@ -305,6 +307,7 @@ router.get(
        ok: true,
        message: "API key fetched successfully",
        api_key: user.api_key || null,
+        api_key_created_at: user.api_key_created_at || null,
      });
    } catch (error) {
      console.error('API Key fetch error:', error);
@@ -336,7 +339,7 @@ router.delete(
        return res.status(404).json({ message: "API Key not found" });
      }

-      await User.update({ api_key: null }, { where: { id: req.user.id } });
+      await User.update({ api_key: null, api_key_created_at: null }, { where: { id: req.user.id } });

      capture("maxun-oss-api-key-deleted", {
        user_id: user.id,
--- a/server/src/routes/storage.ts
+++ b/server/src/routes/storage.ts
@@ -583,6 +583,7 @@ router.post('/recordings/llm', requireSignIn, async (req: AuthenticatedRequest,
      robot_meta: newRobot.recording_meta,
      recording: newRobot.recording,
      llm_provider: llmProvider || 'ollama',
+      prompt: prompt,
    });

    return res.status(201).json({
--- a/server/src/sdk/workflowEnricher.ts
+++ b/server/src/sdk/workflowEnricher.ts
@@ -1240,6 +1240,168 @@ Rules:
    }
  }

+  /**
+   * Generate semantic list name using LLM based on user prompt and field context
+   */
+  private static async generateListName(
+    prompt: string,
+    url: string,
+    fieldNames: string[],
+    llmConfig?: {
+      provider?: 'anthropic' | 'openai' | 'ollama';
+      model?: string;
+      apiKey?: string;
+      baseUrl?: string;
+    }
+  ): Promise<string> {
+    try {
+      const provider = llmConfig?.provider || 'ollama';
+      const axios = require('axios');
+
+      const fieldContext = fieldNames.length > 0
+        ? `\n\nDetected fields in the list:\n${fieldNames.slice(0, 10).map((name, idx) => `${idx + 1}. ${name}`).join('\n')}`
+        : '';
+
+      const systemPrompt = `You are a list naming assistant. Your job is to generate a clear, concise name for a data list based on the user's extraction request and the fields being extracted.
+
+RULES FOR LIST NAMING:
+1. Use 1-3 words maximum (prefer 2 words)
+2. Use Title Case (e.g., "Product Listings", "Job Postings")
+3. Be specific and descriptive
+4. Match the user's terminology when possible
+5. Adapt to the domain: e-commerce (Products, Listings), jobs (Jobs, Postings), articles (Articles, News), etc.
+6. Avoid generic terms like "List", "Data", "Items" unless absolutely necessary
+7. Focus on WHAT is being extracted, not HOW
+
+Examples:
+- User wants "product listings" → "Product Listings" or "Products"
+- User wants "job postings" → "Job Postings" or "Jobs"
+- User wants "article titles" → "Articles"
+- User wants "company information" → "Companies"
+- User wants "quotes from page" → "Quotes"
+
+You must return ONLY the list name, nothing else. No JSON, no explanation, just the name.`;
+
+      const userPrompt = `URL: ${url}
+
+User's extraction request: "${prompt}"
+${fieldContext}
+
+TASK: Generate a concise, descriptive name for this list (1-3 words in Title Case).
+
+Return ONLY the list name, nothing else:`;
+
+      let llmResponse: string;
+
+      if (provider === 'ollama') {
+        const ollamaBaseUrl = llmConfig?.baseUrl || process.env.OLLAMA_BASE_URL || 'http://localhost:11434';
+        const ollamaModel = llmConfig?.model || 'llama3.2-vision';
+
+        try {
+          const response = await axios.post(`${ollamaBaseUrl}/api/chat`, {
+            model: ollamaModel,
+            messages: [
+              {
+                role: 'system',
+                content: systemPrompt
+              },
+              {
+                role: 'user',
+                content: userPrompt
+              }
+            ],
+            stream: false,
+            options: {
+              temperature: 0.1,
+              top_p: 0.9,
+              num_predict: 20
+            }
+          });
+
+          llmResponse = response.data.message.content;
+        } catch (ollamaError: any) {
+          logger.error(`Ollama request failed for list naming: ${ollamaError.message}`);
+          logger.info('Using fallback list name: "List 1"');
+          return 'List 1';
+        }
+      } else if (provider === 'anthropic') {
+        const anthropic = new Anthropic({
+          apiKey: llmConfig?.apiKey || process.env.ANTHROPIC_API_KEY
+        });
+        const anthropicModel = llmConfig?.model || 'claude-3-5-sonnet-20241022';
+
+        const response = await anthropic.messages.create({
+          model: anthropicModel,
+          max_tokens: 20,
+          temperature: 0.1,
+          messages: [{
+            role: 'user',
+            content: userPrompt
+          }],
+          system: systemPrompt
+        });
+
+        const textContent = response.content.find((c: any) => c.type === 'text');
+        llmResponse = textContent?.type === 'text' ? textContent.text : '';
+
+      } else if (provider === 'openai') {
+        const openaiBaseUrl = llmConfig?.baseUrl || 'https://api.openai.com/v1';
+        const openaiModel = llmConfig?.model || 'gpt-4o-mini';
+
+        const response = await axios.post(`${openaiBaseUrl}/chat/completions`, {
+          model: openaiModel,
+          messages: [
+            {
+              role: 'system',
+              content: systemPrompt
+            },
+            {
+              role: 'user',
+              content: userPrompt
+            }
+          ],
+          max_tokens: 20,
+          temperature: 0.1
+        }, {
+          headers: {
+            'Authorization': `Bearer ${llmConfig?.apiKey || process.env.OPENAI_API_KEY}`,
+            'Content-Type': 'application/json'
+          }
+        });
+
+        llmResponse = response.data.choices[0].message.content;
+      } else {
+        throw new Error(`Unsupported LLM provider: ${provider}`);
+      }
+
+      let listName = (llmResponse || '').trim();
+      logger.info(`LLM List Naming Response: "${listName}"`);
+
+      listName = listName.replace(/^["']|["']$/g, '');
+      listName = listName.split('\n')[0];
+      listName = listName.trim();
+
+      if (!listName || listName.length === 0) {
+        throw new Error('LLM returned empty list name');
+      }
+
+      if (listName.length > 50) {
+        throw new Error('LLM returned list name that is too long');
+      }
+
+      listName = listName.split(' ')
+        .map((word: string) => word.charAt(0).toUpperCase() + word.slice(1).toLowerCase())
+        .join(' ');
+
+      logger.info(`✓ Generated list name: "${listName}"`);
+      return listName;
+    } catch (error: any) {
+      logger.error(`Error in generateListName: ${error.message}`);
+      logger.info('Using fallback list name: "List 1"');
+      return 'List 1';
+    }
+  }
+
  /**
   * Build workflow from LLM decision
   */
@@ -1333,10 +1495,19 @@ Rules:
      const limit = llmDecision.limit || 100;
      logger.info(`Using limit: ${limit}`);

+      logger.info('Generating semantic list name with LLM...');
+      const listName = await this.generateListName(
+        prompt || 'Extract list data',
+        url,
+        Object.keys(finalFields),
+        llmConfig
+      );
+      logger.info(`Using list name: "${listName}"`);
+
      workflow[0].what.push({
        action: 'scrapeList',
        actionId: `list-${uuid()}`,
-        name: 'List 1',
+        name: listName,
        args: [{
          fields: finalFields,
          listSelector: autoDetectResult.listSelector,
--- a/server/src/workflow-management/classes/Interpreter.ts
+++ b/server/src/workflow-management/classes/Interpreter.ts
@@ -580,6 +580,13 @@ export class WorkflowInterpreter {
        setActionName: (name: string) => {
          this.currentActionName = name;
        },
+        progressUpdate: (current: number, total: number, percentage: number) => {
+          this.socket.nsp.emit('workflowProgress', {
+            current,
+            total,
+            percentage
+          });
+        },
      },
      serializableCallback: async (data: any) => {
        try {