Merge pull request #957 from getmaxun/revert-946-auto-search

Revert "feat: add auto search logic"
2026-01-23 17:45:27 +05:30
parent 8151b8d321 536f046b60
commit 4e1a3fdc5d
6 changed files with 41 additions and 610 deletions
--- a/server/src/api/sdk.ts
+++ b/server/src/api/sdk.ts
@@ -870,46 +870,24 @@ router.post("/sdk/search", requireAPIKey, async (req: AuthenticatedRequest, res:
 /**
 * LLM-based extraction - generate workflow from natural language prompt
 * POST /api/sdk/extract/llm
- * URL is optional - if not provided, the system will search for the target website based on the prompt
 */
 router.post("/sdk/extract/llm", requireAPIKey, async (req: AuthenticatedRequest, res: Response) => {
    try {
        const user = req.user
        const { url, prompt, llmProvider, llmModel, llmApiKey, llmBaseUrl, robotName } = req.body;

-        if (!prompt) {
+        if (!url || !prompt) {
            return res.status(400).json({
-                error: "Prompt is required"
+                error: "URL and prompt are required"
            });
        }

-        if (url) {
-            try {
-                new URL(url);
-            } catch (err) {
-                return res.status(400).json({
-                    error: "Invalid URL format"
-                });
-            }
-        }
-
-        const llmConfig = {
+        const workflowResult = await WorkflowEnricher.generateWorkflowFromPrompt(url, prompt, user.id, {
            provider: llmProvider,
            model: llmModel,
            apiKey: llmApiKey,
            baseUrl: llmBaseUrl
-        };
-
-        let workflowResult: any;
-        let finalUrl: string;
-
-        if (url) {
-            workflowResult = await WorkflowEnricher.generateWorkflowFromPrompt(url, prompt, user.id, llmConfig);
-            finalUrl = workflowResult.url || url;
-        } else {
-            workflowResult = await WorkflowEnricher.generateWorkflowFromPromptWithSearch(prompt, user.id, llmConfig);
-            finalUrl = workflowResult.url || '';
-        }
+        });

        if (!workflowResult.success || !workflowResult.workflow) {
            return res.status(400).json({
@@ -929,8 +907,8 @@ router.post("/sdk/extract/llm", requireAPIKey, async (req: AuthenticatedRequest,
            pairs: workflowResult.workflow.length,
            params: [],
            type: 'extract',
-            url: finalUrl,
-            isLLM: true
+            url: workflowResult.url,
+            isLLM: true,
        };

        const robot = await Robot.create({
@@ -947,7 +925,7 @@ router.post("/sdk/extract/llm", requireAPIKey, async (req: AuthenticatedRequest,
        capture("maxun-oss-llm-robot-created", {
            robot_meta: robot.recording_meta,
            recording: robot.recording,
-            prompt: prompt
+            prompt: prompt,
        });

        return res.status(200).json({
@@ -956,7 +934,7 @@ router.post("/sdk/extract/llm", requireAPIKey, async (req: AuthenticatedRequest,
                robotId: metaId,
                name: robotMeta.name,
                description: prompt,
-                url: finalUrl,
+                url: workflowResult.url,
                workflow: workflowResult.workflow
            }
        });
--- a/server/src/routes/storage.ts
+++ b/server/src/routes/storage.ts
@@ -442,51 +442,33 @@ router.post('/recordings/scrape', requireSignIn, async (req: AuthenticatedReques

 /**
 * POST endpoint for creating an LLM-powered extraction robot
- * URL is optional - if not provided, the system will search for the target website based on the prompt
 */
 router.post('/recordings/llm', requireSignIn, async (req: AuthenticatedRequest, res) => {
  try {
    const { url, prompt, llmProvider, llmModel, llmApiKey, llmBaseUrl, robotName } = req.body;

-    if (!prompt) {
-      return res.status(400).json({ error: 'The "prompt" field is required.' });
+    if (!url || !prompt) {
+      return res.status(400).json({ error: 'Both "url" and "prompt" fields are required.' });
    }

    if (!req.user) {
      return res.status(401).send({ error: 'Unauthorized' });
    }

-    // Validate URL format if provided
-    if (url) {
-      try {
-        new URL(url);
-      } catch (err) {
-        return res.status(400).json({ error: 'Invalid URL format' });
-      }
+    try {
+      new URL(url);
+    } catch (err) {
+      return res.status(400).json({ error: 'Invalid URL format' });
    }

-    let workflowResult: any;
-    let finalUrl: string;
+    logger.log('info', `Starting LLM workflow generation for URL: ${url}`);

-    const llmConfig = {
+    const workflowResult = await WorkflowEnricher.generateWorkflowFromPrompt(url, prompt, req.user.id, {
      provider: llmProvider || 'ollama',
      model: llmModel,
      apiKey: llmApiKey,
      baseUrl: llmBaseUrl
-    };
-
-    if (url) {
-      logger.log('info', `Starting LLM workflow generation for provided URL: ${url}`);
-      workflowResult = await WorkflowEnricher.generateWorkflowFromPrompt(url, prompt, req.user.id, llmConfig);
-      finalUrl = workflowResult.url || url;
-    } else {
-      logger.log('info', `Starting LLM workflow generation with automatic URL detection for prompt: "${prompt}"`);
-      workflowResult = await WorkflowEnricher.generateWorkflowFromPromptWithSearch(prompt, req.user.id, llmConfig);
-      finalUrl = workflowResult.url || '';
-      if (finalUrl) {
-        logger.log('info', `Auto-detected URL: ${finalUrl}`);
-      }
-    }
+    });

    if (!workflowResult.success || !workflowResult.workflow) {
      logger.log('error', `Failed to generate workflow: ${JSON.stringify(workflowResult.errors)}`);
@@ -511,7 +493,7 @@ router.post('/recordings/llm', requireSignIn, async (req: AuthenticatedRequest,
        pairs: workflowResult.workflow.length,
        params: [],
        type: 'extract',
-        url: finalUrl,
+        url: workflowResult.url || url,
        isLLM: true,
      },
      recording: { workflow: workflowResult.workflow },
@@ -529,7 +511,6 @@ router.post('/recordings/llm', requireSignIn, async (req: AuthenticatedRequest,
      recording: newRobot.recording,
      llm_provider: llmProvider || 'ollama',
      prompt: prompt,
-      urlAutoDetected: !url,
    });

    return res.status(201).json({
--- a/server/src/sdk/workflowEnricher.ts
+++ b/server/src/sdk/workflowEnricher.ts
@@ -1529,521 +1529,4 @@ Return ONLY the list name, nothing else:`;

    return workflow;
  }
-
-  /**
-   * Generate workflow from prompt with automatic URL detection via search
-   * This method searches for the target website based on the user's prompt,
-   * then generates a workflow for the best matching URL
-   */
-  static async generateWorkflowFromPromptWithSearch(
-    userPrompt: string,
-    userId: string,
-    llmConfig?: {
-      provider?: 'anthropic' | 'openai' | 'ollama';
-      model?: string;
-      apiKey?: string;
-      baseUrl?: string;
-    }
-  ): Promise<{
-    success: boolean;
-    workflow?: any[];
-    url?: string;
-    errors?: string[];
-  }> {
-    let browserId: string | null = null;
-
-    try {
-      const { browserId: id, page } = await createRemoteBrowserForValidation(userId);
-      browserId = id;
-
-      const intent = await this.parseSearchIntent(userPrompt, llmConfig);
-
-      const searchResults = await this.performDuckDuckGoSearch(intent.searchQuery, page);
-      if (searchResults.length === 0) {
-        if (browserId) {
-          await destroyRemoteBrowser(browserId, userId);
-        }
-        return {
-          success: false,
-          errors: [`No search results found for query: "${intent.searchQuery}". Please provide a URL manually or refine your prompt.`]
-        };
-      }
-
-      const selection = await this.selectBestUrlFromResults(searchResults, userPrompt, llmConfig);
-      
-      await page.goto(selection.url, { waitUntil: 'networkidle', timeout: 30000 });
-      await page.waitForTimeout(2000);
-
-      const validator = new SelectorValidator();
-      await validator.initialize(page, selection.url);
-
-      const validatorPage = (validator as any).page;
-      const screenshotBuffer = await validatorPage.screenshot({ 
-        fullPage: true, 
-        type: 'jpeg',
-        quality: 85
-      });
-      const screenshotBase64 = screenshotBuffer.toString('base64');
-
-      const elementGroups = await this.analyzePageGroups(validator);
-      const pageHTML = await validatorPage.content();
-
-      const llmDecision = await this.getLLMDecisionWithVision(
-        userPrompt,
-        screenshotBase64,
-        elementGroups,
-        pageHTML,
-        llmConfig
-      );
-
-      if (intent.limit !== undefined && intent.limit !== null) {
-        llmDecision.limit = intent.limit;
-      }
-
-      const workflow = await this.buildWorkflowFromLLMDecision(llmDecision, selection.url, validator, userPrompt, llmConfig);
-
-      await validator.close();
-
-      if (browserId) {
-        await destroyRemoteBrowser(browserId, userId);
-      }
-
-      return {
-        success: true,
-        workflow,
-        url: selection.url
-      };
-
-    } catch (error: any) {
-      if (browserId) {
-        try {
-          await destroyRemoteBrowser(browserId, userId);
-        } catch (cleanupError) {
-          logger.warn('Failed to cleanup RemoteBrowser:', cleanupError);
-        }
-      }
-
-      logger.error('Error in generateWorkflowFromPromptWithSearch:', error);
-      return {
-        success: false,
-        errors: [error.message]
-      };
-    }
-  }
-
-  /**
-   * Parse user prompt to extract search intent
-   */
-  private static async parseSearchIntent(
-    userPrompt: string,
-    llmConfig?: {
-      provider?: 'anthropic' | 'openai' | 'ollama';
-      model?: string;
-      apiKey?: string;
-      baseUrl?: string;
-    }
-  ): Promise<{
-    searchQuery: string;
-    extractionGoal: string;
-    limit?: number | null;
-  }> {
-    const systemPrompt = `You are a search query extractor. Analyze the user's extraction request and identify:
-1. The website or page they want to extract from (for searching)
-2. What data they want to extract
-3. Any limit/quantity specified
-
-Examples:
- "Extract top 10 company data from YCombinator Companies site" → searchQuery: "YCombinator Companies", goal: "company data", limit: 10
- "Get first 20 laptop names and prices from Amazon" → searchQuery: "Amazon laptops", goal: "laptop names and prices", limit: 20
- "Scrape articles from TechCrunch AI section" → searchQuery: "TechCrunch AI section", goal: "articles", limit: null
-
-Return ONLY valid JSON: {"searchQuery": "...", "extractionGoal": "...", "limit": NUMBER_OR_NULL}`;
-
-    const userMessage = `User request: "${userPrompt}"
-
-Extract the search query, extraction goal, and limit. Return JSON only.`;
-
-    try {
-      const provider = llmConfig?.provider || 'ollama';
-      const axios = require('axios');
-
-      let llmResponse: string;
-
-      if (provider === 'ollama') {
-        const ollamaBaseUrl = llmConfig?.baseUrl || process.env.OLLAMA_BASE_URL || 'http://localhost:11434';
-        const ollamaModel = llmConfig?.model || 'llama3.2-vision';
-
-        const jsonSchema = {
-          type: 'object',
-          required: ['searchQuery', 'extractionGoal'],
-          properties: {
-            searchQuery: { type: 'string' },
-            extractionGoal: { type: 'string' },
-            limit: { type: ['integer', 'null'] }
-          }
-        };
-
-        const response = await axios.post(`${ollamaBaseUrl}/api/chat`, {
-          model: ollamaModel,
-          messages: [
-            { role: 'system', content: systemPrompt },
-            { role: 'user', content: userMessage }
-          ],
-          stream: false,
-          format: jsonSchema,
-          options: { temperature: 0.1 }
-        });
-
-        llmResponse = response.data.message.content;
-
-      } else if (provider === 'anthropic') {
-        const anthropic = new Anthropic({
-          apiKey: llmConfig?.apiKey || process.env.ANTHROPIC_API_KEY
-        });
-        const anthropicModel = llmConfig?.model || 'claude-3-5-sonnet-20241022';
-
-        const response = await anthropic.messages.create({
-          model: anthropicModel,
-          max_tokens: 256,
-          temperature: 0.1,
-          messages: [{ role: 'user', content: userMessage }],
-          system: systemPrompt
-        });
-
-        const textContent = response.content.find((c: any) => c.type === 'text');
-        llmResponse = textContent?.type === 'text' ? textContent.text : '';
-
-      } else if (provider === 'openai') {
-        const openaiBaseUrl = llmConfig?.baseUrl || 'https://api.openai.com/v1';
-        const openaiModel = llmConfig?.model || 'gpt-4o-mini';
-
-        const response = await axios.post(`${openaiBaseUrl}/chat/completions`, {
-          model: openaiModel,
-          messages: [
-            { role: 'system', content: systemPrompt },
-            { role: 'user', content: userMessage }
-          ],
-          max_tokens: 256,
-          temperature: 0.1,
-          response_format: { type: 'json_object' }
-        }, {
-          headers: {
-            'Authorization': `Bearer ${llmConfig?.apiKey || process.env.OPENAI_API_KEY}`,
-            'Content-Type': 'application/json'
-          }
-        });
-
-        llmResponse = response.data.choices[0].message.content;
-
-      } else {
-        throw new Error(`Unsupported LLM provider: ${provider}`);
-      }
-
-      logger.info(`[WorkflowEnricher] Intent parsing response: ${llmResponse}`);
-
-      let jsonStr = llmResponse.trim();
-      const jsonMatch = jsonStr.match(/```json\s*([\s\S]*?)\s*```/) || jsonStr.match(/```\s*([\s\S]*?)\s*```/);
-      if (jsonMatch) {
-        jsonStr = jsonMatch[1].trim();
-      }
-
-      const objectMatch = jsonStr.match(/\{[\s\S]*"searchQuery"[\s\S]*\}/);
-      if (objectMatch) {
-        jsonStr = objectMatch[0];
-      }
-
-      const intent = JSON.parse(jsonStr);
-
-      if (!intent.searchQuery || !intent.extractionGoal) {
-        throw new Error('Invalid intent parsing response - missing required fields');
-      }
-
-      return {
-        searchQuery: intent.searchQuery,
-        extractionGoal: intent.extractionGoal,
-        limit: intent.limit || null
-      };
-
-    } catch (error: any) {
-      logger.warn(`Failed to parse intent with LLM: ${error.message}`);
-      logger.info('Using fallback heuristic intent parsing');
-
-      const fromMatch = userPrompt.match(/from\s+([^,\.]+)/i);
-      const searchQuery = fromMatch ? fromMatch[1].trim() : userPrompt.slice(0, 50);
-
-      const numberMatch = userPrompt.match(/(\d+)/);
-      const limit = numberMatch ? parseInt(numberMatch[1], 10) : null;
-
-      return {
-        searchQuery,
-        extractionGoal: userPrompt,
-        limit
-      };
-    }
-  }
-
-  /**
-   * Perform DuckDuckGo search and return FIRST URL only
-   * Simplified version - just returns the first valid URL from search results
-   */
-  private static async performDuckDuckGoSearch(
-    query: string,
-    page: any
-  ): Promise<Array<{ url: string; title: string; description: string; position: number }>> {
-    logger.info(`[WorkflowEnricher] Searching DuckDuckGo for: "${query}"`);
-
-    try {
-      const searchUrl = `https://duckduckgo.com/?q=${encodeURIComponent(query)}`;
-      const initialDelay = 500 + Math.random() * 1000;
-      await new Promise(resolve => setTimeout(resolve, initialDelay));
-
-      await page.goto(searchUrl, { waitUntil: 'domcontentloaded', timeout: 30000 });
-      await page.waitForLoadState('load', { timeout: 10000 }).catch(() => {
-        logger.warn('[WorkflowEnricher] Load state timeout, continuing anyway');
-      });
-
-      const pageLoadDelay = 2000 + Math.random() * 1500;
-      await new Promise(resolve => setTimeout(resolve, pageLoadDelay));
-
-      await page.waitForSelector('[data-testid="result"], .result', { timeout: 5000 }).catch(() => {
-        logger.warn('[WorkflowEnricher] DuckDuckGo results not found on initial wait');
-      });
-
-      const firstUrl = await page.evaluate(() => {
-        const selectors = [
-          '[data-testid="result"]',
-          'article[data-testid="result"]',
-          'li[data-layout="organic"]',
-          '.result',
-          'article[data-testid]'
-        ];
-
-        let allElements: Element[] = [];
-        for (const selector of selectors) {
-          const elements = Array.from(document.querySelectorAll(selector));
-          if (elements.length > 0) {
-            console.log(`Found ${elements.length} DDG elements with: ${selector}`);
-            allElements = elements;
-            break;
-          }
-        }
-
-        if (allElements.length === 0) {
-          console.error('No search result elements found');
-          return null;
-        }
-
-        const element = allElements[0];
-        const titleEl = element.querySelector('h2, [data-testid="result-title-a"], h3, [data-testid="result-title"]');
-
-        let linkEl = titleEl?.querySelector('a[href]') as HTMLAnchorElement;
-        if (!linkEl) {
-          linkEl = element.querySelector('a[href]') as HTMLAnchorElement;
-        }
-
-        if (!linkEl || !linkEl.href) return null;
-
-        let actualUrl = linkEl.href;
-
-        if (actualUrl.includes('uddg=')) {
-          try {
-            const urlParams = new URLSearchParams(actualUrl.split('?')[1]);
-            const uddgUrl = urlParams.get('uddg');
-            if (uddgUrl) {
-              actualUrl = decodeURIComponent(uddgUrl);
-            }
-          } catch (e) {
-            console.log('Failed to parse uddg parameter:', e);
-          }
-        }
-
-        if (actualUrl.includes('duckduckgo.com')) {
-          console.log(`Skipping DDG internal URL: ${actualUrl}`);
-          return null;
-        }
-
-        return actualUrl;
-      });
-
-      if (!firstUrl) {
-        logger.error('[WorkflowEnricher] No valid URL found in search results');
-        return [];
-      }
-
-      logger.info(`[WorkflowEnricher] Successfully extracted first URL: ${firstUrl}`);
-
-      return [{
-        url: firstUrl,
-        title: '',
-        description: '',
-        position: 1
-      }];
-
-    } catch (error: any) {
-      logger.error(`[WorkflowEnricher] Search failed: ${error.message}`);
-      throw new Error(`DuckDuckGo search failed: ${error.message}`);
-    }
-  }
-
-  /**
-   * Use LLM to select the best URL from search results
-   */
-  private static async selectBestUrlFromResults(
-    searchResults: any[],
-    userPrompt: string,
-    llmConfig?: {
-      provider?: 'anthropic' | 'openai' | 'ollama';
-      model?: string;
-      apiKey?: string;
-      baseUrl?: string;
-    }
-  ): Promise<{
-    url: string;
-    confidence: number;
-    reasoning: string;
-  }> {
-    if (searchResults.length === 1) {
-      return {
-        url: searchResults[0].url,
-        confidence: 0.8,
-        reasoning: 'Selected first search result from DuckDuckGo'
-      };
-    }
-
-    const systemPrompt = `You are a URL selector. Given a list of search results and a user's extraction request, select the BEST URL that is most likely to contain the data the user wants.
-
-Consider:
-1. Title and description relevance to the user's request
-2. Official/authoritative sources are usually better than aggregators
-3. List/directory pages are better than individual item pages
-4. The URL path often gives hints about the page content
-
-Return ONLY valid JSON: {"selectedIndex": NUMBER, "confidence": NUMBER_0_TO_1, "reasoning": "brief explanation"}`;
-
-    const resultsDescription = searchResults.map((r, i) =>
-      `Result ${i}:
- Title: ${r.title}
- URL: ${r.url}
- Description: ${r.description}`
-    ).join('\n\n');
-
-    const userMessage = `User wants to: "${userPrompt}"
-
-Available search results:
-${resultsDescription}
-
-Select the BEST result index (0-${searchResults.length - 1}). Return JSON only.`;
-
-    try {
-      const provider = llmConfig?.provider || 'ollama';
-      const axios = require('axios');
-
-      let llmResponse: string;
-
-      if (provider === 'ollama') {
-        const ollamaBaseUrl = llmConfig?.baseUrl || process.env.OLLAMA_BASE_URL || 'http://localhost:11434';
-        const ollamaModel = llmConfig?.model || 'llama3.2-vision';
-
-        const jsonSchema = {
-          type: 'object',
-          required: ['selectedIndex', 'confidence', 'reasoning'],
-          properties: {
-            selectedIndex: { type: 'integer' },
-            confidence: { type: 'number' },
-            reasoning: { type: 'string' }
-          }
-        };
-
-        const response = await axios.post(`${ollamaBaseUrl}/api/chat`, {
-          model: ollamaModel,
-          messages: [
-            { role: 'system', content: systemPrompt },
-            { role: 'user', content: userMessage }
-          ],
-          stream: false,
-          format: jsonSchema,
-          options: { temperature: 0.1 }
-        });
-
-        llmResponse = response.data.message.content;
-
-      } else if (provider === 'anthropic') {
-        const anthropic = new Anthropic({
-          apiKey: llmConfig?.apiKey || process.env.ANTHROPIC_API_KEY
-        });
-        const anthropicModel = llmConfig?.model || 'claude-3-5-sonnet-20241022';
-
-        const response = await anthropic.messages.create({
-          model: anthropicModel,
-          max_tokens: 256,
-          temperature: 0.1,
-          messages: [{ role: 'user', content: userMessage }],
-          system: systemPrompt
-        });
-
-        const textContent = response.content.find((c: any) => c.type === 'text');
-        llmResponse = textContent?.type === 'text' ? textContent.text : '';
-
-      } else if (provider === 'openai') {
-        const openaiBaseUrl = llmConfig?.baseUrl || 'https://api.openai.com/v1';
-        const openaiModel = llmConfig?.model || 'gpt-4o-mini';
-
-        const response = await axios.post(`${openaiBaseUrl}/chat/completions`, {
-          model: openaiModel,
-          messages: [
-            { role: 'system', content: systemPrompt },
-            { role: 'user', content: userMessage }
-          ],
-          max_tokens: 256,
-          temperature: 0.1,
-          response_format: { type: 'json_object' }
-        }, {
-          headers: {
-            'Authorization': `Bearer ${llmConfig?.apiKey || process.env.OPENAI_API_KEY}`,
-            'Content-Type': 'application/json'
-          }
-        });
-
-        llmResponse = response.data.choices[0].message.content;
-
-      } else {
-        throw new Error(`Unsupported LLM provider: ${provider}`);
-      }
-
-      logger.info(`[WorkflowEnricher] URL selection response: ${llmResponse}`);
-
-      let jsonStr = llmResponse.trim();
-      const jsonMatch = jsonStr.match(/```json\s*([\s\S]*?)\s*```/) || jsonStr.match(/```\s*([\s\S]*?)\s*```/);
-      if (jsonMatch) {
-        jsonStr = jsonMatch[1].trim();
-      }
-
-      const objectMatch = jsonStr.match(/\{[\s\S]*"selectedIndex"[\s\S]*\}/);
-      if (objectMatch) {
-        jsonStr = objectMatch[0];
-      }
-
-      const decision = JSON.parse(jsonStr);
-
-      if (decision.selectedIndex === undefined || decision.selectedIndex < 0 || decision.selectedIndex >= searchResults.length) {
-        throw new Error(`Invalid selectedIndex: ${decision.selectedIndex}`);
-      }
-
-      return {
-        url: searchResults[decision.selectedIndex].url,
-        confidence: decision.confidence || 0.5,
-        reasoning: decision.reasoning || 'No reasoning provided'
-      };
-
-    } catch (error: any) {
-      logger.warn(`[WorkflowEnricher] Failed to select URL with LLM: ${error.message}`);
-      logger.info('[WorkflowEnricher] Using fallback: selecting first search result');
-
-      return {
-        url: searchResults[0].url,
-        confidence: 0.6,
-        reasoning: 'Selected first search result (LLM selection failed)'
-      };
-    }
-  }
 }
--- a/src/api/storage.ts
+++ b/src/api/storage.ts
@@ -59,7 +59,7 @@ export const createScrapeRobot = async (
 };

 export const createLLMRobot = async (
-  url: string | undefined,
+  url: string,
  prompt: string,
  llmProvider?: 'anthropic' | 'openai' | 'ollama',
  llmModel?: string,
@@ -71,7 +71,7 @@ export const createLLMRobot = async (
    const response = await axios.post(
      `${apiUrl}/storage/recordings/llm`,
      {
-        url: url || undefined,
+        url,
        prompt,
        llmProvider,
        llmModel,
--- a/src/components/robot/RecordingsTable.tsx
+++ b/src/components/robot/RecordingsTable.tsx
@@ -97,7 +97,7 @@ const LoadingRobotRow = memo(({ row, columns }: any) => {
        } else if (column.id === 'interpret') {
          return (
            <MemoizedTableCell key={column.id} align={column.align}>
-               <Box sx={{ opacity: 0.3 }}>-</Box>
+              <CircularProgress size={20} />
            </MemoizedTableCell>
          );
        } else {
--- a/src/components/robot/pages/RobotCreate.tsx
+++ b/src/components/robot/pages/RobotCreate.tsx
@@ -65,7 +65,7 @@ const RobotCreate: React.FC = () => {
  const [isWarningModalOpen, setWarningModalOpen] = useState(false);
  const [activeBrowserId, setActiveBrowserId] = useState('');
  const [outputFormats, setOutputFormats] = useState<string[]>([]);
-  const [generationMode, setGenerationMode] = useState<'agent' | 'recorder' | null>('recorder');
+  const [generationMode, setGenerationMode] = useState<'agent' | 'recorder' | null>(null);

  const [aiPrompt, setAiPrompt] = useState('');
  const [llmProvider, setLlmProvider] = useState<'anthropic' | 'openai' | 'ollama'>('ollama');
@@ -323,6 +323,17 @@ const RobotCreate: React.FC = () => {
              <Typography variant="body2" color="text.secondary" mb={3}>
                Extract structured data from websites using AI or record your own extraction workflow.
              </Typography>
+              <Box sx={{ width: '100%', maxWidth: 700, mb: 3 }}>
+                <TextField
+                  placeholder="Example: https://www.ycombinator.com/companies/"
+                  variant="outlined"
+                  fullWidth
+                  value={url}
+                  onChange={(e) => setUrl(e.target.value)}
+                  label="Website URL"
+                />
+              </Box>
+
              <Box sx={{ width: '100%', maxWidth: 700, mb: 3 }}>
                <Typography variant="subtitle1" gutterBottom sx={{ mb: 2 }} color="text.secondary">
                  Choose How to Build
@@ -421,17 +432,6 @@ const RobotCreate: React.FC = () => {
                      />
                    </Box>

-                    <Box sx={{ mb: 3 }}>
-                      <TextField
-                        placeholder="Example: https://www.ycombinator.com/companies/"
-                        variant="outlined"
-                        fullWidth
-                        value={url}
-                        onChange={(e) => setUrl(e.target.value)}
-                        label="Website URL (Optional)"
-                      />
-                    </Box>
-
                    <Box sx={{ display: 'flex', gap: 2, mb: 3 }}>
                      <FormControl sx={{ flex: 1 }}>
                        <InputLabel>LLM Provider</InputLabel>
@@ -517,7 +517,10 @@ const RobotCreate: React.FC = () => {
                      variant="contained"
                      fullWidth
                      onClick={async () => {
-                        // URL is optional for AI mode - it will auto-search if not provided
+                        if (!url.trim()) {
+                          notify('error', 'Please enter a valid URL');
+                          return;
+                        }
                        if (!extractRobotName.trim()) {
                          notify('error', 'Please enter a robot name');
                          return;
@@ -540,7 +543,7 @@ const RobotCreate: React.FC = () => {
                            pairs: 0,
                            params: [],
                            type: 'extract',
-                            url: url || '(auto-detecting...)',
+                            url: url,
                          },
                          recording: { workflow: [] },
                          isLoading: true,
@@ -549,14 +552,12 @@ const RobotCreate: React.FC = () => {

                        addOptimisticRobot(optimisticRobot);

-                        notify('info', url.trim() 
-                          ? `Robot ${robotDisplayName} creation started` 
-                          : `Robot ${robotDisplayName} creation started (searching for website...)`);
+                        notify('info', `Robot ${robotDisplayName} creation started`);
                        navigate('/robots');

                        try {
                          const result = await createLLMRobot(
-                            url.trim() || undefined,
+                            url,
                            aiPrompt,
                            llmProvider,
                            llmModel === 'default' ? undefined : llmModel,
@@ -616,7 +617,7 @@ const RobotCreate: React.FC = () => {
                          notify('error', error?.message || 'Failed to create and run AI robot');
                        }
                      }}
-                      disabled={!extractRobotName.trim() || !aiPrompt.trim() || isLoading}
+                      disabled={!url.trim() || !extractRobotName.trim() || !aiPrompt.trim() || isLoading}
                      sx={{
                        bgcolor: '#ff00c3',
                        py: 1.4,
@@ -632,17 +633,6 @@ const RobotCreate: React.FC = () => {
                )}

                {generationMode === 'recorder' && (
-                <>
-                  <Box sx={{ width: '100%', maxWidth: 700, mb: 3 }}>
-                    <TextField
-                      placeholder="Example: https://www.ycombinator.com/companies/"
-                      variant="outlined"
-                      fullWidth
-                      value={url}
-                      onChange={(e) => setUrl(e.target.value)}
-                      label="Website URL"
-                    />
-                  </Box>
                  <Box sx={{ width: '100%', maxWidth: 700 }}>
                    <Button
                      variant="contained"
@@ -661,7 +651,6 @@ const RobotCreate: React.FC = () => {
                      {isLoading ? 'Starting...' : 'Start Recording'}
                    </Button>
                  </Box>
-                </>
                )}
              </Box>
          </Card>