feat: add server side crawl and search logic

Rohit Rajan
2026-01-02 15:46:10 +05:30
parent 3689eb96bd
commit 9898dc410d
15 changed files with 1577 additions and 124 deletions

View File

@@ -79,7 +79,9 @@ export default class Interpreter extends EventEmitter {
private serializableDataByType: Record<string, Record<string, any>> = {
scrapeList: {},
-    scrapeSchema: {}
+    scrapeSchema: {},
+    crawl: {},
+    search: {}
};
private scrapeListCounter: number = 0;
@@ -565,7 +567,9 @@ export default class Interpreter extends EventEmitter {
await this.options.serializableCallback({
scrapeList: this.serializableDataByType.scrapeList,
-    scrapeSchema: this.serializableDataByType.scrapeSchema
+    scrapeSchema: this.serializableDataByType.scrapeSchema,
+    crawl: this.serializableDataByType.crawl || {},
+    search: this.serializableDataByType.search || {}
});
},
@@ -703,6 +707,750 @@ export default class Interpreter extends EventEmitter {
}
},
crawl: async (crawlConfig: {
mode: 'domain' | 'subdomain' | 'path';
limit: number;
maxDepth: number;
includePaths: string[];
excludePaths: string[];
useSitemap: boolean;
followLinks: boolean;
respectRobots: boolean;
}) => {
if (this.isAborted) {
this.log('Workflow aborted, stopping crawl', Level.WARN);
return;
}
if (this.options.debugChannel?.setActionType) {
this.options.debugChannel.setActionType('crawl');
}
this.log('Starting crawl operation', Level.LOG);
try {
const currentUrl = page.url();
this.log(`Current page URL: ${currentUrl}`, Level.LOG);
if (!currentUrl || currentUrl === 'about:blank' || currentUrl === '') {
this.log('Page not yet navigated, waiting for navigation...', Level.WARN);
await page.waitForLoadState('load', { timeout: 10000 }).catch(() => {});
}
const baseUrl = page.url();
this.log(`Using base URL for crawl: ${baseUrl}`, Level.LOG);
const parsedBase = new URL(baseUrl);
const baseDomain = parsedBase.hostname;
let discoveredUrls: string[] = [];
if (crawlConfig.useSitemap) {
this.log('Fetching sitemap URLs...', Level.LOG);
try {
const sitemapUrl = `${parsedBase.protocol}//${parsedBase.host}/sitemap.xml`;
const sitemapUrls = await page.evaluate((url) => {
return new Promise<string[]>((resolve) => {
const xhr = new XMLHttpRequest();
xhr.open('GET', url, true);
xhr.onload = function() {
if (xhr.status === 200) {
const text = xhr.responseText;
const locMatches = text.match(/<loc>(.*?)<\/loc>/g) || [];
const urls = locMatches.map(match => match.replace(/<\/?loc>/g, ''));
resolve(urls);
} else {
resolve([]);
}
};
xhr.onerror = function() {
resolve([]);
};
xhr.send();
});
}, sitemapUrl);
if (sitemapUrls.length > 0) {
const nestedSitemaps = sitemapUrls.filter(url =>
url.endsWith('/sitemap') || url.endsWith('sitemap.xml') || url.includes('/sitemap/')
);
const regularUrls = sitemapUrls.filter(url =>
!url.endsWith('/sitemap') && !url.endsWith('sitemap.xml') && !url.includes('/sitemap/')
);
discoveredUrls.push(...regularUrls);
this.log(`Found ${regularUrls.length} regular URLs from main sitemap`, Level.LOG);
for (const nestedUrl of nestedSitemaps.slice(0, 10)) {
try {
this.log(`Fetching nested sitemap: ${nestedUrl}`, Level.LOG);
const nestedUrls = await page.evaluate((url) => {
return new Promise<string[]>((resolve) => {
const xhr = new XMLHttpRequest();
xhr.open('GET', url, true);
xhr.onload = function() {
if (xhr.status === 200) {
const text = xhr.responseText;
const locMatches = text.match(/<loc>(.*?)<\/loc>/g) || [];
const urls = locMatches.map(match => match.replace(/<\/?loc>/g, ''));
resolve(urls);
} else {
resolve([]);
}
};
xhr.onerror = function() {
resolve([]);
};
xhr.send();
});
}, nestedUrl);
if (nestedUrls.length > 0) {
discoveredUrls.push(...nestedUrls);
this.log(`Found ${nestedUrls.length} URLs from nested sitemap ${nestedUrl}`, Level.LOG);
}
} catch (error) {
this.log(`Failed to fetch nested sitemap ${nestedUrl}: ${error.message}`, Level.WARN);
}
}
this.log(`Total URLs from all sitemaps: ${discoveredUrls.length}`, Level.LOG);
} else {
this.log('No URLs found in sitemap or sitemap not available', Level.WARN);
}
} catch (error) {
this.log(`Sitemap fetch failed: ${error.message}`, Level.WARN);
}
}
if (crawlConfig.followLinks) {
this.log('Extracting links from current page...', Level.LOG);
try {
await page.waitForLoadState('load', { timeout: 15000 }).catch(() => {});
await page.waitForLoadState('networkidle', { timeout: 10000 }).catch(() => {
this.log('Network did not become idle, continuing anyway', Level.WARN);
});
await new Promise(resolve => setTimeout(resolve, 5000));
const anchorCount = await page.evaluate(() => {
return document.querySelectorAll('a').length;
});
this.log(`Page has ${anchorCount} total anchor tags`, Level.LOG);
const pageLinks = await page.evaluate(() => {
const links: string[] = [];
const allAnchors = document.querySelectorAll('a');
console.log('Total anchors found:', allAnchors.length);
for (let i = 0; i < allAnchors.length; i++) {
const anchor = allAnchors[i] as HTMLAnchorElement;
const href = anchor.getAttribute('href');
const fullHref = anchor.href;
if (fullHref && (fullHref.startsWith('http://') || fullHref.startsWith('https://'))) {
links.push(fullHref);
}
}
console.log('Links extracted:', links.length);
return links;
});
discoveredUrls.push(...pageLinks);
this.log(`Found ${pageLinks.length} links from page`, Level.LOG);
} catch (error) {
this.log(`Link extraction failed: ${error.message}`, Level.WARN);
}
}
const filteredUrls = discoveredUrls.filter(url => {
try {
const urlObj = new URL(url);
if (crawlConfig.mode === 'domain') {
if (urlObj.hostname !== baseDomain) return false;
} else if (crawlConfig.mode === 'subdomain') {
if (!urlObj.hostname.endsWith(baseDomain) && urlObj.hostname !== baseDomain) return false;
} else if (crawlConfig.mode === 'path') {
if (urlObj.hostname !== baseDomain || !urlObj.pathname.startsWith(parsedBase.pathname)) return false;
}
if (crawlConfig.includePaths && crawlConfig.includePaths.length > 0) {
const matches = crawlConfig.includePaths.some(pattern => {
const regex = new RegExp(pattern);
return regex.test(url);
});
if (!matches) return false;
}
if (crawlConfig.excludePaths && crawlConfig.excludePaths.length > 0) {
const matches = crawlConfig.excludePaths.some(pattern => {
const regex = new RegExp(pattern);
return regex.test(url);
});
if (matches) return false;
}
return true;
} catch (error) {
return false;
}
});
const uniqueUrls = Array.from(new Set(filteredUrls.map(url => {
return url.replace(/#.*$/, '').replace(/\/$/, '');
})));
const basePathname = parsedBase.pathname;
const prioritizedUrls = uniqueUrls.sort((a, b) => {
try {
const aUrl = new URL(a);
const bUrl = new URL(b);
const aMatchesBase = aUrl.pathname.startsWith(basePathname);
const bMatchesBase = bUrl.pathname.startsWith(basePathname);
if (aMatchesBase && !bMatchesBase) return -1;
if (!aMatchesBase && bMatchesBase) return 1;
return 0;
} catch (error) {
return 0;
}
});
const finalUrls = prioritizedUrls.slice(0, crawlConfig.limit);
this.log(`Crawl discovered ${finalUrls.length} URLs (from ${discoveredUrls.length} total)`, Level.LOG);
this.log(`Starting to scrape content from ${finalUrls.length} discovered URLs...`, Level.LOG);
const crawlResults = [];
for (let i = 0; i < finalUrls.length; i++) {
const url = finalUrls[i];
try {
this.log(`[${i + 1}/${finalUrls.length}] Scraping: ${url}`, Level.LOG);
await page.goto(url, {
waitUntil: 'domcontentloaded',
timeout: 30000
}).catch(() => {
this.log(`Failed to navigate to ${url}, skipping...`, Level.WARN);
});
await page.waitForLoadState('load', { timeout: 10000 }).catch(() => {});
const pageData = await page.evaluate(() => {
const getMeta = (name: string) => {
const meta = document.querySelector(`meta[name="${name}"], meta[property="${name}"]`);
return meta?.getAttribute('content') || '';
};
const getAllMeta = () => {
const metadata: Record<string, string> = {};
const metaTags = document.querySelectorAll('meta');
metaTags.forEach(tag => {
const name = tag.getAttribute('name') || tag.getAttribute('property');
const content = tag.getAttribute('content');
if (name && content) {
metadata[name] = content;
}
});
return metadata;
};
const title = document.title || '';
const bodyText = document.body?.innerText || '';
const elementsWithMxId = document.querySelectorAll('[data-mx-id]');
elementsWithMxId.forEach(el => el.removeAttribute('data-mx-id'));
const html = document.documentElement.outerHTML;
const links = Array.from(document.querySelectorAll('a')).map(a => a.href);
const allMetadata = getAllMeta();
return {
title,
description: getMeta('description'),
text: bodyText,
html: html,
links: links,
wordCount: bodyText.split(/\s+/).filter(w => w.length > 0).length,
metadata: {
...allMetadata,
title,
language: document.documentElement.lang || '',
favicon: (document.querySelector('link[rel="icon"], link[rel="shortcut icon"]') as HTMLLinkElement)?.href || '',
statusCode: 200
}
};
});
crawlResults.push({
metadata: {
...pageData.metadata,
url: url,
sourceURL: url
},
html: pageData.html,
text: pageData.text,
links: pageData.links,
wordCount: pageData.wordCount,
scrapedAt: new Date().toISOString()
});
this.log(`✓ Scraped ${url} (${pageData.wordCount} words)`, Level.LOG);
} catch (error) {
this.log(`Failed to scrape ${url}: ${error.message}`, Level.WARN);
crawlResults.push({
url: url,
error: error.message,
scrapedAt: new Date().toISOString()
});
}
}
this.log(`Successfully scraped ${crawlResults.length} pages`, Level.LOG);
const actionType = "crawl";
const actionName = "Crawl Results";
if (!this.serializableDataByType[actionType]) {
this.serializableDataByType[actionType] = {};
}
if (!this.serializableDataByType[actionType][actionName]) {
this.serializableDataByType[actionType][actionName] = [];
}
this.serializableDataByType[actionType][actionName] = crawlResults;
await this.options.serializableCallback({
scrapeList: this.serializableDataByType.scrapeList || {},
scrapeSchema: this.serializableDataByType.scrapeSchema || {},
crawl: this.serializableDataByType.crawl || {},
search: this.serializableDataByType.search || {}
});
} catch (error) {
this.log(`Crawl action failed: ${error.message}`, Level.ERROR);
throw new Error(`Crawl execution error: ${error.message}`);
}
},
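// Illustrative sketch of a crawlConfig the crawl action above accepts. The field
// names follow the parameter type; the concrete values are assumptions. Note that
// includePaths/excludePaths entries are applied as regular expressions against the
// full URL by the filter logic above.
//
//   {
//     mode: 'path',              // stay within the starting path
//     limit: 50,                 // cap on URLs to scrape
//     maxDepth: 2,
//     includePaths: ['/docs/'],
//     excludePaths: ['\\.pdf$'],
//     useSitemap: true,          // seed from /sitemap.xml (and nested sitemaps)
//     followLinks: true,         // also harvest <a href> links from the page
//     respectRobots: true
//   }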
search: async (searchConfig: {
query: string;
limit: number;
provider?: 'duckduckgo';
filters?: {
timeRange?: 'day' | 'week' | 'month' | 'year';
location?: string;
lang?: string;
};
mode: 'discover' | 'scrape';
}) => {
if (this.isAborted) {
this.log('Workflow aborted, stopping search', Level.WARN);
return;
}
if (this.options.debugChannel?.setActionType) {
this.options.debugChannel.setActionType('search');
}
searchConfig.provider = 'duckduckgo';
this.log(`Performing DuckDuckGo search for: ${searchConfig.query}`, Level.LOG);
try {
let searchUrl = `https://duckduckgo.com/?q=${encodeURIComponent(searchConfig.query)}`;
if (searchConfig.filters?.timeRange) {
const timeMap: Record<string, string> = {
'day': 'd',
'week': 'w',
'month': 'm',
'year': 'y'
};
searchUrl += `&df=${timeMap[searchConfig.filters.timeRange]}`;
}
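// e.g. query "playwright scraping" with timeRange 'week' yields:
// https://duckduckgo.com/?q=playwright%20scraping&df=w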
const initialDelay = 500 + Math.random() * 1000;
await new Promise(resolve => setTimeout(resolve, initialDelay));
await page.goto(searchUrl, { waitUntil: 'domcontentloaded', timeout: 30000 });
await page.waitForLoadState('load', { timeout: 10000 }).catch(() => {
this.log('Load state timeout, continuing anyway', Level.WARN);
});
const pageLoadDelay = 2000 + Math.random() * 1500;
await new Promise(resolve => setTimeout(resolve, pageLoadDelay));
let searchResults: any[] = [];
let retryCount = 0;
const maxRetries = 2;
while (searchResults.length === 0 && retryCount <= maxRetries) {
if (retryCount > 0) {
this.log(`Retry attempt ${retryCount}/${maxRetries} for DuckDuckGo search...`, Level.LOG);
const retryDelay = 1000 * Math.pow(2, retryCount) + Math.random() * 1000;
await new Promise(resolve => setTimeout(resolve, retryDelay));
}
this.log('Attempting to extract DuckDuckGo search results...', Level.LOG);
await page.waitForSelector('[data-testid="result"], .result', { timeout: 5000 }).catch(() => {
this.log('DuckDuckGo results not found on initial wait', Level.WARN);
});
let currentResultCount = 0;
const maxLoadAttempts = Math.ceil(searchConfig.limit / 10) * 2;
let loadAttempts = 0;
let noNewResultsCount = 0;
while (currentResultCount < searchConfig.limit && loadAttempts < maxLoadAttempts && noNewResultsCount < 3) {
const previousCount = currentResultCount;
currentResultCount = await page.evaluate(() => {
const selectors = [
'[data-testid="result"]',
'article[data-testid="result"]',
'li[data-layout="organic"]',
'.result',
'article[data-testid]'
];
for (const selector of selectors) {
const elements = document.querySelectorAll(selector);
if (elements.length > 0) {
return elements.length;
}
}
return 0;
});
if (currentResultCount >= searchConfig.limit) {
this.log(`Reached desired result count: ${currentResultCount}`, Level.LOG);
break;
}
if (currentResultCount === previousCount) {
noNewResultsCount++;
this.log(`No new results after load more (attempt ${noNewResultsCount}/3)`, Level.WARN);
if (noNewResultsCount >= 3) break;
} else {
noNewResultsCount = 0;
this.log(`Current results count: ${currentResultCount}/${searchConfig.limit}`, Level.LOG);
}
await page.evaluate(() => {
window.scrollTo(0, document.body.scrollHeight);
});
await new Promise(resolve => setTimeout(resolve, 800));
const loadMoreClicked = await page.evaluate(() => {
const selectors = [
'#more-results',
'button:has-text("More results")',
'button:has-text("more results")',
'button[id*="more"]',
'button:has-text("Load more")'
];
for (const selector of selectors) {
try {
const button = document.querySelector(selector) as HTMLButtonElement;
if (button && button.offsetParent !== null) {
button.click();
console.log(`Clicked load more button with selector: ${selector}`);
return true;
}
} catch (e) {
continue;
}
}
return false;
});
if (loadMoreClicked) {
this.log('Clicked "More results" button', Level.LOG);
await new Promise(resolve => setTimeout(resolve, 1500 + Math.random() * 1000));
} else {
this.log('No "More results" button found, results may be limited', Level.WARN);
break;
}
loadAttempts++;
}
this.log(`Finished pagination. Total results available: ${currentResultCount}`, Level.LOG);
searchResults = await page.evaluate((limit: number) => {
const results: any[] = [];
const cleanDescription = (text: string): string => {
if (!text) return '';
let cleaned = text.replace(/^\d+\s+(second|minute|hour|day|week|month|year)s?\s+ago\s*/i, '');
cleaned = cleaned.replace(/^[A-Z][a-z]{2}\s+\d{1,2},?\s+\d{4}\s*[—\-]\s*/i, '');
cleaned = cleaned.replace(/^\d{4}-\d{2}-\d{2}\s*[—\-]\s*/i, '');
cleaned = cleaned.trim().replace(/\s+/g, ' ');
return cleaned;
};
const selectors = [
'[data-testid="result"]',
'article[data-testid="result"]',
'li[data-layout="organic"]',
'.result',
'article[data-testid]'
];
let allElements: Element[] = [];
for (const selector of selectors) {
const elements = Array.from(document.querySelectorAll(selector));
if (elements.length > 0) {
console.log(`Found ${elements.length} DDG elements with: ${selector}`);
allElements = elements;
break;
}
}
for (let i = 0; i < Math.min(allElements.length, limit); i++) {
const element = allElements[i];
const titleEl = element.querySelector('h2, [data-testid="result-title-a"], h3, [data-testid="result-title"]');
let linkEl = titleEl?.querySelector('a[href]') as HTMLAnchorElement;
if (!linkEl) {
linkEl = element.querySelector('a[href]') as HTMLAnchorElement;
}
if (!linkEl || !linkEl.href) continue;
let actualUrl = linkEl.href;
if (actualUrl.includes('uddg=')) {
try {
const urlParams = new URLSearchParams(actualUrl.split('?')[1]);
const uddgUrl = urlParams.get('uddg');
if (uddgUrl) {
actualUrl = decodeURIComponent(uddgUrl);
}
} catch (e) {
console.log('Failed to parse uddg parameter:', e);
}
}
if (actualUrl.includes('duckduckgo.com')) {
console.log(`Skipping DDG internal URL: ${actualUrl}`);
continue;
}
const descEl = element.querySelector('[data-result="snippet"], .result__snippet, [data-testid="result-snippet"]');
if (titleEl && titleEl.textContent && actualUrl) {
const rawDescription = (descEl?.textContent || '').trim();
const cleanedDescription = cleanDescription(rawDescription);
results.push({
url: actualUrl,
title: titleEl.textContent.trim(),
description: cleanedDescription,
position: results.length + 1
});
}
}
console.log(`Extracted ${results.length} DuckDuckGo search results`);
return results;
}, searchConfig.limit);
if (searchResults.length === 0) {
this.log(`No DuckDuckGo results found (attempt ${retryCount + 1}/${maxRetries + 1})`, Level.WARN);
retryCount++;
} else {
this.log(`Successfully extracted ${searchResults.length} results`, Level.LOG);
break;
}
}
this.log(`Search found ${searchResults.length} results`, Level.LOG);
if (searchConfig.mode === 'discover') {
const actionType = "search";
const actionName = "Search Results";
if (!this.serializableDataByType[actionType]) {
this.serializableDataByType[actionType] = {};
}
if (!this.serializableDataByType[actionType][actionName]) {
this.serializableDataByType[actionType][actionName] = {};
}
const searchData = {
query: searchConfig.query,
provider: searchConfig.provider,
filters: searchConfig.filters || {},
resultsCount: searchResults.length,
results: searchResults,
searchedAt: new Date().toISOString()
};
this.serializableDataByType[actionType][actionName] = searchData;
await this.options.serializableCallback({
scrapeList: this.serializableDataByType.scrapeList || {},
scrapeSchema: this.serializableDataByType.scrapeSchema || {},
crawl: this.serializableDataByType.crawl || {},
search: this.serializableDataByType.search || {}
});
this.log(`Search completed in discover mode with ${searchResults.length} results`, Level.LOG);
return;
}
this.log(`Starting to scrape content from ${searchResults.length} search results...`, Level.LOG);
const scrapedResults = [];
for (let i = 0; i < searchResults.length; i++) {
const result = searchResults[i];
try {
this.log(`[${i + 1}/${searchResults.length}] Scraping: ${result.url}`, Level.LOG);
await page.goto(result.url, {
waitUntil: 'domcontentloaded',
timeout: 30000
}).catch(() => {
this.log(`Failed to navigate to ${result.url}, skipping...`, Level.WARN);
});
await page.waitForLoadState('load', { timeout: 10000 }).catch(() => {});
const pageData = await page.evaluate(() => {
const getMeta = (name: string) => {
const meta = document.querySelector(`meta[name="${name}"], meta[property="${name}"]`);
return meta?.getAttribute('content') || '';
};
const getAllMeta = () => {
const metadata: Record<string, string> = {};
const metaTags = document.querySelectorAll('meta');
metaTags.forEach(tag => {
const name = tag.getAttribute('name') || tag.getAttribute('property');
const content = tag.getAttribute('content');
if (name && content) {
metadata[name] = content;
}
});
return metadata;
};
const title = document.title || '';
const bodyText = document.body?.innerText || '';
const elementsWithMxId = document.querySelectorAll('[data-mx-id]');
elementsWithMxId.forEach(el => el.removeAttribute('data-mx-id'));
const html = document.documentElement.outerHTML;
const links = Array.from(document.querySelectorAll('a')).map(a => a.href);
const allMetadata = getAllMeta();
return {
title,
description: getMeta('description'),
text: bodyText,
html: html,
links: links,
wordCount: bodyText.split(/\s+/).filter(w => w.length > 0).length,
metadata: {
...allMetadata,
title,
language: document.documentElement.lang || '',
favicon: (document.querySelector('link[rel="icon"], link[rel="shortcut icon"]') as HTMLLinkElement)?.href || '',
statusCode: 200
}
};
});
scrapedResults.push({
searchResult: {
query: searchConfig.query,
position: result.position,
searchTitle: result.title,
searchDescription: result.description,
},
metadata: {
...pageData.metadata,
url: result.url,
sourceURL: result.url
},
html: pageData.html,
text: pageData.text,
links: pageData.links,
wordCount: pageData.wordCount,
scrapedAt: new Date().toISOString()
});
this.log(`✓ Scraped ${result.url} (${pageData.wordCount} words)`, Level.LOG);
} catch (error) {
this.log(`Failed to scrape ${result.url}: ${error.message}`, Level.WARN);
scrapedResults.push({
searchResult: {
query: searchConfig.query,
position: result.position,
searchTitle: result.title,
searchDescription: result.description,
},
url: result.url,
error: error.message,
scrapedAt: new Date().toISOString()
});
}
}
this.log(`Successfully scraped ${scrapedResults.length} search results`, Level.LOG);
const actionType = "search";
const actionName = "Search Results";
if (!this.serializableDataByType[actionType]) {
this.serializableDataByType[actionType] = {};
}
if (!this.serializableDataByType[actionType][actionName]) {
this.serializableDataByType[actionType][actionName] = {};
}
const searchData = {
query: searchConfig.query,
provider: searchConfig.provider,
filters: searchConfig.filters || {},
mode: searchConfig.mode,
resultsCount: scrapedResults.length,
results: scrapedResults,
searchedAt: new Date().toISOString()
};
this.serializableDataByType[actionType][actionName] = searchData;
await this.options.serializableCallback({
scrapeList: this.serializableDataByType.scrapeList || {},
scrapeSchema: this.serializableDataByType.scrapeSchema || {},
crawl: this.serializableDataByType.crawl || {},
search: this.serializableDataByType.search || {}
});
} catch (error) {
this.log(`Search action failed: ${error.message}`, Level.ERROR);
throw new Error(`Search execution error: ${error.message}`);
}
},
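// Illustrative sketch of a searchConfig for the search action above (values are
// assumptions). In 'discover' mode only the result list is stored; 'scrape' mode
// additionally visits and scrapes each result URL.
//
//   {
//     query: 'site:example.com pricing',
//     limit: 20,
//     mode: 'discover',
//     filters: { timeRange: 'month' }
//   }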
flag: async () => new Promise((res) => {
if (this.options.debugChannel?.setActionType) {
this.options.debugChannel.setActionType('flag');
@@ -885,7 +1633,9 @@ export default class Interpreter extends EventEmitter {
this.serializableDataByType[actionType][actionName] = [...allResults];
await this.options.serializableCallback({
scrapeList: this.serializableDataByType.scrapeList,
-    scrapeSchema: this.serializableDataByType.scrapeSchema
+    scrapeSchema: this.serializableDataByType.scrapeSchema,
+    crawl: this.serializableDataByType.crawl || {},
+    search: this.serializableDataByType.search || {}
});
};
@@ -1735,7 +2485,7 @@ export default class Interpreter extends EventEmitter {
// Clear accumulated data to free memory
this.cumulativeResults = [];
this.namedResults = {};
-  this.serializableDataByType = { scrapeList: {}, scrapeSchema: {} };
+  this.serializableDataByType = { scrapeList: {}, scrapeSchema: {}, crawl: {}, search: {} };
// Reset state
this.isAborted = false;

View File

@@ -28,7 +28,7 @@ type MethodNames<T> = {
[K in keyof T]: T[K] extends Function ? K : never;
}[keyof T];
- export type CustomFunctions = 'scrape' | 'scrapeSchema' | 'scroll' | 'screenshot' | 'script' | 'enqueueLinks' | 'flag' | 'scrapeList' | 'scrapeListAuto';
+ export type CustomFunctions = 'scrape' | 'scrapeSchema' | 'scroll' | 'screenshot' | 'script' | 'enqueueLinks' | 'flag' | 'scrapeList' | 'scrapeListAuto' | 'crawl' | 'search';
export type What = {
action: MethodNames<Page> | CustomFunctions,

View File

@@ -13,8 +13,8 @@ import { AuthenticatedRequest } from "../routes/record"
import {capture} from "../utils/analytics";
import { Page } from "playwright-core";
import { WorkflowFile } from "maxun-core";
- import { addGoogleSheetUpdateTask, googleSheetUpdateTasks, processGoogleSheetUpdates } from "../workflow-management/integrations/gsheet";
- import { addAirtableUpdateTask, airtableUpdateTasks, processAirtableUpdates } from "../workflow-management/integrations/airtable";
+ import { addGoogleSheetUpdateTask, processGoogleSheetUpdates } from "../workflow-management/integrations/gsheet";
+ import { addAirtableUpdateTask, processAirtableUpdates } from "../workflow-management/integrations/airtable";
import { sendWebhook } from "../routes/webhook";
import { convertPageToHTML, convertPageToMarkdown, convertPageToScreenshot } from '../markdownify/scrape';
@@ -309,8 +309,8 @@ router.get("/robots/:id/runs",requireAPIKey, async (req: Request, res: Response)
statusCode: 200,
messageCode: "success",
runs: {
totalCount: formattedRuns.length,
items: formattedRuns,
},
};
@@ -342,6 +342,8 @@ function formatRunResponse(run: any) {
data: {
textData: {},
listData: {},
crawlData: {},
searchData: {},
markdown: '',
html: ''
},
@@ -358,6 +360,14 @@ function formatRunResponse(run: any) {
formattedRun.data.listData = output.scrapeList;
}
if (output.crawl && typeof output.crawl === 'object') {
formattedRun.data.crawlData = output.crawl;
}
if (output.search && typeof output.search === 'object') {
formattedRun.data.searchData = output.search;
}
if (output.markdown && Array.isArray(output.markdown)) {
formattedRun.data.markdown = output.markdown[0]?.content || '';
}
@@ -466,7 +476,7 @@ router.get("/robots/:id/runs/:runId", requireAPIKey, async (req: Request, res: R
}
});
- async function createWorkflowAndStoreMetadata(id: string, userId: string) {
+ async function createWorkflowAndStoreMetadata(id: string, userId: string, isSDK: boolean) {
try {
const recording = await Robot.findOne({
where: {
@@ -510,7 +520,9 @@ async function createWorkflowAndStoreMetadata(id: string, userId: string) {
interpreterSettings: { maxConcurrency: 1, maxRepeats: 1, debug: true },
log: '',
runId,
-    runByAPI: true,
+    runByUserId: userId,
+    runByAPI: !isSDK,
+    runBySDK: isSDK,
serializableOutput: {},
binaryOutput: {},
retryCount: 0
@@ -687,7 +699,6 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[
let formats = recording.recording_meta.formats || ['markdown'];
// Override if API request defines formats
if (requestedFormats && Array.isArray(requestedFormats) && requestedFormats.length > 0) {
formats = requestedFormats.filter((f): f is 'markdown' | 'html' | 'screenshot-visible' | 'screenshot-fullpage' =>
['markdown', 'html', 'screenshot-visible', 'screenshot-fullpage'].includes(f)
@@ -714,50 +725,70 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[
const SCRAPE_TIMEOUT = 120000;
if (formats.includes('markdown')) {
-    const markdownPromise = convertPageToMarkdown(url, currentPage);
-    const timeoutPromise = new Promise<never>((_, reject) => {
-    setTimeout(() => reject(new Error(`Markdown conversion timed out after ${SCRAPE_TIMEOUT/1000}s`)), SCRAPE_TIMEOUT);
-    });
-    markdown = await Promise.race([markdownPromise, timeoutPromise]);
-    serializableOutput.markdown = [{ content: markdown }];
+    try {
+    const markdownPromise = convertPageToMarkdown(url, currentPage);
+    const timeoutPromise = new Promise<never>((_, reject) => {
+    setTimeout(() => reject(new Error(`Markdown conversion timed out after ${SCRAPE_TIMEOUT / 1000}s`)), SCRAPE_TIMEOUT);
+    });
+    markdown = await Promise.race([markdownPromise, timeoutPromise]);
+    if (markdown && markdown.trim().length > 0) {
+    serializableOutput.markdown = [{ content: markdown }];
+    }
+    } catch (error: any) {
+    logger.log('warn', `Markdown conversion failed for API run ${plainRun.runId}: ${error.message}`);
+    }
}
if (formats.includes('html')) {
-    const htmlPromise = convertPageToHTML(url, currentPage);
-    const timeoutPromise = new Promise<never>((_, reject) => {
-    setTimeout(() => reject(new Error(`HTML conversion timed out after ${SCRAPE_TIMEOUT/1000}s`)), SCRAPE_TIMEOUT);
-    });
-    html = await Promise.race([htmlPromise, timeoutPromise]);
-    serializableOutput.html = [{ content: html }];
+    try {
+    const htmlPromise = convertPageToHTML(url, currentPage);
+    const timeoutPromise = new Promise<never>((_, reject) => {
+    setTimeout(() => reject(new Error(`HTML conversion timed out after ${SCRAPE_TIMEOUT / 1000}s`)), SCRAPE_TIMEOUT);
+    });
+    html = await Promise.race([htmlPromise, timeoutPromise]);
+    if (html && html.trim().length > 0) {
+    serializableOutput.html = [{ content: html }];
+    }
+    } catch (error: any) {
+    logger.log('warn', `HTML conversion failed for API run ${plainRun.runId}: ${error.message}`);
+    }
}
if (formats.includes("screenshot-visible")) {
-    const screenshotPromise = convertPageToScreenshot(url, currentPage, false);
-    const timeoutPromise = new Promise<never>((_, reject) => {
-    setTimeout(() => reject(new Error(`Screenshot conversion timed out after ${SCRAPE_TIMEOUT/1000}s`)), SCRAPE_TIMEOUT);
-    });
-    const screenshotBuffer = await Promise.race([screenshotPromise, timeoutPromise]);
+    try {
+    const screenshotPromise = convertPageToScreenshot(url, currentPage, false);
+    const timeoutPromise = new Promise<never>((_, reject) => {
+    setTimeout(() => reject(new Error(`Screenshot conversion timed out after ${SCRAPE_TIMEOUT / 1000}s`)), SCRAPE_TIMEOUT);
+    });
+    const screenshotBuffer = await Promise.race([screenshotPromise, timeoutPromise]);
-    if (!binaryOutput['screenshot-visible']) {
-    binaryOutput['screenshot-visible'] = {
-    data: screenshotBuffer.toString('base64'),
-    mimeType: 'image/png'
-    };
+    if (screenshotBuffer && screenshotBuffer.length > 0) {
+    binaryOutput['screenshot-visible'] = {
+    data: screenshotBuffer.toString('base64'),
+    mimeType: 'image/png'
+    };
+    }
+    } catch (error: any) {
+    logger.log('warn', `Screenshot-visible conversion failed for API run ${plainRun.runId}: ${error.message}`);
+    }
}
if (formats.includes("screenshot-fullpage")) {
-    const screenshotPromise = convertPageToScreenshot(url, currentPage, true);
-    const timeoutPromise = new Promise<never>((_, reject) => {
-    setTimeout(() => reject(new Error(`Screenshot conversion timed out after ${SCRAPE_TIMEOUT/1000}s`)), SCRAPE_TIMEOUT);
-    });
-    const screenshotBuffer = await Promise.race([screenshotPromise, timeoutPromise]);
+    try {
+    const screenshotPromise = convertPageToScreenshot(url, currentPage, true);
+    const timeoutPromise = new Promise<never>((_, reject) => {
+    setTimeout(() => reject(new Error(`Screenshot conversion timed out after ${SCRAPE_TIMEOUT / 1000}s`)), SCRAPE_TIMEOUT);
+    });
+    const screenshotBuffer = await Promise.race([screenshotPromise, timeoutPromise]);
-    if (!binaryOutput['screenshot-fullpage']) {
-    binaryOutput['screenshot-fullpage'] = {
-    data: screenshotBuffer.toString('base64'),
-    mimeType: 'image/png'
-    };
+    if (screenshotBuffer && screenshotBuffer.length > 0) {
+    binaryOutput['screenshot-fullpage'] = {
+    data: screenshotBuffer.toString('base64'),
+    mimeType: 'image/png'
+    };
+    }
+    } catch (error: any) {
+    logger.log('warn', `Screenshot-fullpage conversion failed for API run ${plainRun.runId}: ${error.message}`);
+    }
}
@@ -769,7 +800,6 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[
binaryOutput,
});
// Upload binary output (screenshots) to MinIO if present
let uploadedBinaryOutput: Record<string, string> = {};
if (Object.keys(binaryOutput).length > 0) {
const binaryOutputService = new BinaryOutputService('maxun-run-screenshots');
@@ -779,7 +809,6 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[
logger.log('info', `Markdown robot execution completed for API run ${id}`);
// Push success socket event
try {
const completionData = {
runId: plainRun.runId,
@@ -800,7 +829,6 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[
);
}
// Build webhook payload
const webhookPayload: any = {
robot_id: plainRun.robotMetaId,
run_id: plainRun.runId,
@@ -814,8 +842,8 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[
},
};
- if (formats.includes('markdown')) webhookPayload.markdown = markdown;
- if (formats.includes('html')) webhookPayload.html = html;
+ if (serializableOutput.markdown) webhookPayload.markdown = markdown;
+ if (serializableOutput.html) webhookPayload.html = html;
if (uploadedBinaryOutput['screenshot-visible']) webhookPayload.screenshot_visible = uploadedBinaryOutput['screenshot-visible'];
if (uploadedBinaryOutput['screenshot-fullpage']) webhookPayload.screenshot_fullpage = uploadedBinaryOutput['screenshot-fullpage'];
@@ -834,9 +862,12 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[
capture("maxun-oss-run-created-api", {
runId: plainRun.runId,
-    user_id: userId,
+    userId: userId,
+    robotId: recording.recording_meta.id,
+    robotType: "scrape",
+    source: "api",
+    status: "success",
-    robot_type: "scrape",
+    createdAt: new Date().toISOString(),
formats
});
@@ -858,14 +889,14 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[
log: `${formats.join(', ')} conversion failed: ${error.message}`,
});
// Send failure socket event
try {
const failureData = {
runId: plainRun.runId,
robotMetaId: plainRun.robotMetaId,
robotName: recording.recording_meta.name,
status: 'failed',
-    finishedAt: new Date().toLocaleString()
+    finishedAt: new Date().toLocaleString(),
+    error: error.message
};
serverIo
@@ -895,11 +926,14 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[
logger.log('warn', `Failed to send webhook for failed API scrape run ${plainRun.runId}: ${webhookError.message}`);
}
capture("maxun-oss-run-created-api", {
capture("maxun-oss-run-created", {
runId: plainRun.runId,
-    user_id: userId,
+    userId: userId,
+    robotId: recording.recording_meta.id,
+    robotType: "scrape",
+    source: "api",
+    status: "failed",
-    robot_type: "scrape",
+    createdAt: new Date().toISOString(),
formats
});
@@ -993,15 +1027,18 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[
const totalRowsExtracted = totalSchemaItemsExtracted + totalListItemsExtracted;
- capture('maxun-oss-run-created-api',{
+ capture('maxun-oss-run-created',{
runId: id,
-    created_at: new Date().toISOString(),
+    userId: userId,
+    robotId: recording.recording_meta.id,
+    robotType: recording.recording_meta.type || 'extract',
+    source: 'api',
+    createdAt: new Date().toISOString(),
status: 'success',
+    totalRowsExtracted,
-    schemaItemsExtracted: totalSchemaItemsExtracted,
-    listItemsExtracted: totalListItemsExtracted,
+    totalSchemaItemsExtracted,
+    totalListItemsExtracted,
extractedScreenshotsCount,
is_llm: (recording.recording_meta as any).isLLM,
-    totalRowsExtracted
}
)
@@ -1019,6 +1056,16 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[
typeof parsedOutput.scrapeSchema === "string"
? JSON.parse(parsedOutput.scrapeSchema)
: parsedOutput.scrapeSchema || {};
const parsedCrawl =
typeof parsedOutput.crawl === "string"
? JSON.parse(parsedOutput.crawl)
: parsedOutput.crawl || {};
const parsedSearch =
typeof parsedOutput.search === "string"
? JSON.parse(parsedOutput.search)
: parsedOutput.search || {};
const webhookPayload = {
robot_id: plainRun.robotMetaId,
@@ -1030,6 +1077,8 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[
extracted_data: {
captured_texts: parsedSchema || {},
captured_lists: parsedList || {},
crawl_data: parsedCrawl || {},
search_data: parsedSearch || {},
captured_texts_count: totalSchemaItemsExtracted,
captured_lists_count: totalListItemsExtracted,
screenshots_count: extractedScreenshotsCount
@@ -1097,7 +1146,6 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[
const recording = await Robot.findOne({ where: { 'recording_meta.id': run.robotMetaId }, raw: true });
// Trigger webhooks for run failure
const failedWebhookPayload = {
robot_id: run.robotMetaId,
run_id: run.runId,
@@ -1123,10 +1171,14 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[
logger.log('error', `Failed to send failure webhooks for run ${run.runId}: ${webhookError.message}`);
}
capture(
- 'maxun-oss-run-created-api',
+ 'maxun-oss-run-created',
{
runId: id,
-    created_at: new Date().toISOString(),
+    userId: userId,
+    robotId: recording?.recording_meta?.id || run.robotMetaId,
+    robotType: recording?.recording_meta?.type || 'extract',
+    source: 'api',
+    createdAt: new Date().toISOString(),
status: 'failed',
is_llm: (recording?.recording_meta as any)?.isLLM,
}
@@ -1139,11 +1191,11 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[
}
}
- export async function handleRunRecording(id: string, userId: string, requestedFormats?: string[]) {
+ export async function handleRunRecording(id: string, userId: string, isSDK: boolean = false) {
let socket: Socket | null = null;
try {
-    const result = await createWorkflowAndStoreMetadata(id, userId);
+    const result = await createWorkflowAndStoreMetadata(id, userId, isSDK);
const { browserId, runId: newRunId } = result;
if (!browserId || !newRunId || !userId) {
@@ -1167,6 +1219,10 @@ export async function handleRunRecording(id: string, userId: string, requestedFo
cleanupSocketConnection(socket!, browserId, newRunId);
});
socket.on('error', (error: Error) => {
logger.error(`Socket error for API run ${newRunId}: ${error.message}`);
});
socket.on('disconnect', () => {
cleanupSocketConnection(socket!, browserId, newRunId);
});
@@ -1318,9 +1374,7 @@ router.post("/robots/:id/runs", requireAPIKey, async (req: AuthenticatedRequest,
return res.status(401).json({ ok: false, error: 'Unauthorized' });
}
-    const requestedFormats = req.body.formats;
-    const runId = await handleRunRecording(req.params.id, req.user.id, requestedFormats);
+    const runId = await handleRunRecording(req.params.id, req.user.id);
if (!runId) {
throw new Error('Run ID is undefined');

View File

@@ -450,13 +450,35 @@ router.post("/sdk/robots/:id/execute", requireAPIKey, async (req: AuthenticatedR
}
}
let crawlData: any[] = [];
if (run.serializableOutput?.crawl) {
const crawl: any = run.serializableOutput.crawl;
if (Array.isArray(crawl)) {
crawlData = crawl;
}
else if (typeof crawl === 'object') {
const crawlValues = Object.values(crawl);
if (crawlValues.length > 0 && Array.isArray(crawlValues[0])) {
crawlData = crawlValues[0] as any[];
}
}
}
let searchData: any = {};
if (run.serializableOutput?.search) {
searchData = run.serializableOutput.search;
}
return res.status(200).json({
data: {
runId: run.runId,
status: run.status,
data: {
textData: run.serializableOutput?.scrapeSchema || {},
-    listData: listData
+    listData: listData,
+    crawlData: crawlData,
+    searchData: searchData
},
screenshots: Object.values(run.binaryOutput || {})
}
@@ -640,6 +662,202 @@ router.post("/sdk/robots/:id/runs/:runId/abort", requireAPIKey, async (req: Auth
}
});
/**
* Create a crawl robot programmatically
* POST /api/sdk/crawl
*/
router.post("/sdk/crawl", requireAPIKey, async (req: AuthenticatedRequest, res: Response) => {
try {
const user = req.user;
const { url, name, crawlConfig } = req.body;
if (!url || !crawlConfig) {
return res.status(400).json({
error: "URL and crawl configuration are required"
});
}
try {
new URL(url);
} catch (err) {
return res.status(400).json({
error: "Invalid URL format"
});
}
if (typeof crawlConfig !== 'object') {
return res.status(400).json({
error: "crawlConfig must be an object"
});
}
const robotName = name || `Crawl Robot - ${new URL(url).hostname}`;
const robotId = uuid();
const metaId = uuid();
const robot = await Robot.create({
id: robotId,
userId: user.id,
recording_meta: {
name: robotName,
id: metaId,
createdAt: new Date().toISOString(),
updatedAt: new Date().toISOString(),
pairs: 1,
params: [],
type: 'crawl',
url: url,
},
recording: {
workflow: [
{
where: { url },
what: [
{ action: 'flag', args: ['generated'] },
{
action: 'crawl',
args: [crawlConfig],
name: 'Crawl'
}
]
},
{
where: { url: 'about:blank' },
what: [
{
action: 'goto',
args: [url]
},
{
action: 'waitForLoadState',
args: ['networkidle']
}
]
}
]
}
});
logger.info(`[SDK] Crawl robot created: ${metaId} (db: ${robotId}) by user ${user.id}`);
capture("maxun-oss-robot-created", {
userId: user.id.toString(),
robotId: metaId,
robotName: robotName,
url: url,
robotType: 'crawl',
crawlConfig: crawlConfig,
source: 'sdk'
});
return res.status(201).json({
data: robot,
message: "Crawl robot created successfully"
});
} catch (error: any) {
logger.error("[SDK] Error creating crawl robot:", error);
return res.status(500).json({
error: "Failed to create crawl robot",
message: error.message
});
}
});
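// Illustrative request for the endpoint above (payload values are assumptions):
//
//   POST /api/sdk/crawl
//   {
//     "url": "https://example.com/docs",
//     "name": "Docs crawler",
//     "crawlConfig": {
//       "mode": "path", "limit": 25, "maxDepth": 2,
//       "includePaths": [], "excludePaths": [],
//       "useSitemap": true, "followLinks": true, "respectRobots": true
//     }
//   }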
/**
* Create a search robot programmatically
* POST /api/sdk/search
*/
router.post("/sdk/search", requireAPIKey, async (req: AuthenticatedRequest, res: Response) => {
try {
const user = req.user;
const { name, searchConfig } = req.body;
if (!searchConfig) {
return res.status(400).json({
error: "Search configuration is required"
});
}
if (!searchConfig.query) {
return res.status(400).json({
error: "searchConfig must include a query"
});
}
if (typeof searchConfig !== 'object') {
return res.status(400).json({
error: "searchConfig must be an object"
});
}
if (searchConfig.mode && !['discover', 'scrape'].includes(searchConfig.mode)) {
return res.status(400).json({
error: "searchConfig.mode must be either 'discover' or 'scrape'"
});
}
searchConfig.provider = 'duckduckgo';
const robotName = name || `Search Robot - ${searchConfig.query}`;
const robotId = uuid();
const metaId = uuid();
const robot = await Robot.create({
id: robotId,
userId: user.id,
recording_meta: {
name: robotName,
id: metaId,
createdAt: new Date().toISOString(),
updatedAt: new Date().toISOString(),
pairs: 1,
params: [],
type: 'search',
},
recording: {
workflow: [
{
where: { url: 'about:blank' },
what: [
{
action: 'search',
args: [searchConfig],
name: 'Search'
}
]
}
]
}
});
logger.info(`[SDK] Search robot created: ${metaId} (db: ${robotId}) by user ${user.id}`);
capture("maxun-oss-robot-created", {
userId: user.id.toString(),
robotId: metaId,
robotName: robotName,
robotType: 'search',
searchQuery: searchConfig.query,
searchProvider: searchConfig.provider || 'duckduckgo',
searchLimit: searchConfig.limit || 10,
source: 'sdk'
});
return res.status(201).json({
data: robot,
message: "Search robot created successfully"
});
} catch (error: any) {
logger.error("[SDK] Error creating search robot:", error);
return res.status(500).json({
error: "Failed to create search robot",
message: error.message
});
}
});
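// Illustrative request for the endpoint above (payload values are assumptions):
//
//   POST /api/sdk/search
//   {
//     "name": "Pricing research",
//     "searchConfig": { "query": "saas pricing pages", "limit": 10, "mode": "scrape" }
//   }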
/**
* LLM-based extraction - generate workflow from natural language prompt
* POST /api/sdk/extract/llm

View File

@@ -9,7 +9,7 @@ interface RobotMeta {
pairs: number;
updatedAt: string;
params: any[];
- type?: 'extract' | 'scrape';
+ type?: 'extract' | 'scrape' | 'crawl' | 'search';
url?: string;
formats?: ('markdown' | 'html' | 'screenshot-visible' | 'screenshot-fullpage')[];
isLLM?: boolean;

View File

@@ -23,6 +23,7 @@ interface RunAttributes {
runByUserId?: string;
runByScheduleId?: string;
runByAPI?: boolean;
runBySDK?: boolean;
serializableOutput: Record<string, any>;
binaryOutput: Record<string, string>;
retryCount?: number;

View File

@@ -132,7 +132,6 @@ async function processRunExecution(job: Job<ExecuteRunData>) {
logger.log('info', `Processing run execution job for runId: ${data.runId}, browserId: ${data.browserId}`);
try {
// Find the run
const run = await Run.findOne({ where: { runId: data.runId } });
if (!run) {
logger.log('error', `Run ${data.runId} not found in database`);
@@ -193,7 +192,6 @@ async function processRunExecution(job: Job<ExecuteRunData>) {
logger.log('info', `Browser ${browserId} found and ready for execution`);
try {
// Find the recording
const recording = await Robot.findOne({ where: { 'recording_meta.id': plainRun.robotMetaId }, raw: true });
if (!recording) {
@@ -473,11 +471,12 @@ async function processRunExecution(job: Job<ExecuteRunData>) {
interpretationInfo.binaryOutput
);
// Get the already persisted and credit-validated data from the run record
const finalRun = await Run.findByPk(run.id);
const categorizedOutput = {
scrapeSchema: finalRun?.serializableOutput?.scrapeSchema || {},
-    scrapeList: finalRun?.serializableOutput?.scrapeList || {}
+    scrapeList: finalRun?.serializableOutput?.scrapeList || {},
+    crawl: finalRun?.serializableOutput?.crawl || {},
+    search: finalRun?.serializableOutput?.search || {}
};
if (await isRunAborted()) {
@@ -489,10 +488,6 @@ async function processRunExecution(job: Job<ExecuteRunData>) {
status: 'success',
finishedAt: new Date().toLocaleString(),
log: interpretationInfo.log.join('\n'),
-    serializableOutput: JSON.parse(JSON.stringify({
-    scrapeSchema: categorizedOutput.scrapeSchema || {},
-    scrapeList: categorizedOutput.scrapeList || {},
-    })),
binaryOutput: uploadedBinaryOutput,
});
@@ -572,6 +567,8 @@ async function processRunExecution(job: Job<ExecuteRunData>) {
}, {} as Record<string, any[]>)
: {},
captured_lists: categorizedOutput.scrapeList,
crawl_data: categorizedOutput.crawl,
search_data: categorizedOutput.search,
captured_texts_count: totalSchemaItemsExtracted,
captured_lists_count: totalListItemsExtracted,
screenshots_count: extractedScreenshotsCount

View File

@@ -251,21 +251,18 @@ function handleWorkflowActions(workflow: any[], credentials: Credentials) {
router.put('/recordings/:id', requireSignIn, async (req: AuthenticatedRequest, res) => {
try {
const { id } = req.params;
const { name, limits, credentials, targetUrl, workflow: incomingWorkflow } = req.body;
// Validate input
- if (!name && !limits && !credentials && !targetUrl) {
+ if (!name && !limits && !credentials && !targetUrl && !incomingWorkflow) {
return res.status(400).json({ error: 'Either "name", "limits", "credentials" or "target_url" must be provided.' });
}
// Fetch the robot by ID
const robot = await Robot.findOne({ where: { 'recording_meta.id': id } });
if (!robot) {
return res.status(404).json({ error: 'Robot not found.' });
}
// Update fields if provided
if (name) {
robot.set('recording_meta', { ...robot.recording_meta, name });
}
@@ -274,7 +271,6 @@ router.put('/recordings/:id', requireSignIn, async (req: AuthenticatedRequest, r
robot.set('recording_meta', { ...robot.recording_meta, url: targetUrl });
const updatedWorkflow = [...robot.recording.workflow];
let foundGoto = false;
for (let i = updatedWorkflow.length - 1; i >= 0; i--) {
const step = updatedWorkflow[i];
@@ -289,7 +285,6 @@ router.put('/recordings/:id', requireSignIn, async (req: AuthenticatedRequest, r
robot.set('recording', { ...robot.recording, workflow: updatedWorkflow });
robot.changed('recording', true);
foundGoto = true;
i = -1;
break;
}
@@ -299,10 +294,9 @@ router.put('/recordings/:id', requireSignIn, async (req: AuthenticatedRequest, r
await robot.save();
// Start with existing workflow or allow client to supply a full workflow replacement
let workflow = incomingWorkflow && Array.isArray(incomingWorkflow)
? JSON.parse(JSON.stringify(incomingWorkflow))
-    : [...robot.recording.workflow]; // Create a copy of the workflow
+    : [...robot.recording.workflow];
if (credentials) {
workflow = handleWorkflowActions(workflow, credentials);
@@ -344,7 +338,7 @@ router.put('/recordings/:id', requireSignIn, async (req: AuthenticatedRequest, r
where: { 'recording_meta.id': id }
});
- const updatedRobot = await Robot.findOne({ where: { 'recording_meta.id': id } });
+ await Robot.findOne({ where: { 'recording_meta.id': id } });
logger.log('info', `Robot with ID ${id} was updated successfully.`);
@@ -1322,4 +1316,198 @@ export async function recoverOrphanedRuns() {
}
}
/**
* POST endpoint for creating a crawl robot
* @route POST /recordings/crawl
* @auth requireSignIn - JWT authentication required
*/
router.post('/recordings/crawl', requireSignIn, async (req: AuthenticatedRequest, res) => {
try {
const { url, name, crawlConfig } = req.body;
if (!url || !crawlConfig) {
return res.status(400).json({ error: 'URL and crawl configuration are required.' });
}
if (!req.user) {
return res.status(401).send({ error: 'Unauthorized' });
}
try {
new URL(url);
} catch (err) {
return res.status(400).json({ error: 'Invalid URL format' });
}
const robotName = name || `Crawl Robot - ${new URL(url).hostname}`;
const currentTimestamp = new Date().toLocaleString('en-US');
const robotId = uuid();
const newRobot = await Robot.create({
id: uuid(),
userId: req.user.id,
recording_meta: {
name: robotName,
id: robotId,
createdAt: currentTimestamp,
updatedAt: currentTimestamp,
pairs: 1,
params: [],
type: 'crawl',
url: url,
},
recording: {
workflow: [
{
where: { url },
what: [
{ action: 'flag', args: ['generated'] },
{
action: 'crawl',
args: [crawlConfig],
name: 'Crawl'
}
]
},
{
where: { url: 'about:blank' },
what: [
{
action: 'goto',
args: [url]
},
{
action: 'waitForLoadState',
args: ['networkidle']
}
]
}
]
},
google_sheet_email: null,
google_sheet_name: null,
google_sheet_id: null,
google_access_token: null,
google_refresh_token: null,
airtable_base_id: null,
airtable_base_name: null,
airtable_table_name: null,
airtable_table_id: null,
airtable_access_token: null,
airtable_refresh_token: null,
schedule: null,
webhooks: null
});
logger.log('info', `Crawl robot created with id: ${newRobot.id}`);
capture('maxun-oss-robot-created', {
userId: req.user.id.toString(),
robotId: robotId,
robotName: robotName,
url: url,
robotType: 'crawl',
crawlConfig: crawlConfig
});
return res.status(201).json({
message: 'Crawl robot created successfully.',
robot: newRobot,
});
} catch (error) {
if (error instanceof Error) {
logger.log('error', `Error creating crawl robot: ${error.message}`);
return res.status(500).json({ error: error.message });
} else {
logger.log('error', 'Unknown error creating crawl robot');
return res.status(500).json({ error: 'An unknown error occurred.' });
}
}
});
/**
* POST endpoint for creating a search robot
* @route POST /recordings/search
* @auth requireSignIn - JWT authentication required
*/
router.post('/recordings/search', requireSignIn, async (req: AuthenticatedRequest, res) => {
try {
const { searchConfig, name } = req.body;
if (!searchConfig || !searchConfig.query) {
return res.status(400).json({ error: 'Search configuration with query is required.' });
}
if (!req.user) {
return res.status(401).send({ error: 'Unauthorized' });
}
const robotName = name || `Search Robot - ${searchConfig.query.substring(0, 50)}`;
const currentTimestamp = new Date().toLocaleString('en-US');
const robotId = uuid();
const newRobot = await Robot.create({
id: uuid(),
userId: req.user.id,
recording_meta: {
name: robotName,
id: robotId,
createdAt: currentTimestamp,
updatedAt: currentTimestamp,
pairs: 1,
params: [],
type: 'search',
},
recording: {
workflow: [
{
where: { url: 'about:blank' },
what: [{
action: 'search',
args: [searchConfig],
name: 'Search'
}]
}
]
},
google_sheet_email: null,
google_sheet_name: null,
google_sheet_id: null,
google_access_token: null,
google_refresh_token: null,
airtable_base_id: null,
airtable_base_name: null,
airtable_table_name: null,
airtable_table_id: null,
airtable_access_token: null,
airtable_refresh_token: null,
schedule: null,
webhooks: null
});
logger.log('info', `Search robot created with id: ${newRobot.id}`);
capture('maxun-oss-robot-created', {
userId: req.user.id.toString(),
robotId: robotId,
robotName: robotName,
robotType: 'search',
searchQuery: searchConfig.query,
searchProvider: searchConfig.provider || 'duckduckgo',
searchLimit: searchConfig.limit || 10
});
return res.status(201).json({
message: 'Search robot created successfully.',
robot: newRobot,
});
} catch (error) {
if (error instanceof Error) {
logger.log('error', `Error creating search robot: ${error.message}`);
return res.status(500).json({ error: error.message });
} else {
logger.log('error', 'Unknown error creating search robot');
return res.status(500).json({ error: 'An unknown error occurred.' });
}
}
});
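// Illustrative request (values are assumptions): a minimal discover-mode search robot.
//
//   POST /recordings/search
//   { "searchConfig": { "query": "web scraping tools", "limit": 10, "mode": "discover" } }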
export { processQueuedRuns };

View File

@@ -16,7 +16,6 @@ function processWorkflow(workflow: WorkflowFile, checkLimit: boolean = false): W
processedWorkflow.workflow.forEach((pair) => {
pair.what.forEach((action) => {
// Handle limit validation for scrapeList action
if (action.action === 'scrapeList' && checkLimit && Array.isArray(action.args) && action.args.length > 0) {
const scrapeConfig = action.args[0];
if (scrapeConfig && typeof scrapeConfig === 'object' && 'limit' in scrapeConfig) {
@@ -26,7 +25,6 @@ function processWorkflow(workflow: WorkflowFile, checkLimit: boolean = false): W
}
}
// Handle decryption for type and press actions
if ((action.action === 'type' || action.action === 'press') && Array.isArray(action.args) && action.args.length > 1) {
try {
const encryptedValue = action.args[1];
@@ -93,10 +91,14 @@ export class WorkflowInterpreter {
public serializableDataByType: {
scrapeSchema: Record<string, any>;
scrapeList: Record<string, any>;
crawl: Record<string, any>;
search: Record<string, any>;
[key: string]: any;
} = {
scrapeSchema: {},
scrapeList: {},
crawl: {},
search: {},
};
private currentActionName: string | null = null;
@@ -282,7 +284,6 @@ export class WorkflowInterpreter {
}
} else if (this.currentActionType === 'scrapeList') {
if (data && Array.isArray(data) && data.length > 0) {
// Use the current index for persistence
await this.persistDataToDatabase('scrapeList', data, this.currentScrapeListIndex);
}
@@ -293,7 +294,6 @@ export class WorkflowInterpreter {
}
},
binaryCallback: async (data: string, mimetype: string) => {
// For editor mode, we don't have the name yet, so use a timestamp-based name
const binaryItem = {
name: `Screenshot ${Date.now()}`,
mimeType: mimetype,
@@ -301,7 +301,6 @@ export class WorkflowInterpreter {
};
this.binaryData.push(binaryItem);
// Persist binary data to database
await this.persistBinaryDataToDatabase(binaryItem);
this.socket.emit('binaryCallback', {
@@ -340,7 +339,6 @@ export class WorkflowInterpreter {
logger.log('debug', `Interpretation finished`);
// Flush any remaining data in persistence buffer before completing
await this.flushPersistenceBuffer();
this.interpreter = null;
@@ -419,6 +417,8 @@ export class WorkflowInterpreter {
this.serializableDataByType = {
scrapeSchema: {},
scrapeList: {},
crawl: {},
search: {},
};
this.binaryData = [];
this.currentScrapeListIndex = 0;
@@ -591,12 +591,20 @@ export class WorkflowInterpreter {
typeKey = "scrapeList";
} else if (this.currentActionType === "scrapeSchema") {
typeKey = "scrapeSchema";
} else if (this.currentActionType === "crawl") {
typeKey = "crawl";
} else if (this.currentActionType === "search") {
typeKey = "search";
}
if (typeKey === "scrapeList" && data.scrapeList) {
data = data.scrapeList;
} else if (typeKey === "scrapeSchema" && data.scrapeSchema) {
data = data.scrapeSchema;
} else if (typeKey === "crawl" && data.crawl) {
data = data.crawl;
} else if (typeKey === "search" && data.search) {
data = data.search;
}
let actionName = "";
@@ -609,38 +617,65 @@ export class WorkflowInterpreter {
actionName = keys[keys.length - 1];
data = data[actionName];
}
} else if (typeKey === "crawl" && data && typeof data === "object" && !Array.isArray(data)) {
const keys = Object.keys(data);
if (keys.length === 1) {
actionName = keys[0];
data = data[actionName];
} else if (keys.length > 1) {
actionName = keys[keys.length - 1];
data = data[actionName];
}
} else if (typeKey === "search" && data && typeof data === "object" && !Array.isArray(data)) {
const keys = Object.keys(data);
if (keys.length === 1) {
actionName = keys[0];
data = data[actionName];
} else if (keys.length > 1) {
actionName = keys[keys.length - 1];
data = data[actionName];
}
}
if (!actionName) {
actionName = this.currentActionName || "";
if (typeKey === "scrapeList" && !actionName) {
actionName = this.getUniqueActionName(typeKey, "");
} else if (typeKey === "crawl" && !actionName) {
actionName = this.getUniqueActionName(typeKey, "Crawl Results");
} else if (typeKey === "search" && !actionName) {
actionName = this.getUniqueActionName(typeKey, "Search Results");
}
}
- const flattened = Array.isArray(data)
- ? data
- : (
- data?.List ??
- (data && typeof data === "object"
- ? Object.values(data).flat?.() ?? data
- : [])
- );
+ let processedData;
+ if (typeKey === "search") {
+ processedData = data;
+ } else {
+ processedData = Array.isArray(data)
+ ? data
+ : (
+ data?.List ??
+ (data && typeof data === "object"
+ ? Object.values(data).flat?.() ?? data
+ : [])
+ );
+ }
if (!this.serializableDataByType[typeKey]) {
this.serializableDataByType[typeKey] = {};
}
- this.serializableDataByType[typeKey][actionName] = flattened;
+ this.serializableDataByType[typeKey][actionName] = processedData;
await this.persistDataToDatabase(typeKey, {
- [actionName]: flattened,
+ [actionName]: processedData,
});
this.socket.emit("serializableCallback", {
type: typeKey,
name: actionName,
- data: flattened,
+ data: processedData,
});
} catch (err: any) {
logger.log('error', `serializableCallback handler failed: ${err.message}`);
@@ -698,7 +733,6 @@ export class WorkflowInterpreter {
await this.flushPersistenceBuffer();
// Structure the output to maintain separate data for each action type
const result = {
log: this.debugMessages,
result: status,
@@ -794,7 +828,7 @@ export class WorkflowInterpreter {
const currentSerializableOutput = run.serializableOutput ?
JSON.parse(JSON.stringify(run.serializableOutput)) :
- { scrapeSchema: [], scrapeList: [] };
+ { scrapeSchema: {}, scrapeList: {}, crawl: {}, search: {} };
if (Array.isArray(currentSerializableOutput.scrapeList)) {
currentSerializableOutput.scrapeList = {};
@@ -802,6 +836,9 @@ export class WorkflowInterpreter {
if (Array.isArray(currentSerializableOutput.scrapeSchema)) {
currentSerializableOutput.scrapeSchema = {};
}
if (!currentSerializableOutput.search) {
currentSerializableOutput.search = {};
}
let hasUpdates = false;
@@ -827,6 +864,18 @@ export class WorkflowInterpreter {
}
mergeLists(currentSerializableOutput.scrapeList, item.data);
hasUpdates = true;
} else if (item.actionType === 'crawl') {
currentSerializableOutput.crawl = {
...(currentSerializableOutput.crawl || {}),
...item.data
};
hasUpdates = true;
} else if (item.actionType === 'search') {
currentSerializableOutput.search = {
...(currentSerializableOutput.search || {}),
...item.data
};
hasUpdates = true;
}
}
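The merge above is type-specific; a condensed sketch of the crawl/search half (the helper name is hypothetical; scrapeList instead goes through mergeLists for row-wise accumulation):

// Crawl and search entries are shallow-merged by action name, so re-running
// an action overwrites its previous results rather than appending to them.
function mergeCrawlAndSearch(
  output: Record<string, any>,
  item: { actionType: string; data: Record<string, any> }
): void {
  if (item.actionType === 'crawl') {
    output.crawl = { ...(output.crawl || {}), ...item.data };
  } else if (item.actionType === 'search') {
    output.search = { ...(output.search || {}), ...item.data };
  }
}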

View File

@@ -13,7 +13,11 @@ interface AirtableUpdateTask {
interface SerializableOutput {
scrapeSchema?: Record<string, any[]>;
scrapeList?: Record<string, any[]>;
markdown?: Array<{ content: string }>;
html?: Array<{ content: string }>;
crawl?: Record<string, any[]>;
search?: any;
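  // `search` stays `any` because a run may hold { results: [...] }, a bare
  // array, or a single result object; the normalization below handles all three.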
}
const MAX_RETRIES = 3;
@@ -67,6 +71,10 @@ function mergeRelatedData(serializableOutput: SerializableOutput, binaryOutput:
const schemaData: Array<{ Group: string; Field: string; Value: any }> = [];
const listData: any[] = [];
const screenshotData: Array<{ key: string; url: string }> = [];
const markdownData: any[] = [];
const htmlData: any[] = [];
const crawlData: any[] = [];
const searchData: any[] = [];
if (serializableOutput.scrapeSchema) {
if (Array.isArray(serializableOutput.scrapeSchema)) {
@@ -122,6 +130,66 @@ function mergeRelatedData(serializableOutput: SerializableOutput, binaryOutput:
}
}
if (serializableOutput.markdown && Array.isArray(serializableOutput.markdown)) {
serializableOutput.markdown.forEach((item, index) => {
if (item.content) {
markdownData.push({
"Index": index + 1,
"Type": "Markdown",
"Content": item.content
});
}
});
}
if (serializableOutput.html && Array.isArray(serializableOutput.html)) {
serializableOutput.html.forEach((item, index) => {
if (item.content) {
htmlData.push({
"Index": index + 1,
"Type": "HTML",
"Content": item.content
});
}
});
}
if (serializableOutput.crawl && typeof serializableOutput.crawl === "object") {
for (const [crawlName, crawlArray] of Object.entries(serializableOutput.crawl)) {
if (Array.isArray(crawlArray)) {
crawlArray.forEach((crawlItem) => {
const hasContent = Object.values(crawlItem || {}).some(
(value) => value !== null && value !== undefined && value !== ""
);
if (hasContent) {
crawlData.push({ "Crawl Type": crawlName, ...crawlItem });
}
});
}
}
}
if (serializableOutput.search) {
let results: any[] = [];
if (serializableOutput.search.results && Array.isArray(serializableOutput.search.results)) {
results = serializableOutput.search.results;
} else if (Array.isArray(serializableOutput.search)) {
results = serializableOutput.search;
} else {
results = [serializableOutput.search];
}
results.forEach((result) => {
const hasContent = Object.values(result || {}).some(
(value) => value !== null && value !== undefined && value !== ""
);
if (hasContent) {
searchData.push(result);
}
});
}
// Collect screenshot data (handles both string and object forms safely)
// if (binaryOutput && Object.keys(binaryOutput).length > 0) {
// Object.entries(binaryOutput).forEach(([key, rawValue]: [string, any]) => {
@@ -152,7 +220,15 @@ function mergeRelatedData(serializableOutput: SerializableOutput, binaryOutput:
// }
// --- Merge all types into Airtable rows ---
const maxLength = Math.max(schemaData.length, listData.length, screenshotData.length);
const maxLength = Math.max(
schemaData.length,
listData.length,
screenshotData.length,
markdownData.length,
htmlData.length,
crawlData.length,
searchData.length
);
for (let i = 0; i < maxLength; i++) {
const record: Record<string, any> = {};
@@ -176,6 +252,38 @@ function mergeRelatedData(serializableOutput: SerializableOutput, binaryOutput:
record.Screenshot = screenshotData[i].url;
}
if (i < markdownData.length) {
Object.entries(markdownData[i] || {}).forEach(([key, value]) => {
if (value !== null && value !== undefined && value !== "") {
record[key] = value;
}
});
}
if (i < htmlData.length) {
Object.entries(htmlData[i] || {}).forEach(([key, value]) => {
if (value !== null && value !== undefined && value !== "") {
record[key] = value;
}
});
}
if (i < crawlData.length) {
Object.entries(crawlData[i] || {}).forEach(([key, value]) => {
if (value !== null && value !== undefined && value !== "") {
record[key] = value;
}
});
}
if (i < searchData.length) {
Object.entries(searchData[i] || {}).forEach(([key, value]) => {
if (value !== null && value !== undefined && value !== "") {
record[key] = value;
}
});
}
if (Object.keys(record).length > 0) {
allRecords.push(record);
}
@@ -194,6 +302,18 @@ function mergeRelatedData(serializableOutput: SerializableOutput, binaryOutput:
Screenshot: screenshotData[i].url,
});
}
for (let i = maxLength; i < markdownData.length; i++) {
allRecords.push(markdownData[i]);
}
for (let i = maxLength; i < htmlData.length; i++) {
allRecords.push(htmlData[i]);
}
for (let i = maxLength; i < crawlData.length; i++) {
allRecords.push(crawlData[i]);
}
for (let i = maxLength; i < searchData.length; i++) {
allRecords.push(searchData[i]);
}
return allRecords;
}
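The record assembly above zips the per-type columns index-by-index; a minimal standalone sketch with invented sample data (note the trailing overflow loops in the original are defensive: maxLength already covers every column, so they normally never run):

// Columns are merged position-by-position up to the longest column; empty
// rows are dropped rather than pushed.
const columns: Record<string, Array<Record<string, any>>> = {
  list: [{ Title: 'A' }, { Title: 'B' }],
  crawl: [{ url: 'https://example.com' }],
};
const maxLength = Math.max(...Object.values(columns).map((c) => c.length));
const records: Array<Record<string, any>> = [];
for (let i = 0; i < maxLength; i++) {
  const record: Record<string, any> = {};
  for (const column of Object.values(columns)) {
    if (i < column.length) Object.assign(record, column[i]);
  }
  if (Object.keys(record).length > 0) records.push(record);
}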

View File

@@ -13,6 +13,10 @@ interface GoogleSheetUpdateTask {
interface SerializableOutput {
scrapeSchema?: Record<string, any[]>;
scrapeList?: Record<string, any[]>;
markdown?: Array<{ content: string }>;
html?: Array<{ content: string }>;
crawl?: Record<string, any[]>;
search?: any;
}
@@ -95,6 +99,72 @@ export async function updateGoogleSheet(robotId: string, runId: string) {
}
}
if (serializableOutput.markdown && Array.isArray(serializableOutput.markdown) && serializableOutput.markdown.length > 0) {
const markdownData = serializableOutput.markdown.map((item, index) => ({
"Index": index + 1,
"Content": item.content || ""
}));
await processOutputType(
robotId,
spreadsheetId,
'Markdown',
markdownData,
plainRobot
);
}
if (serializableOutput.html && Array.isArray(serializableOutput.html) && serializableOutput.html.length > 0) {
const htmlData = serializableOutput.html.map((item, index) => ({
"Index": index + 1,
"Content": item.content || ""
}));
await processOutputType(
robotId,
spreadsheetId,
'HTML',
htmlData,
plainRobot
);
}
if (serializableOutput.crawl && typeof serializableOutput.crawl === "object") {
for (const [crawlName, crawlArray] of Object.entries(serializableOutput.crawl)) {
if (!Array.isArray(crawlArray) || crawlArray.length === 0) continue;
await processOutputType(
robotId,
spreadsheetId,
`Crawl - ${crawlName}`,
crawlArray,
plainRobot
);
}
}
if (serializableOutput.search) {
let searchData: any[] = [];
if (serializableOutput.search.results && Array.isArray(serializableOutput.search.results)) {
searchData = serializableOutput.search.results;
} else if (Array.isArray(serializableOutput.search)) {
searchData = serializableOutput.search;
} else {
searchData = [serializableOutput.search];
}
if (searchData.length > 0) {
await processOutputType(
robotId,
spreadsheetId,
'Search Results',
searchData,
plainRobot
);
}
}
}
if (plainRun.binaryOutput && Object.keys(plainRun.binaryOutput).length > 0) {

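Both the Airtable and Google Sheets paths normalize search output with the same three branches; a hypothetical shared helper (not in this commit) would capture the pattern:

// Accepts { results: [...] }, a bare array, or a single result object, and
// always returns a flat array of result rows.
function toSearchRows(search: any): any[] {
  if (search?.results && Array.isArray(search.results)) return search.results;
  if (Array.isArray(search)) return search;
  return search ? [search] : [];
}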
View File

@@ -484,6 +484,8 @@ async function executeRun(id: string, userId: string) {
const categorizedOutput = {
scrapeSchema: finalRun?.serializableOutput?.scrapeSchema || {},
scrapeList: finalRun?.serializableOutput?.scrapeList || {},
crawl: finalRun?.serializableOutput?.crawl || {},
search: finalRun?.serializableOutput?.search || {}
};
await destroyRemoteBrowser(plainRun.browserId, userId);
@@ -570,6 +572,8 @@ async function executeRun(id: string, userId: string) {
}, {} as Record<string, any[]>)
: {},
captured_lists: categorizedOutput.scrapeList,
crawl_data: categorizedOutput.crawl,
search_data: categorizedOutput.search,
captured_texts_count: totalSchemaItemsExtracted,
captured_lists_count: totalListItemsExtracted,
screenshots_count: extractedScreenshotsCount

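For reference, the run-completion payload now carries the two new categories; an approximate shape assembled from the fields visible in this diff (other fields elided, exact types may differ):

interface RunCompletionPayload {
  captured_lists: Record<string, any[]>;
  crawl_data: Record<string, any[]>; // new: crawl results keyed by action name
  search_data: any;                  // new: structured search output
  captured_texts_count: number;
  captured_lists_count: number;
  screenshots_count: number;
  // ...remaining fields elided
}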
View File

@@ -353,7 +353,7 @@ export const createCrawlRobot = async (
): Promise<any> => {
try {
const response = await axios.post(
`${apiUrl}/recordings/crawl`,
`${apiUrl}/storage/recordings/crawl`,
{
url,
name,
@@ -392,7 +392,7 @@ export const createSearchRobot = async (
): Promise<any> => {
try {
const response = await axios.post(
`${apiUrl}/recordings/search`,
`${apiUrl}/storage/recordings/search`,
{
name,
searchConfig,

View File

@@ -154,7 +154,7 @@ export const RobotConfigPage: React.FC<RobotConfigPageProps> = ({
)}
<Box sx={{ display: 'flex', gap: 2 }}>
/* {showCancelButton && (
{/* {showCancelButton && (
<Button
variant="outlined"
onClick={handleBack}
@@ -164,7 +164,7 @@ export const RobotConfigPage: React.FC<RobotConfigPageProps> = ({
}} >
{cancelButtonText || t("buttons.cancel")}
</Button>
)} */
)} */}
{showSaveButton && onSave && (
<Button
variant="contained"

View File

@@ -1051,7 +1051,7 @@ export const RobotEditPage = ({ handleStart }: RobotSettingsProps) => {
})),
credentials: credentialsForPayload,
targetUrl: targetUrl,
workflow: robot.recording.workflow,
workflow: updatedWorkflow,
};
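// Note: the payload sends `updatedWorkflow` (instead of the original
// `robot.recording.workflow`) so steps edited on this page are persisted.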
const success = await updateRecording(robot.recording_meta.id, payload);
@@ -1101,13 +1101,15 @@ export const RobotEditPage = ({ handleStart }: RobotSettingsProps) => {
style={{ marginBottom: "20px" }}
/>
<TextField
label={t("robot_duplication.fields.target_url")}
key={t("robot_duplication.fields.target_url")}
value={getTargetUrl() || ""}
onChange={(e) => handleTargetUrlChange(e.target.value)}
style={{ marginBottom: "20px" }}
/>
{robot.recording_meta.type !== 'search' && (
<TextField
label={t("robot_duplication.fields.target_url")}
key={t("robot_duplication.fields.target_url")}
value={getTargetUrl() || ""}
onChange={(e) => handleTargetUrlChange(e.target.value)}
style={{ marginBottom: "20px" }}
/>
)}
{renderCrawlConfigFields()}
{renderSearchConfigFields()}