diff --git a/server/src/markdownify/get_llm_input_text.ts b/server/src/markdownify/get_llm_input_text.ts
index c7962392..fa0aec6c 100644
--- a/server/src/markdownify/get_llm_input_text.ts
+++ b/server/src/markdownify/get_llm_input_text.ts
@@ -1,9 +1,7 @@
-
 import * as cheerio from 'cheerio';
 import { URL } from 'url';
 
 export interface ProcessTextOptions {
-  htmlParser?: boolean;
   keepImages?: boolean;
   removeSvgImage?: boolean;
   removeGifImage?: boolean;
@@ -12,13 +10,26 @@ export interface ProcessTextOptions {
   removeScriptTag?: boolean;
   removeStyleTag?: boolean;
   removeTags?: string[];
+  formatAsMarkdown?: boolean;
+}
+
+export interface ProcessedResult { // Both renderings of one page plus metadata, as returned by getProcessedText
+  markdown: string; // convertToMarkdown output; the plain-text rendering instead when formatAsMarkdown is false
+  plainText: string; // convertToPlainText output
+  metadata: {
+    title: string; // trimmed <title> text, else first <h1> text, else 'Untitled' ('' on the error path)
+    url: string; // the baseUrl argument, unchanged
+    processedAt: string; // timestamp from Date#toISOString taken when the result is built
+    textLength: number; // plainText.length
+    markdownLength: number; // markdown.length
+  };
 }
 
 export async function getProcessedText(
   pageSource: string,
   baseUrl: string,
   options: ProcessTextOptions = {}
-): Promise<string> {
+): Promise<ProcessedResult> {
   const {
     keepImages = true,
     removeSvgImage = true,
@@ -27,13 +38,14 @@ export async function getProcessedText(
     keepWebpageLinks = true,
     removeScriptTag = true,
     removeStyleTag = true,
-    removeTags = []
+    removeTags = [],
+    formatAsMarkdown = true
   } = options;
 
   try {
     const $ = cheerio.load(pageSource);
     
-    // Remove tags
+    // Remove unwanted tags
     const tagsToRemove: string[] = [];
     if (removeScriptTag) tagsToRemove.push('script');
     if (removeStyleTag) tagsToRemove.push('style');
@@ -44,123 +56,272 @@ export async function getProcessedText(
       $(tag).remove();
     });
 
-    // Process image links
-    const imageTypesToRemove: string[] = [];
-    if (removeSvgImage) imageTypesToRemove.push('.svg');
-    if (removeGifImage) imageTypesToRemove.push('.gif');
-    imageTypesToRemove.push(...removeImageTypes);
+    // Extract page title
+    const title = $('title').text() || $('h1').first().text() || 'Untitled';
     
-    const uniqueImageTypes = [...new Set(imageTypesToRemove)];
+    // Generate both formats
+    const markdown = formatAsMarkdown ? 
+      convertToMarkdown($, baseUrl, options) : 
+      convertToPlainText($, baseUrl, options); // Fallback to plain text if markdown disabled
     
-    $('img').each((_, element) => {
-      try {
-        const $img = $(element);
-        if (!keepImages) {
-          $img.remove();
-        } else {
-          const imageLink = $img.attr('src');
-          let typeReplaced = false;
-          
-          if (imageLink) {
-            if (uniqueImageTypes.length > 0) {
-              for (const imageType of uniqueImageTypes) {
-                if (!typeReplaced && imageLink.includes(imageType)) {
-                  $img.remove();
-                  typeReplaced = true;
-                  break;
-                }
-              }
-            }
-            if (!typeReplaced) {
-              const absoluteUrl = new URL(imageLink, baseUrl).toString();
-              $img.replaceWith('\n' + absoluteUrl + ' ');
-            }
-          }
-        }
-      } catch (error) {
-        console.error('Error while processing image link: ', error);
-      }
-    });
+    const plainText = convertToPlainText($, baseUrl, options);
 
-    // Process website links - Preserve the link text AND the URL
-    $('a[href]').each((_, element) => {
-      try {
-        const $link = $(element);
-        if (!keepWebpageLinks) {
-          // Just remove the link but keep the text
-          $link.replaceWith($link.text());
-        } else {
-          const href = $link.attr('href');
-          if (href) {
-            const absoluteUrl = new URL(href, baseUrl).toString();
-            const linkText = $link.text().trim();
-            // Keep both the link text and the URL
-            $link.replaceWith(linkText + ' [' + absoluteUrl + '] ');
-          }
-        }
-      } catch (error) {
-        console.error('Error while processing webpage link: ', error);
+    const result: ProcessedResult = {
+      markdown,
+      plainText,
+      metadata: {
+        title: title.trim(),
+        url: baseUrl,
+        processedAt: new Date().toISOString(),
+        textLength: plainText.length,
+        markdownLength: markdown.length
       }
-    });
+    };
 
-    // Get text content 
-    let text: string;
-    
-    // Use a simpler approach to extract text
-    const bodyContent = $('body');
-    
-    if (bodyContent.length > 0) {
-      // Remove script and style tags that might have been missed
-      bodyContent.find('script, style, noscript').remove();
-      
-      // Get text with proper spacing
-      text = bodyContent
-        .contents()
-        .map((_, el) => {
-          if (el.type === 'text') {
-            return $(el).text();
-          }
-          if (el.type === 'tag') {
-            const $el = $(el);
-            const tagName = el.name?.toLowerCase();
-            
-            // Add appropriate spacing for block elements
-            if (['div', 'p', 'br', 'li', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'].includes(tagName || '')) {
-              return $el.text() + '\n';
-            }
-            return $el.text() + ' ';
-          }
-          return '';
-        })
-        .get()
-        .join('');
-    } else {
-      text = $.text();
-    }
-    
-    // Clean up the text while preserving quotes
-    text = cleanText(text);
-    
-    return text;
+    return result;
 
   } catch (error) {
     console.error('Error while getting processed text: ', error);
-    return '';
+    // Return empty result on error
+    return {
+      markdown: '',
+      plainText: '',
+      metadata: {
+        title: '',
+        url: baseUrl,
+        processedAt: new Date().toISOString(),
+        textLength: 0,
+        markdownLength: 0
+      }
+    };
   }
 }
 
-// Clean up text while preserving quotes and important content
-function cleanText(text: string): string {
-  if (!text) return '';
+/**
+ * Renders the page `<body>` as markdown.
+ *
+ * Operates on a clone of `<body>`. Inline constructs (code, images, links) are
+ * rewritten before block-level ones: block rewriting flattens each element with
+ * `.text()`, which would otherwise discard the links and images nested inside.
+ * An image whose URL cannot be resolved is dropped; a link whose URL cannot be
+ * resolved keeps only its text.
+ *
+ * @param $ - Loaded cheerio document.
+ * @param baseUrl - Base for resolving relative `src`/`href` values.
+ * @param options - Caller options as passed in (defaults not yet applied);
+ *   `keepImages` and `keepWebpageLinks` default to `true` here.
+ * @returns Whitespace-normalized markdown for the body.
+ */
+function convertToMarkdown($: cheerio.CheerioAPI, baseUrl: string, options: ProcessTextOptions): string {
+  // Same defaults as getProcessedText, which forwards the raw options object.
+  const { keepImages = true, keepWebpageLinks = true } = options;
+
+  // Clone the body to avoid modifying the original
+  const $body = $('body').clone();
+
+  // replaceWith() parses a string argument as HTML, so generated text is escaped
+  // first; otherwise literal `<tag>` text (e.g. in code samples) would be dropped.
+  const escapeHtml = (value: string): string =>
+    value.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
+
+  // Returns undefined instead of throwing, so one malformed URL cannot abort
+  // the whole conversion.
+  const toAbsoluteUrl = (ref: string): string | undefined => {
+    try {
+      return new URL(ref, baseUrl).toString();
+    } catch {
+      return undefined;
+    }
+  };
+
+  // Process code blocks
+  $body.find('pre').each((_, element) => {
+    const $el = $(element);
+    $el.replaceWith(escapeHtml(`\`\`\`\n${$el.text().trim()}\n\`\`\`\n\n`));
+  });
+
+  $body.find('code').each((_, element) => {
+    const $el = $(element);
+    // Only format inline code that's not inside pre blocks
+    if (!$el.closest('pre').length) {
+      $el.replaceWith(escapeHtml(`\`${$el.text().trim()}\``));
+    }
+  });
+
+  // Process images
+  if (keepImages) {
+    $body.find('img').each((_, element) => {
+      const $img = $(element);
+      const src = $img.attr('src');
+      const alt = $img.attr('alt') || '';
+      const absoluteUrl = src && !shouldRemoveImage(src, options) ? toAbsoluteUrl(src) : undefined;
+
+      if (absoluteUrl) {
+        $img.replaceWith(escapeHtml(`![${alt}](${absoluteUrl})\n\n`));
+      } else {
+        $img.remove();
+      }
+    });
+  } else {
+    $body.find('img').remove();
+  }
+
+  // Process links
+  $body.find('a[href]').each((_, element) => {
+    const $link = $(element);
+    const text = $link.text().trim();
+    const href = $link.attr('href');
+    const absoluteUrl = keepWebpageLinks && href ? toAbsoluteUrl(href) : undefined;
+
+    if (absoluteUrl && text) {
+      $link.replaceWith(escapeHtml(`[${text}](${absoluteUrl})`));
+    } else if (text) {
+      // Links disabled or URL unusable: keep just the visible text
+      $link.replaceWith(escapeHtml(text));
+    } else {
+      $link.remove();
+    }
+  });
+
+  // Process headers (h1-h6): the digit in the tag name is the number of hashes
+  $body.find('h1, h2, h3, h4, h5, h6').each((_, element) => {
+    const $el = $(element);
+    const level = Number.parseInt(element.name?.substring(1) || '4', 10);
+    $el.replaceWith(escapeHtml(`${'#'.repeat(level)} ${$el.text().trim()}\n\n`));
+  });
+
+  // Process paragraphs
+  $body.find('p').each((_, element) => {
+    const $el = $(element);
+    $el.replaceWith(escapeHtml(`${$el.text().trim()}\n\n`));
+  });
+
+  // Process lists
+  $body.find('li').each((_, element) => {
+    const $el = $(element);
+    // Every ordered item is emitted as `1.`, unordered items as `-`
+    const marker = $el.parent().is('ol') ? '1.' : '-';
+    $el.replaceWith(escapeHtml(`${marker} ${$el.text().trim()}\n`));
+  });
+
+  $body.find('ul, ol').each((_, element) => {
+    const $el = $(element);
+    // html() output is already HTML, so it is passed through without extra escaping
+    $el.replaceWith(`\n${$el.html()}\n\n`);
+  });
+
+  // Process blockquotes
+  $body.find('blockquote').each((_, element) => {
+    const $el = $(element);
+    const text = $el.text().trim();
+    $el.replaceWith(escapeHtml(`> ${text.replace(/\n/g, '\n> ')}\n\n`));
+  });
+
+  // Process tables (basic support)
+  $body.find('table').each((_, element) => {
+    const $table = $(element);
+    let markdownTable = '\n';
+
+    $table.find('tr').each((rowIndex, row) => {
+      const cells: string[] = [];
+      $(row).find('th, td').each((_, cell) => {
+        cells.push($(cell).text().trim());
+      });
+
+      if (cells.length > 0) {
+        markdownTable += `| ${cells.join(' | ')} |\n`;
+
+        // Add header separator after first row
+        if (rowIndex === 0) {
+          markdownTable += `|${cells.map(() => '---').join('|')}|\n`;
+        }
+      }
+    });
+
+    $table.replaceWith(escapeHtml(markdownTable + '\n'));
+  });
+
+  // Get the final text and clean it up
+  const markdown = $body.text();
+
+  // Clean up excessive whitespace while preserving structure
+  return cleanMarkdown(markdown);
+}
+
+/**
+ * Renders the page `<body>` as plain text, writing image and link URLs inline.
+ * Works on a clone of `<body>`; `keepImages`/`keepWebpageLinks` default to `true`.
+ * @returns Body text after `cleanText` normalization.
+ */
+function convertToPlainText($: cheerio.CheerioAPI, baseUrl: string, options: ProcessTextOptions): string {
+  const { keepImages = true, keepWebpageLinks = true } = options;
+  const $body = $('body').clone();
+  // replaceWith() parses strings as HTML: escape so `&`/`<` in text and URLs survive.
+  const escapeHtml = (value: string): string =>
+    value.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
+  // Returns undefined rather than throwing, so one bad URL cannot abort the page.
+  const toAbsoluteUrl = (ref: string): string | undefined => {
+    try {
+      return new URL(ref, baseUrl).toString();
+    } catch {
+      return undefined;
+    }
+  };
+
+  // Process images
+  $body.find('img').each((_, element) => {
+    const $img = $(element);
+    const src = $img.attr('src');
+    const absoluteUrl = keepImages && src && !shouldRemoveImage(src, options) ? toAbsoluteUrl(src) : undefined;
+    if (absoluteUrl) {
+      $img.replaceWith(escapeHtml(`\nImage: ${absoluteUrl}\n`));
+    } else {
+      $img.remove();
+    }
+  });
+
+  // Process links
+  $body.find('a[href]').each((_, element) => {
+    const $link = $(element);
+    const text = $link.text().trim();
+    const href = $link.attr('href');
+    const absoluteUrl = keepWebpageLinks && href && text ? toAbsoluteUrl(href) : undefined;
+    if (absoluteUrl) {
+      $link.replaceWith(escapeHtml(`${text}: ${absoluteUrl} `));
+    } else if (!keepWebpageLinks) {
+      $link.replaceWith(escapeHtml(text));
+    }
+  });
+  return cleanText($body.text());
+}
+
+/** True when `src` names an image type to drop. NOTE(review): gif default assumed `true` like svg — confirm. */
+function shouldRemoveImage(src: string, options: ProcessTextOptions): boolean {
+  // `options` arrives without defaults applied, so restore them here.
+  const { removeSvgImage = true, removeGifImage = true, removeImageTypes = [] } = options;
+  const imageTypesToRemove = [...removeImageTypes];
+  if (removeSvgImage) imageTypesToRemove.push('.svg');
+  if (removeGifImage) imageTypesToRemove.push('.gif');
+  const lowerSrc = src.toLowerCase(); // case-insensitive: `LOGO.SVG` matches `.svg`
+  return imageTypesToRemove.some(type => lowerSrc.includes(type.toLowerCase()));
+}
+
+/** Normalizes whitespace in generated markdown without touching `#` lines that are not headers. */
+function cleanMarkdown(markdown: string): string {
+  return markdown
+    // Strip trailing whitespace first so whitespace-only lines count as blank below
+    .replace(/[ \t]+$/gm, '')
+    .replace(/[ ]{2,}/g, ' ') // collapse runs of spaces
+    // Un-indent headers only (`#`..`######` + space); leaves `#include`/`#tag` lines alone
+    .replace(/^[ \t]+(#{1,6} )/gm, '$1')
+    // Replace 3+ newlines with 2 newlines
+    .replace(/\n{3,}/g, '\n\n')
+    .trim();
+}
+
+function cleanText(text: string): string { // Collapses whitespace but keeps line and paragraph breaks
   return text
-    // Replace multiple spaces with single space, but be careful with quotes
-    .replace(/[^\S\n]+/g, ' ')
-    // Replace multiple newlines with max 2 newlines
+    .replace(/[^\S\n]+/g, ' ') // spaces/tabs only: `\s+` would also eat the newlines matched below
     .replace(/\n\s*\n/g, '\n\n')
-    // Clean up spaces around quotes but don't remove the quotes
-    .replace(/\s+"/g, ' "')
-    .replace(/"\s+/g, '" ')
-    // Remove leading/trailing whitespace
     .trim();
 }
\ No newline at end of file