From dae4e83412d8867e446841aef90022351ef6e574 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Mon, 17 Nov 2025 21:18:11 +0530 Subject: [PATCH] wip: markdown + plain text --- server/src/markdownify/get_llm_ready_text.ts | 37 ++++++++++++++------ 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/server/src/markdownify/get_llm_ready_text.ts b/server/src/markdownify/get_llm_ready_text.ts index 4d0515c6..ed7849ec 100644 --- a/server/src/markdownify/get_llm_ready_text.ts +++ b/server/src/markdownify/get_llm_ready_text.ts @@ -1,29 +1,46 @@ import { getPageSource, GetPageSourceOptions } from './get_html'; -import { getProcessedText, ProcessTextOptions } from './get_llm_input_text'; +import { getProcessedText, ProcessTextOptions, ProcessedResult } from './get_llm_input_text'; -export interface UrlToLlmTextOptions extends GetPageSourceOptions, ProcessTextOptions { - // Combined options from both interfaces -} +export interface UrlToLlmTextOptions extends GetPageSourceOptions, ProcessTextOptions {} export async function urlToLlmText( url: string, options: UrlToLlmTextOptions = {} -): Promise { +): Promise { try { const pageSource = await getPageSource(url, options); if (!pageSource) { - return ''; + return { + markdown: '', + plainText: '', + metadata: { + title: '', + url: url, + processedAt: new Date().toISOString(), + textLength: 0, + markdownLength: 0 + } + }; } - const llmText = await getProcessedText(pageSource, url, options); - return llmText; + const result = await getProcessedText(pageSource, url, options); + return result; } catch (error) { console.error('Error while scraping url: ', error); - return ''; + return { + markdown: '', + plainText: '', + metadata: { + title: '', + url: url, + processedAt: new Date().toISOString(), + textLength: 0, + markdownLength: 0 + } + }; } } -// Export individual functions as well export { getPageSource, getProcessedText }; \ No newline at end of file