From 1651763fc288c0c8b663294ab05b7d3b40326580 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Mon, 17 Nov 2025 21:53:04 +0530 Subject: [PATCH] fix: better markdown output --- server/src/markdownify/get_llm_ready_text.ts | 46 ++++++++++---------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/server/src/markdownify/get_llm_ready_text.ts b/server/src/markdownify/get_llm_ready_text.ts index ed7849ec..025fb52d 100644 --- a/server/src/markdownify/get_llm_ready_text.ts +++ b/server/src/markdownify/get_llm_ready_text.ts @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: MIT + import { getPageSource, GetPageSourceOptions } from './get_html'; import { getProcessedText, ProcessTextOptions, ProcessedResult } from './get_llm_input_text'; @@ -11,17 +13,7 @@ export async function urlToLlmText( const pageSource = await getPageSource(url, options); if (!pageSource) { - return { - markdown: '', - plainText: '', - metadata: { - title: '', - url: url, - processedAt: new Date().toISOString(), - textLength: 0, - markdownLength: 0 - } - }; + return createEmptyResult(url); } const result = await getProcessedText(pageSource, url, options); @@ -29,18 +21,28 @@ export async function urlToLlmText( } catch (error) { console.error('Error while scraping url: ', error); - return { - markdown: '', - plainText: '', - metadata: { - title: '', - url: url, - processedAt: new Date().toISOString(), - textLength: 0, - markdownLength: 0 - } - }; + return createEmptyResult(url); } } +function createEmptyResult(url: string): ProcessedResult { + return { + markdown: '', + plainText: '', + metadata: { + title: '', + description: '', + url: url, + processedAt: new Date().toISOString(), + textLength: 0, + markdownLength: 0, + hasContent: false, + language: 'en', + wordCount: 0, + linkCount: 0, + imageCount: 0 + } + }; +} + export { getPageSource, getProcessedText }; \ No newline at end of file