fix: better markdown output

This commit is contained in:
amhsirak
2025-11-17 21:53:04 +05:30
parent 28f1bf8510
commit 1651763fc2

View File

@@ -1,3 +1,5 @@
// SPDX-License-Identifier: MIT
import { getPageSource, GetPageSourceOptions } from './get_html'; import { getPageSource, GetPageSourceOptions } from './get_html';
import { getProcessedText, ProcessTextOptions, ProcessedResult } from './get_llm_input_text'; import { getProcessedText, ProcessTextOptions, ProcessedResult } from './get_llm_input_text';
@@ -11,17 +13,7 @@ export async function urlToLlmText(
const pageSource = await getPageSource(url, options); const pageSource = await getPageSource(url, options);
if (!pageSource) { if (!pageSource) {
return { return createEmptyResult(url);
markdown: '',
plainText: '',
metadata: {
title: '',
url: url,
processedAt: new Date().toISOString(),
textLength: 0,
markdownLength: 0
}
};
} }
const result = await getProcessedText(pageSource, url, options); const result = await getProcessedText(pageSource, url, options);
@@ -29,18 +21,28 @@ export async function urlToLlmText(
} catch (error) { } catch (error) {
console.error('Error while scraping url: ', error); console.error('Error while scraping url: ', error);
return { return createEmptyResult(url);
markdown: '',
plainText: '',
metadata: {
title: '',
url: url,
processedAt: new Date().toISOString(),
textLength: 0,
markdownLength: 0
}
};
} }
} }
/**
 * Builds the placeholder result returned when no content could be produced
 * for a URL (the page source was empty, or scraping threw).
 *
 * @param url - The URL that was requested; copied verbatim into the metadata.
 * @returns A result whose `markdown` and `plainText` are empty strings, whose
 *   numeric metadata fields are all `0`, whose `hasContent` is `false`, whose
 *   `language` is `'en'`, and whose `processedAt` is the current time as an
 *   ISO-8601 string.
 */
function createEmptyResult(url: string): ProcessedResult {
  // Capture the timestamp up front so the literal below can use shorthand.
  const processedAt = new Date().toISOString();

  return {
    markdown: '',
    plainText: '',
    metadata: {
      title: '',
      description: '',
      url,
      processedAt,
      textLength: 0,
      markdownLength: 0,
      hasContent: false,
      language: 'en',
      wordCount: 0,
      linkCount: 0,
      imageCount: 0,
    },
  };
}
export { getPageSource, getProcessedText }; export { getPageSource, getProcessedText };