fix: better markdown output

This commit is contained in:
amhsirak
2025-11-17 21:53:04 +05:30
parent 28f1bf8510
commit 1651763fc2

View File

@@ -1,3 +1,5 @@
// SPDX-License-Identifier: MIT
import { getPageSource, GetPageSourceOptions } from './get_html';
import { getProcessedText, ProcessTextOptions, ProcessedResult } from './get_llm_input_text';
@@ -11,17 +13,7 @@ export async function urlToLlmText(
const pageSource = await getPageSource(url, options);
if (!pageSource) {
return {
markdown: '',
plainText: '',
metadata: {
title: '',
url: url,
processedAt: new Date().toISOString(),
textLength: 0,
markdownLength: 0
}
};
return createEmptyResult(url);
}
const result = await getProcessedText(pageSource, url, options);
@@ -29,18 +21,28 @@ export async function urlToLlmText(
} catch (error) {
console.error('Error while scraping url: ', error);
return {
markdown: '',
plainText: '',
metadata: {
title: '',
url: url,
processedAt: new Date().toISOString(),
textLength: 0,
markdownLength: 0
}
};
return createEmptyResult(url);
}
}
/**
 * Builds the placeholder result returned when no content could be produced
 * for a URL.
 *
 * Every text field is an empty string, every counter is zero, `hasContent`
 * is `false`, and `language` is set to `'en'`. The `processedAt` field holds
 * the ISO-8601 timestamp of the moment this function is called.
 *
 * @param url - The URL that was requested; copied verbatim into `metadata.url`.
 * @returns A freshly allocated, content-free `ProcessedResult`.
 */
function createEmptyResult(url: string): ProcessedResult {
  // Shared "nothing here" values for the text fields and the counters.
  const blank = '';
  const zero = 0;
  const processedAt = new Date().toISOString();

  return {
    markdown: blank,
    plainText: blank,
    metadata: {
      title: blank,
      description: blank,
      url,
      processedAt,
      textLength: zero,
      markdownLength: zero,
      hasContent: false,
      language: 'en',
      wordCount: zero,
      linkCount: zero,
      imageCount: zero,
    },
  };
}
export { getPageSource, getProcessedText };