fix: better markdown output
This commit is contained in:
@@ -1,3 +1,5 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
import { getPageSource, GetPageSourceOptions } from './get_html';
|
||||
import { getProcessedText, ProcessTextOptions, ProcessedResult } from './get_llm_input_text';
|
||||
|
||||
@@ -11,17 +13,7 @@ export async function urlToLlmText(
|
||||
const pageSource = await getPageSource(url, options);
|
||||
|
||||
if (!pageSource) {
|
||||
return {
|
||||
markdown: '',
|
||||
plainText: '',
|
||||
metadata: {
|
||||
title: '',
|
||||
url: url,
|
||||
processedAt: new Date().toISOString(),
|
||||
textLength: 0,
|
||||
markdownLength: 0
|
||||
}
|
||||
};
|
||||
return createEmptyResult(url);
|
||||
}
|
||||
|
||||
const result = await getProcessedText(pageSource, url, options);
|
||||
@@ -29,18 +21,28 @@ export async function urlToLlmText(
|
||||
|
||||
} catch (error) {
|
||||
console.error('Error while scraping url: ', error);
|
||||
return createEmptyResult(url);
|
||||
}
|
||||
}
|
||||
|
||||
function createEmptyResult(url: string): ProcessedResult {
|
||||
return {
|
||||
markdown: '',
|
||||
plainText: '',
|
||||
metadata: {
|
||||
title: '',
|
||||
description: '',
|
||||
url: url,
|
||||
processedAt: new Date().toISOString(),
|
||||
textLength: 0,
|
||||
markdownLength: 0
|
||||
markdownLength: 0,
|
||||
hasContent: false,
|
||||
language: 'en',
|
||||
wordCount: 0,
|
||||
linkCount: 0,
|
||||
imageCount: 0
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
export { getPageSource, getProcessedText };
|
||||
Reference in New Issue
Block a user