fix: better markdown output
This commit is contained in:
@@ -1,3 +1,5 @@
|
|||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
import { getPageSource, GetPageSourceOptions } from './get_html';
|
import { getPageSource, GetPageSourceOptions } from './get_html';
|
||||||
import { getProcessedText, ProcessTextOptions, ProcessedResult } from './get_llm_input_text';
|
import { getProcessedText, ProcessTextOptions, ProcessedResult } from './get_llm_input_text';
|
||||||
|
|
||||||
@@ -11,17 +13,7 @@ export async function urlToLlmText(
|
|||||||
const pageSource = await getPageSource(url, options);
|
const pageSource = await getPageSource(url, options);
|
||||||
|
|
||||||
if (!pageSource) {
|
if (!pageSource) {
|
||||||
return {
|
return createEmptyResult(url);
|
||||||
markdown: '',
|
|
||||||
plainText: '',
|
|
||||||
metadata: {
|
|
||||||
title: '',
|
|
||||||
url: url,
|
|
||||||
processedAt: new Date().toISOString(),
|
|
||||||
textLength: 0,
|
|
||||||
markdownLength: 0
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const result = await getProcessedText(pageSource, url, options);
|
const result = await getProcessedText(pageSource, url, options);
|
||||||
@@ -29,18 +21,28 @@ export async function urlToLlmText(
|
|||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error while scraping url: ', error);
|
console.error('Error while scraping url: ', error);
|
||||||
return {
|
return createEmptyResult(url);
|
||||||
markdown: '',
|
|
||||||
plainText: '',
|
|
||||||
metadata: {
|
|
||||||
title: '',
|
|
||||||
url: url,
|
|
||||||
processedAt: new Date().toISOString(),
|
|
||||||
textLength: 0,
|
|
||||||
markdownLength: 0
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function createEmptyResult(url: string): ProcessedResult {
|
||||||
|
return {
|
||||||
|
markdown: '',
|
||||||
|
plainText: '',
|
||||||
|
metadata: {
|
||||||
|
title: '',
|
||||||
|
description: '',
|
||||||
|
url: url,
|
||||||
|
processedAt: new Date().toISOString(),
|
||||||
|
textLength: 0,
|
||||||
|
markdownLength: 0,
|
||||||
|
hasContent: false,
|
||||||
|
language: 'en',
|
||||||
|
wordCount: 0,
|
||||||
|
linkCount: 0,
|
||||||
|
imageCount: 0
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
export { getPageSource, getProcessedText };
|
export { getPageSource, getProcessedText };
|
||||||
Reference in New Issue
Block a user