wip: markdown + plain text

This commit is contained in:
amhsirak
2025-11-17 21:18:11 +05:30
parent a3891f6813
commit dae4e83412

View File

@@ -1,29 +1,46 @@
import { getPageSource, GetPageSourceOptions } from './get_html'; import { getPageSource, GetPageSourceOptions } from './get_html';
import { getProcessedText, ProcessTextOptions } from './get_llm_input_text'; import { getProcessedText, ProcessTextOptions, ProcessedResult } from './get_llm_input_text';
/**
 * Options accepted by `urlToLlmText`.
 *
 * Merges `GetPageSourceOptions` and `ProcessTextOptions` into one options bag;
 * it declares no members of its own.
 */
export interface UrlToLlmTextOptions
  extends GetPageSourceOptions,
    ProcessTextOptions {}
/**
 * Builds the placeholder result used when no content is available for `url`:
 * empty markdown and plain text, zero lengths, and a `processedAt` timestamp
 * taken at the moment of the call.
 */
function createEmptyResult(url: string): ProcessedResult {
  return {
    markdown: '',
    plainText: '',
    metadata: {
      title: '',
      url,
      processedAt: new Date().toISOString(),
      textLength: 0,
      markdownLength: 0,
    },
  };
}

/**
 * Fetches the page source for `url` and passes it to `getProcessedText`.
 *
 * @param url - Address of the page to fetch; also recorded in the result metadata.
 * @param options - Combined options forwarded unchanged to both `getPageSource`
 *   and `getProcessedText`. Defaults to an empty object.
 * @returns The result of `getProcessedText`, or an empty placeholder result
 *   (empty strings, zero lengths) when `getPageSource` yields a falsy value or
 *   when either step throws. Errors are logged with `console.error` and are
 *   not propagated to the caller.
 */
export async function urlToLlmText(
  url: string,
  options: UrlToLlmTextOptions = {}
): Promise<ProcessedResult> {
  try {
    const pageSource = await getPageSource(url, options);
    if (!pageSource) {
      return createEmptyResult(url);
    }

    // Awaited inside the try block so a rejection is handled by the catch below.
    return await getProcessedText(pageSource, url, options);
  } catch (error) {
    console.error('Error while scraping url: ', error);
    return createEmptyResult(url);
  }
}
// Also expose the individual functions alongside urlToLlmText.
export { getPageSource, getProcessedText };