diff --git a/server/src/markdownify/markdown.ts b/server/src/markdownify/markdown.ts index c4709e6e..d1399297 100644 --- a/server/src/markdownify/markdown.ts +++ b/server/src/markdownify/markdown.ts @@ -1,109 +1,27 @@ -import koffi from "koffi"; -import dotenv from "dotenv"; -import { stat } from "fs/promises"; -import path from "node:path"; -import os from "node:os"; +import TurndownService from "turndown"; +import { gfm } from "joplin-turndown-plugin-gfm"; -const exts = { - win32: ".dll", - darwin: ".dylib", - default: ".so", -}; - -const ext = - exts[os.platform() as keyof typeof exts] || exts.default; - -// Build path to the binary **inside the same folder** -export const GO_MARKDOWN_PARSER_PATH = path.join( - __dirname, - "html-to-markdown", - `html-to-markdown${ext}` -); - -dotenv.config(); - -// --------------------------------------------- -// Native Go binding wrapper -// --------------------------------------------- -class NativeMarkdownBridge { - private static singleton: NativeMarkdownBridge; - private fnConvert: any; - - private constructor() { - const lib = koffi.load(GO_MARKDOWN_PARSER_PATH); - - const freeFn = lib.func("FreeCString", "void", ["string"]); - const trackedType = "CString:" + crypto.randomUUID(); - const autoReleasedStr = koffi.disposable(trackedType, "string", freeFn); - - this.fnConvert = lib.func("ConvertHTMLToMarkdown", autoReleasedStr, [ - "string", - ]); - } - - static async load(): Promise { - if (!NativeMarkdownBridge.singleton) { - try { - await stat(GO_MARKDOWN_PARSER_PATH); - } catch { - throw new Error("Go shared library not found"); - } - NativeMarkdownBridge.singleton = new NativeMarkdownBridge(); - } - return NativeMarkdownBridge.singleton; - } - - async run(html: string): Promise { - return new Promise((resolve, reject) => { - this.fnConvert.async(html, (err: Error, output: string) => { - err ? reject(err) : resolve(output); - }); - }); - } -} - -// --------------------------------------------- -// Main exposed function -// --------------------------------------------- export async function parseMarkdown( html: string | null | undefined, ): Promise { if (!html) return ""; - // Try Go library first (if enabled) - try { - const engine = await NativeMarkdownBridge.load(); - let md = await engine.run(html); - - md = fixBrokenLinks(md); - md = stripSkipLinks(md); - - return md; - } catch (err: any) { - if (err?.message !== "Go shared library not found") { - console.log("Go markdown parser failed, falling back to JS parser:", err); - } else { - console.log("Go parser missing.", { GO_MARKDOWN_PARSER_PATH }); - } - } - - // Fallback parser - const TurndownService = require("turndown"); - const { gfm } = require("joplin-turndown-plugin-gfm"); - const t = new TurndownService(); + + // Custom rule for inline links t.addRule("inlineLink", { filter: (node: any, opts: any) => opts.linkStyle === "inlined" && node.nodeName === "A" && node.getAttribute("href"), replacement: (content: string, node: any) => { - const href = node.getAttribute("href").trim(); + const href = node.getAttribute("href")?.trim() || ""; const title = node.title ? ` "${node.title}"` : ""; return `[${content.trim()}](${href}${title})\n`; }, }); + // GitHub-flavored markdown features t.use(gfm); try { @@ -134,9 +52,11 @@ function fixBrokenLinks(md: string): string { result += ch; } } + return result; } function stripSkipLinks(md: string): string { return md.replace(/\[Skip to Content\]\(#[^\)]*\)/gi, ""); } +