Files
parcer/server/src/markdownify/markdown.ts

63 lines
1.5 KiB
TypeScript
Raw Normal View History

2025-11-20 03:51:27 +05:30
import TurndownService from "turndown";
import { gfm } from "joplin-turndown-plugin-gfm";
2025-11-20 02:42:44 +05:30
/**
 * Converts an HTML string to Markdown using Turndown with the GFM plugin.
 *
 * @param html - HTML to convert; `null`, `undefined`, and `""` all yield `""`.
 * @returns The converted Markdown after link post-processing, or `""` when
 *   the conversion throws (the error is logged to the console, not rethrown).
 */
export async function parseMarkdown(
  html: string | null | undefined,
): Promise<string> {
  if (!html) {
    return "";
  }

  const converter = new TurndownService();

  // Custom rule for inline links: matches <a> elements that carry an href
  // while the converter's link style is "inlined".
  const matchesInlineLink = (node: any, opts: any) =>
    opts.linkStyle === "inlined" &&
    node.nodeName === "A" &&
    node.getAttribute("href");

  const renderInlineLink = (content: string, node: any) => {
    const target = node.getAttribute("href")?.trim() || "";
    const titleSuffix = node.title ? ` "${node.title}"` : "";
    // Every rendered link is followed by a newline.
    return `[${content.trim()}](${target}${titleSuffix})\n`;
  };

  converter.addRule("inlineLink", {
    filter: matchesInlineLink,
    replacement: renderInlineLink,
  });

  // GitHub-flavored markdown features
  converter.use(gfm);

  try {
    const rawMarkdown = await converter.turndown(html);
    // Escape newlines inside brackets first, then drop "Skip to Content" links.
    return stripSkipLinks(fixBrokenLinks(rawMarkdown));
  } catch (err) {
    console.error("HTML→Markdown failed", { err });
    return "";
  }
}
// ---------------------------------------------
// Helpers
// ---------------------------------------------
/**
 * Escapes newlines that occur inside square brackets so that bracketed text
 * spanning several lines is not split by a bare line break.
 *
 * Bracket nesting is tracked with a counter: `[` increments it, `]` decrements
 * it (never below zero). While the counter is positive, each `\n` is emitted
 * as a backslash followed by the newline; all other characters pass through.
 *
 * @param md - Markdown text to process.
 * @returns The text with in-bracket newlines backslash-escaped.
 */
function fixBrokenLinks(md: string): string {
  let openBrackets = 0;

  const pieces = Array.from(md, (symbol) => {
    switch (symbol) {
      case "[":
        openBrackets += 1;
        return symbol;
      case "]":
        // An unmatched closing bracket must not push the counter negative.
        if (openBrackets > 0) {
          openBrackets -= 1;
        }
        return symbol;
      case "\n":
        return openBrackets > 0 ? "\\\n" : symbol;
      default:
        return symbol;
    }
  });

  return pieces.join("");
}
/**
 * Removes every Markdown link whose text is "Skip to Content" (matched
 * case-insensitively) and whose target starts with `#`.
 *
 * @param md - Markdown text to clean.
 * @returns The text with all such links deleted.
 */
function stripSkipLinks(md: string): string {
  const skipLinkPattern = /\[Skip to Content\]\(#[^\)]*\)/gi;
  const cleaned = md.replace(skipLinkPattern, "");
  return cleaned;
}
2025-11-20 03:51:27 +05:30