fix: lesser restrictions

This commit is contained in:
amhsirak
2025-11-20 16:56:43 +05:30
parent 96019058e9
commit 930c7b6c74

View File

@@ -13,21 +13,6 @@ export async function parseMarkdown(
const t = new TurndownService(); const t = new TurndownService();
// Remove irrelevant tags
const elementsToRemove = [
"meta",
"style",
"script",
"noscript",
"link",
"textarea",
];
t.addRule("remove-irrelevant", {
filter: elementsToRemove,
replacement: () => "",
});
t.addRule("truncate-svg", { t.addRule("truncate-svg", {
filter: "svg", filter: "svg",
replacement: () => "", replacement: () => "",
@@ -106,64 +91,18 @@ function tidyHtml(html: string): string {
const cheerio = require("cheerio"); const cheerio = require("cheerio");
const $ = cheerio.load(html); const $ = cheerio.load(html);
// Fix broken attributes
$("*").each(function (this: any) {
const element = $(this);
const attributes = Object.keys(this.attribs);
for (let i = 0; i < attributes.length; i++) {
let attr = attributes[i];
if (attr.includes('"')) {
element.remove();
}
}
});
const manuallyCleanedElements = [ const manuallyCleanedElements = [
"aside", "script",
"embed", "style",
"head", "iframe",
"iframe", "noscript",
"menu", "meta",
"object", "link",
"script", "object",
"applet", "embed",
"audio", "canvas",
"canvas", "audio",
"map", "video"
"svg",
"video",
"area",
"blink",
"datalist",
"dialog",
"frame",
"frameset",
"link",
"input",
"ins",
"legend",
"marquee",
"math",
"menuitem",
"nav",
"noscript",
"optgroup",
"output",
"param",
"progress",
"rp",
"rt",
"rtc",
"source",
"style",
"track",
"textarea",
"time",
"use",
"img",
"picture",
"figure",
]; ];
manuallyCleanedElements.forEach((tag) => $(tag).remove()); manuallyCleanedElements.forEach((tag) => $(tag).remove());