Skip to content

Commit 14fe1b6

Browse files
committed
fix: preserve newlines when minifying llms-small Markdown
Copilot review: replacing every \s+ run with a single space also collapsed newlines, which flattened fenced code blocks and broke Markdown structure. Collapse only runs of spaces/tabs and extend the tests accordingly. Made-with: Cursor
1 parent 408c4d3 commit 14fe1b6

File tree

2 files changed

+13
-4
lines changed

2 files changed

+13
-4
lines changed

src/lib/llms-html-sanitize.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ export function htmlToLlmsMarkdown(html: string, shouldMinify = false): string {
   const sanitizedHtml = stripHtmlForLlmsExport(html, shouldMinify);
   let markdown = turndown.turndown(sanitizedHtml).trim();
   if (shouldMinify) {
-    markdown = markdown.replace(/\s+/g, " ");
+    // Collapse horizontal whitespace only; keep newlines so fenced code, lists, and headings stay valid Markdown.
+    markdown = markdown.replace(/[ \t]+/g, " ");
   }
   return markdown;
 }

tests/llms-html-sanitize.test.ts

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,16 @@ describe("LLMs HTML sanitization and Markdown export", () => {
     expect(md).toMatch(/bold/);
   });
 
-  it("collapses whitespace when minify is true", () => {
-    const md = htmlToLlmsMarkdown("<p>One</p>\n\n<p>Two</p>", true);
-    expect(md).not.toMatch(/\n\n/);
+  it("when minifying, collapses runs of spaces and tabs only (preserves newlines)", () => {
+    const md = htmlToLlmsMarkdown("<p>One two three</p>", true);
+    expect(md).not.toMatch(/ {2,}/);
+  });
+
+  it("when minifying, keeps line breaks inside fenced code from Turndown", () => {
+    const md = htmlToLlmsMarkdown("<pre><code>const a = 1;\nconst b = 2;</code></pre>", true);
+    expect(md).toContain("```");
+    expect(md).toMatch(/a\s*=\s*1/);
+    expect(md).toMatch(/b\s*=\s*2/);
+    expect(md).toContain("\n");
   });
 });

0 commit comments

Comments
 (0)