Skip to content

Commit 266fa2b

Browse files
Fix/word internal underscores (#125)
* handle case for internal underscore
* fix: word-internal underscores incorrectly treated as incomplete markdown
* chore: remove redundant coverage
* fix: word internal underscores
* fix: handle non-unicode
1 parent fc09c5a commit 266fa2b

File tree

4 files changed

+193
-1
lines changed

4 files changed

+193
-1
lines changed
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
---
2+
"streamdown": patch
3+
---
4+
5+
Fix word-internal underscores being incorrectly treated as incomplete markdown
6+
7+
Previously, underscores used as word separators (e.g., `hello_world`, `snake_case`) were incorrectly identified as incomplete italic markdown, causing an extra underscore to be appended. This fix:
8+
9+
- Detects when an underscore sits between word characters (Unicode letters, digits, or other underscores) and treats it as a literal
10+
- Preserves the streaming markdown completion for genuine incomplete italics (e.g., `_italic text`)
11+
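- Inserts the closing underscore before any trailing newlines when completing italic formatting, so an `_italic` line followed by a newline is closed as `_italic_` with the newline kept after it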
12+
13+
Fixes the issue where `hello_world` would become `hello_world_` when `parseIncompleteMarkdown` was enabled.

packages/streamdown/__tests__/parse-incomplete-markdown.test.ts

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,29 @@ describe("parseIncompleteMarkdown", () => {
190190
"some\\_text_with_underscores"
191191
);
192192
});
193+
194+
it("should handle mixed escaped and unescaped underscores correctly", () => {
195+
expect(parseIncompleteMarkdown("\\_escaped\\_ and _unescaped")).toBe(
196+
"\\_escaped\\_ and _unescaped_"
197+
);
198+
199+
expect(parseIncompleteMarkdown("Start \\_escaped\\_ middle _incomplete")).toBe(
200+
"Start \\_escaped\\_ middle _incomplete_"
201+
);
202+
203+
expect(parseIncompleteMarkdown("\\_fully\\_escaped\\_")).toBe(
204+
"\\_fully\\_escaped\\_"
205+
);
206+
207+
expect(parseIncompleteMarkdown("\\_escaped\\_ _complete_ pair")).toBe(
208+
"\\_escaped\\_ _complete_ pair"
209+
);
210+
});
211+
212+
it("should handle underscores with unicode word characters", () => {
213+
expect(parseIncompleteMarkdown("café_price")).toBe("café_price");
214+
expect(parseIncompleteMarkdown("naïve_approach")).toBe("naïve_approach");
215+
});
193216
});
194217

195218
describe("inline code formatting (`)", () => {
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
import { describe, expect, it } from "vitest";
2+
import { parseIncompleteMarkdown } from "../lib/parse-incomplete-markdown";
3+
4+
describe("parseIncompleteMarkdown - word-internal underscores", () => {
5+
describe("underscores as word separators", () => {
6+
it("should handle single underscore between words", () => {
7+
const input = "hello_world";
8+
const result = parseIncompleteMarkdown(input);
9+
expect(result).toBe("hello_world");
10+
});
11+
12+
it("should handle multiple underscores between words", () => {
13+
const input = "hello_world_test";
14+
const result = parseIncompleteMarkdown(input);
15+
expect(result).toBe("hello_world_test");
16+
});
17+
18+
it("should handle CONSTANT_CASE", () => {
19+
const input = "MAX_VALUE";
20+
const result = parseIncompleteMarkdown(input);
21+
expect(result).toBe("MAX_VALUE");
22+
});
23+
24+
it("should handle multiple snake_case words in text", () => {
25+
const input = "The user_name and user_email are required";
26+
const result = parseIncompleteMarkdown(input);
27+
expect(result).toBe("The user_name and user_email are required");
28+
});
29+
30+
it("should handle underscore in URLs", () => {
31+
const input = "Visit https://example.com/path_with_underscore";
32+
const result = parseIncompleteMarkdown(input);
33+
expect(result).toBe("Visit https://example.com/path_with_underscore");
34+
});
35+
36+
it("should handle numbers with underscores", () => {
37+
const input = "The value is 1_000_000";
38+
const result = parseIncompleteMarkdown(input);
39+
expect(result).toBe("The value is 1_000_000");
40+
});
41+
});
42+
43+
describe("incomplete italic formatting", () => {
44+
it("should complete italic at word boundary", () => {
45+
const input = "_italic text";
46+
const result = parseIncompleteMarkdown(input);
47+
expect(result).toBe("_italic text_");
48+
});
49+
50+
it("should complete italic with punctuation", () => {
51+
const input = "This is _italic";
52+
const result = parseIncompleteMarkdown(input);
53+
expect(result).toBe("This is _italic_");
54+
});
55+
56+
it("should complete italic before newline", () => {
57+
const input = "_italic\n";
58+
const result = parseIncompleteMarkdown(input);
59+
expect(result).toBe("_italic_\n");
60+
});
61+
});
62+
63+
describe("edge cases", () => {
64+
it("should handle underscore at end of word (ambiguous case)", () => {
65+
const input = "word_";
66+
const result = parseIncompleteMarkdown(input);
67+
expect(result).toBe("word_");
68+
});
69+
70+
it("should handle leading underscore in identifier", () => {
71+
const input = "_privateVariable";
72+
const result = parseIncompleteMarkdown(input);
73+
expect(result).toBe("_privateVariable_");
74+
});
75+
76+
it("should handle code with underscores in markdown", () => {
77+
const input = "Use `variable_name` in your code";
78+
const result = parseIncompleteMarkdown(input);
79+
expect(result).toBe("Use `variable_name` in your code");
80+
});
81+
82+
it("should handle mixed snake_case and italic", () => {
83+
const input = "The variable_name is _important";
84+
const result = parseIncompleteMarkdown(input);
85+
expect(result).toBe("The variable_name is _important_");
86+
});
87+
88+
it("should not modify complete italic pairs", () => {
89+
const input = "_complete italic_ and some_other_text";
90+
const result = parseIncompleteMarkdown(input);
91+
expect(result).toBe("_complete italic_ and some_other_text");
92+
});
93+
94+
it("should handle underscore in code blocks", () => {
95+
const input = "```\nfunction_name()\n```";
96+
const result = parseIncompleteMarkdown(input);
97+
expect(result).toBe("```\nfunction_name()\n```");
98+
});
99+
100+
it("should handle HTML attributes with underscores", () => {
101+
const input = '<div data_attribute="value">';
102+
const result = parseIncompleteMarkdown(input);
103+
expect(result).toBe('<div data_attribute="value">');
104+
});
105+
});
106+
107+
describe("real-world scenarios", () => {
108+
it("should handle Python-style names", () => {
109+
const input = "__init__ and __main__ are special";
110+
const result = parseIncompleteMarkdown(input);
111+
expect(result).toBe("__init__ and __main__ are special");
112+
});
113+
114+
it("should handle markdown in sentences with snake_case", () => {
115+
const input = "The user_id field stores the _unique identifier";
116+
const result = parseIncompleteMarkdown(input);
117+
expect(result).toBe("The user_id field stores the _unique identifier_");
118+
});
119+
120+
it("should handle the original bug report case", () => {
121+
const input = `hello_world
122+
123+
<a href="example_link"/>`;
124+
const result = parseIncompleteMarkdown(input);
125+
expect(result).toBe(input);
126+
expect(result).not.toMatch(/hello_world_/);
127+
expect(result).not.toMatch(/_$/);
128+
});
129+
});
130+
});

packages/streamdown/lib/parse-incomplete-markdown.ts

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,10 @@ const countSingleUnderscores = (text: string): number => {
254254
if (isWithinMathBlock(text, index)) {
255255
return acc;
256256
}
257+
// Skip if underscore is word-internal (between word characters)
258+
if (prevChar && nextChar && /[\p{L}\p{N}_]/u.test(prevChar) && /[\p{L}\p{N}_]/u.test(nextChar)) {
259+
return acc;
260+
}
257261
if (prevChar !== "_" && nextChar !== "_") {
258262
return acc + 1;
259263
}
@@ -272,15 +276,28 @@ const handleIncompleteSingleUnderscoreItalic = (text: string): string => {
272276
const singleUnderscoreMatch = text.match(singleUnderscorePattern);
273277

274278
if (singleUnderscoreMatch) {
275-
// Find the first single underscore position (not part of __)
279+
// Find the first single underscore position (not part of __ and not word-internal)
276280
let firstSingleUnderscoreIndex = -1;
277281
for (let i = 0; i < text.length; i++) {
278282
if (
279283
text[i] === "_" &&
280284
text[i - 1] !== "_" &&
281285
text[i + 1] !== "_" &&
286+
text[i - 1] !== "\\" &&
282287
!isWithinMathBlock(text, i)
283288
) {
289+
// Check if underscore is word-internal (between word characters)
290+
const prevChar = i > 0 ? text[i - 1] : "";
291+
const nextChar = i < text.length - 1 ? text[i + 1] : "";
292+
if (
293+
prevChar &&
294+
nextChar &&
295+
/[\p{L}\p{N}_]/u.test(prevChar) &&
296+
/[\p{L}\p{N}_]/u.test(nextChar)
297+
) {
298+
continue;
299+
}
300+
284301
firstSingleUnderscoreIndex = i;
285302
break;
286303
}
@@ -306,6 +323,15 @@ const handleIncompleteSingleUnderscoreItalic = (text: string): string => {
306323

307324
const singleUnderscores = countSingleUnderscores(text);
308325
if (singleUnderscores % 2 === 1) {
326+
// If text ends with newline(s), insert underscore before them
327+
const trailingNewlineMatch = text.match(/\n+$/);
328+
if (trailingNewlineMatch) {
329+
const textBeforeNewlines = text.slice(
330+
0,
331+
-trailingNewlineMatch[0].length
332+
);
333+
return `${textBeforeNewlines}_${trailingNewlineMatch[0]}`;
334+
}
309335
return `${text}_`;
310336
}
311337
}

0 commit comments

Comments
 (0)