Skip to content

Commit 48c9c51

Browse files
authored
fix: split autolinks at CJK punctuation (#327)
* fix: split autolinks at CJK punctuation * chore: clarify CJK autolink boundary
1 parent 82b6474 commit 48c9c51

File tree

4 files changed

+126
-0
lines changed

4 files changed

+126
-0
lines changed

.changeset/spicy-lizards-move.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"streamdown": patch
3+
---
4+
5+
Fix autolink parsing to stop at CJK punctuation boundaries.

packages/streamdown/__tests__/cjk-friendly.test.tsx

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { render } from "@testing-library/react";
2+
import remarkGfm from "remark-gfm";
23
import { describe, expect, it } from "vitest";
34
import { Streamdown } from "../index";
45

@@ -235,6 +236,27 @@ describe("CJK (Chinese, Japanese, Korean) Friendly Support (#185)", () => {
235236
expect(link?.textContent).toContain("日本語のリンク");
236237
});
237238

239+
// A bare URL followed directly by a CJK full stop: only the URL itself should
// end up inside the rendered link.
it("splits autolinks at CJK punctuation", () => {
  const markdown = "请访问 https://example.com。谢谢";
  const view = render(<Streamdown>{markdown}</Streamdown>);
  const anchor = view.container.querySelector('[data-streamdown="link"]');

  // The link label and target both end before "。".
  expect(anchor?.textContent).toBe("https://example.com");
  expect(anchor?.getAttribute("href")).toBe("https://example.com/");

  // Splitting must not drop or add any visible characters.
  expect(view.container.textContent).toBe(markdown);
});
248+
249+
// Control case: with only remark-gfm passed as `remarkPlugins`, the link is
// expected to keep the CJK punctuation and the text that follows it.
it("keeps default autolink behavior without the CJK boundary plugin", () => {
  const markdown = "请访问 https://example.com。谢谢";
  const view = render(
    <Streamdown remarkPlugins={[remarkGfm]}>{markdown}</Streamdown>
  );
  const anchor = view.container.querySelector('[data-streamdown="link"]');

  // The whole run after the scheme is still part of the link label.
  expect(anchor?.textContent).toBe("https://example.com。谢谢");
  expect(view.container.textContent).toBe(markdown);
});
259+
238260
it("renders inline code near CJK emphasis", () => {
239261
const inlineContent =
240262
"**日本語のコード**:`console.log('こんにちは(挨拶)')`";

packages/streamdown/index.tsx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import type { Pluggable } from "unified";
2525
import { components as defaultComponents } from "./lib/components";
2626
import { Markdown, type Options } from "./lib/markdown";
2727
import { parseMarkdownIntoBlocks } from "./lib/parse-blocks";
28+
import { remarkCjkAutolinkBoundary } from "./lib/remark/cjk-autolink";
2829
import { cn } from "./lib/utils";
2930
import packageJson from "./package.json";
3031

@@ -103,6 +104,7 @@ export const defaultRehypePlugins: Record<string, Pluggable> = {
103104

104105
export const defaultRemarkPlugins: Record<string, Pluggable> = {
105106
gfm: [remarkGfm, {}],
107+
cjkAutolinkBoundary: [remarkCjkAutolinkBoundary, {}],
106108
math: [remarkMath, { singleDollarTextMath: false }],
107109
cjkFriendly: [remarkCjkFriendly, {}],
108110
cjkFriendlyGfmStrikethrough: [remarkCjkFriendlyGfmStrikethrough, {}],
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
import type { Link, Root, Text } from "mdast";
2+
import type { Plugin } from "unified";
3+
import { visit } from "unist-util-visit";
4+
import type { Parent } from "unist";
5+
6+
/**
 * Full-width / CJK punctuation marks treated as the end of a literal autolink.
 *
 * Each string below is spread into its individual characters, so the set holds
 * one entry per punctuation mark.
 */
const CJK_AUTOLINK_BOUNDARY_CHARS = new Set<string>([
  // Sentence and clause punctuation.
  ..."。.,、?!:;",
  // Opening and closing brackets / quotation marks.
  ..."()【】「」『』〈〉《》",
]);
28+
29+
/**
 * Case-insensitive test that a string starts with `http://`, `https://`,
 * `mailto:` or `www.`.
 */
const AUTOLINK_PREFIX_PATTERN = /^(https?:\/\/|mailto:|www\.)/i;
30+
31+
/**
 * Scheme that the GFM autolink extension adds to the URL (but not to the
 * visible label) of a bare `www.` autolink.
 */
const WWW_AUTOLINK_URL_PREFIX = "http://";

/** Matches a label that starts like a bare `www.` autolink. */
const WWW_AUTOLINK_LABEL_PATTERN = /^www\./i;

/**
 * Reports whether `label` is the visible text of a literal autolink to `url`.
 *
 * That is the case when both are identical (`https://…` literals), or when the
 * label is a bare `www.` address and the URL is that label with `http://` in
 * front of it.
 */
const isAutolinkLabelFor = (label: string, url: string): boolean =>
  label === url ||
  (WWW_AUTOLINK_LABEL_PATTERN.test(label) &&
    url === `${WWW_AUTOLINK_URL_PREFIX}${label}`);

/**
 * Type guard for links that look like literal autolinks: exactly one text
 * child whose value corresponds to the link URL (see `isAutolinkLabelFor`).
 */
const isAutolinkLiteral = (node: Link): node is Link & { children: [Text] } => {
  if (node.children.length !== 1) {
    return false;
  }

  const child = node.children[0];
  return child.type === "text" && isAutolinkLabelFor(child.value, node.url);
};

/**
 * Finds the first CJK boundary character in `url`.
 *
 * @returns The UTF-16 offset of that character, or `null` when there is none.
 */
const findCjkBoundaryIndex = (url: string): number | null => {
  let index = 0;
  // Iterate by code point but count UTF-16 units so the result can be used
  // directly with `String.prototype.slice`.
  for (const char of url) {
    if (CJK_AUTOLINK_BOUNDARY_CHARS.has(char)) {
      return index;
    }
    index += char.length;
  }
  return null;
};

/**
 * Builds a copy of `source` that points at `url` and shows `label`.
 *
 * Every other field of `source` is copied unchanged by the spread; a
 * `position` on the source, if any, is therefore not recomputed.
 *
 * @param url - Target of the new link.
 * @param source - Link whose remaining fields are reused.
 * @param label - Visible text; defaults to `url`.
 */
const buildAutolink = (url: string, source: Link, label: string = url): Link => ({
  ...source,
  url,
  children: [
    {
      type: "text",
      value: label,
    },
  ],
});

/** Wraps the text cut off the end of an autolink in a plain text node. */
const buildTrailingText = (value: string): Text => ({
  type: "text",
  value,
});

/**
 * Splits a literal autolink at the first CJK boundary character of its label.
 *
 * @returns The shortened link and the text that followed it, or `null` when
 * the label has no boundary character or starts with one.
 */
const splitAutolinkAtCjkBoundary = (
  node: Link & { children: [Text] }
): [Link, Text] | null => {
  const label = node.children[0].value;
  const boundaryIndex = findCjkBoundaryIndex(label);
  if (boundaryIndex === null || boundaryIndex === 0) {
    return null;
  }

  const trimmedLabel = label.slice(0, boundaryIndex);
  const trailing = label.slice(boundaryIndex);
  // The label is a suffix of the URL (identical, or missing only the added
  // "http://"), so the same trailing characters are removed from both.
  const trimmedUrl = node.url.slice(0, node.url.length - trailing.length);

  return [
    buildAutolink(trimmedUrl, node, trimmedLabel),
    buildTrailingText(trailing),
  ];
};

// Split literal autolinks at CJK punctuation boundaries so trailing text
// is not swallowed by the URL. Handles both `http(s)://` literals and bare
// `www.` literals, whose URL carries an extra "http://" that the label lacks.
export const remarkCjkAutolinkBoundary: Plugin<[], Root> = () => (tree) => {
  visit(tree, "link", (node: Link, index: number | null, parent?: Parent) => {
    if (!parent || typeof index !== "number") {
      return;
    }

    if (!isAutolinkLiteral(node)) {
      return;
    }

    if (!AUTOLINK_PREFIX_PATTERN.test(node.url)) {
      return;
    }

    const split = splitAutolinkAtCjkBoundary(node);
    if (split === null) {
      return;
    }

    const [trimmedLink, trailingText] = split;
    parent.children.splice(index, 1, trimmedLink, trailingText);
    // Resume after the shortened link so it is not visited a second time.
    return index + 1;
  });
};

0 commit comments

Comments
 (0)