Skip to content

Commit 3da819f

Browse files
authored
Merge pull request #2960 from gibson042/gh-2959-markdown-links
fix: Markdown link parsing
2 parents 6f6f3cf + bb45c31 commit 3da819f

File tree

3 files changed

+286
-45
lines changed

3 files changed

+286
-45
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ title: Changelog
77
### Bug Fixes
88

99
- Attempting to highlight a supported language which is not enabled is now a warning, not an error, #2956.
10+
- Improved compatibility with CommonMark's link parsing, #2959.
1011

1112
## v0.28.5 (2025-05-26)
1213

src/lib/converter/comments/textParser.ts

Lines changed: 78 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ interface TextParserData {
1919
sourcePath: NormalizedPath;
2020
token: Token;
2121
pos: number;
22-
i18n: TranslationProxy;
2322
warning: (msg: TranslatedString, token: Token) => void;
2423
files: FileRegistry;
2524
atNewLine: boolean;
@@ -41,18 +40,21 @@ interface RelativeLink {
4140
*/
4241
export class TextParserReentryState {
4342
withinLinkLabel = false;
43+
withinLinkDest = false;
4444
private lastPartWasNewline = false;
4545

4646
checkState(token: Token) {
4747
switch (token.kind) {
4848
case TokenSyntaxKind.Code:
4949
if (/\n\s*\n/.test(token.text)) {
5050
this.withinLinkLabel = false;
51+
this.withinLinkDest = false;
5152
}
5253
break;
5354
case TokenSyntaxKind.NewLine:
5455
if (this.lastPartWasNewline) {
5556
this.withinLinkLabel = false;
57+
this.withinLinkDest = false;
5658
}
5759
break;
5860
}
@@ -76,17 +78,18 @@ export function textContent(
7678
reentry: TextParserReentryState,
7779
) {
7880
let lastPartEnd = 0;
81+
let canEndMarkdownLink = true;
7982
const data: TextParserData = {
8083
sourcePath,
8184
token,
8285
pos: 0, // relative to the token
83-
i18n,
8486
warning,
8587
files: files,
8688
atNewLine,
8789
};
8890

8991
function addRef(ref: RelativeLink) {
92+
canEndMarkdownLink = true;
9093
outContent.push({
9194
kind: "text",
9295
text: token.text.slice(lastPartEnd, ref.pos),
@@ -116,10 +119,15 @@ export function textContent(
116119
}
117120

118121
while (data.pos < token.text.length) {
119-
const link = checkMarkdownLink(data, reentry);
120-
if (link) {
121-
addRef(link);
122-
continue;
122+
if (canEndMarkdownLink) {
123+
const link = checkMarkdownLink(data, reentry);
124+
if (link) {
125+
addRef(link);
126+
continue;
127+
}
128+
// If we're within a Markdown link, then `checkMarkdownLink`
129+
// already scanned `token` up to a line feed (if any).
130+
canEndMarkdownLink = !reentry.withinLinkLabel && !reentry.withinLinkDest;
123131
}
124132

125133
const reference = checkReference(data);
@@ -134,7 +142,9 @@ export function textContent(
134142
continue;
135143
}
136144

137-
data.atNewLine = token.text[data.pos] === "\n";
145+
const atNewLine = token.text[data.pos] === "\n";
146+
data.atNewLine = atNewLine;
147+
if (atNewLine && !reentry.withinLinkDest) canEndMarkdownLink = true;
138148
++data.pos;
139149
}
140150

@@ -160,53 +170,73 @@ function checkMarkdownLink(
160170
const { token, sourcePath, files } = data;
161171

162172
let searchStart: number;
163-
if (reentry.withinLinkLabel) {
173+
if (reentry.withinLinkLabel || reentry.withinLinkDest) {
164174
searchStart = data.pos;
165-
reentry.withinLinkLabel = false;
166175
} else if (token.text[data.pos] === "[") {
167176
searchStart = data.pos + 1;
168177
} else {
169178
return;
170179
}
171180

172-
const labelEnd = findLabelEnd(token.text, searchStart);
173-
if (labelEnd === -1) {
174-
// This markdown link might be split across multiple display parts
175-
// [ `text` ](link)
176-
// ^^ text
177-
// ^^^^^^ code
178-
// ^^^^^^^^ text
179-
reentry.withinLinkLabel = true;
180-
return;
181+
if (!reentry.withinLinkDest) {
182+
const labelEnd = findLabelEnd(token.text, searchStart);
183+
if (labelEnd === -1 || token.text[labelEnd] === "\n") {
184+
// This markdown link might be split across multiple lines or input tokens
185+
// [prefix `code` suffix](target)
186+
// ........^^^^^^................
187+
// Unless we encounter two consecutive line feeds, expect it to keep going.
188+
reentry.withinLinkLabel = labelEnd !== data.pos || !data.atNewLine;
189+
return;
190+
}
191+
reentry.withinLinkLabel = false;
192+
if (!token.text.startsWith("](", labelEnd)) return;
193+
searchStart = labelEnd + 2;
181194
}
182195

183-
if (token.text[labelEnd] === "]" && token.text[labelEnd + 1] === "(") {
184-
const link = MdHelpers.parseLinkDestination(
185-
token.text,
186-
labelEnd + 2,
187-
token.text.length,
188-
);
189-
190-
if (link.ok) {
191-
// Only make a relative-link display part if it's actually a relative link.
192-
// Discard protocol:// links, unix style absolute paths, and windows style absolute paths.
193-
if (isRelativePath(link.str)) {
194-
const { target, anchor } = files.register(
195-
sourcePath,
196-
link.str as NormalizedPath,
197-
) || { target: undefined, anchor: undefined };
198-
return {
199-
pos: labelEnd + 2,
200-
end: link.pos,
201-
target,
202-
targetAnchor: anchor,
203-
};
204-
}
205-
206-
// This was a link, skip ahead to ensure we don't happen to parse
207-
// something else as a link within the link.
208-
data.pos = link.pos - 1;
196+
// Skip whitespace (including line breaks) between "](" and the link destination.
197+
// https://spec.commonmark.org/0.31.2/#links
198+
const end = token.text.length;
199+
let lookahead = searchStart;
200+
for (let newlines = 0;; ++lookahead) {
201+
if (lookahead === end) {
202+
reentry.withinLinkDest = true;
203+
return;
209204
}
205+
switch (token.text[lookahead]) {
206+
case "\n":
207+
if (++newlines === 2) {
208+
reentry.withinLinkDest = false;
209+
return;
210+
}
211+
continue;
212+
case " ":
213+
case "\t":
214+
continue;
215+
}
216+
break;
217+
}
218+
reentry.withinLinkDest = false;
219+
220+
const link = MdHelpers.parseLinkDestination(token.text, lookahead, end);
221+
if (link.ok) {
222+
// Only make a relative-link display part if it's actually a relative link.
223+
// Discard protocol:// links, unix style absolute paths, and windows style absolute paths.
224+
if (isRelativePath(link.str)) {
225+
const { target, anchor } = files.register(
226+
sourcePath,
227+
link.str as NormalizedPath,
228+
) || { target: undefined, anchor: undefined };
229+
return {
230+
pos: lookahead,
231+
end: link.pos,
232+
target,
233+
targetAnchor: anchor,
234+
};
235+
}
236+
237+
// This was a link, skip ahead to ensure we don't happen to parse
238+
// something else as a link within the link.
239+
data.pos = link.pos - 1;
210240
}
211241
}
212242

@@ -328,6 +358,10 @@ function isRelativePath(link: string) {
328358
function findLabelEnd(text: string, pos: number) {
329359
while (pos < text.length) {
330360
switch (text[pos]) {
361+
case "\\":
362+
++pos;
363+
if (pos < text.length && text[pos] === "\n") return pos;
364+
break;
331365
case "\n":
332366
case "]":
333367
case "[":

0 commit comments

Comments
 (0)