Skip to content

Commit 041e82b

Browse files
committed
refactor(markdown-parser): promote fenced code block skipped trivia to explicit CST nodes
Replace 4 parse_as_skipped_trivia_tokens() call sites in fenced_code_block.rs:
- Sites 1-3: blockquote > prefixes on continuation lines emit MdQuotePrefix nodes.
- Site 4: fence indent stripping emits MdIndentToken nodes.
Add MdIndentToken to AnyMdInline in the grammar and regenerate codegen. Add a no-op MdIndentToken arm to extract_alt_text_inline in to_html.rs. Add an error fixture documenting the pre-existing fenced-code-in-blockquote limitation. Extract try_bump_quote_marker as pub(crate) to deduplicate the marker-bumping logic.
1 parent 1022662 commit 041e82b

File tree

9 files changed

+666
-146
lines changed

9 files changed

+666
-146
lines changed

crates/biome_markdown_formatter/src/markdown/any/inline.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ impl FormatRule<AnyMdInline> for FormatAnyMdInline {
1212
AnyMdInline::MdEntityReference(node) => node.format().fmt(f),
1313
AnyMdInline::MdHardLine(node) => node.format().fmt(f),
1414
AnyMdInline::MdHtmlBlock(node) => node.format().fmt(f),
15+
AnyMdInline::MdIndentToken(node) => node.format().fmt(f),
1516
AnyMdInline::MdInlineCode(node) => node.format().fmt(f),
1617
AnyMdInline::MdInlineEmphasis(node) => node.format().fmt(f),
1718
AnyMdInline::MdInlineHtml(node) => node.format().fmt(f),

crates/biome_markdown_parser/src/syntax/fenced_code_block.rs

Lines changed: 113 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ use biome_parser::{
3333
};
3434

3535
use crate::syntax::parse_error::unterminated_fenced_code;
36+
use crate::syntax::quote::try_bump_quote_marker;
3637
use crate::syntax::{MAX_BLOCK_PREFIX_INDENT, TAB_STOP_SPACES};
3738

3839
/// Minimum number of fence characters required per CommonMark §4.5.
@@ -276,70 +277,128 @@ fn parse_code_content(
276277

277278
// Consume all tokens until we see the matching closing fence or EOF
278279
while !p.at(T![EOF]) {
279-
if at_line_start && quote_depth > 0 {
280-
let prev_virtual = p.state().virtual_line_start;
281-
p.state_mut().virtual_line_start = Some(p.cur_range().start());
282-
p.skip_line_indent(MAX_BLOCK_PREFIX_INDENT);
283-
p.state_mut().virtual_line_start = prev_virtual;
284-
285-
let mut ok = true;
286-
for _ in 0..quote_depth {
287-
if p.at(MD_TEXTUAL_LITERAL) && p.cur_text().starts_with('>') {
288-
p.force_relex_regular();
289-
}
280+
match prepare_next_code_content_token(
281+
p,
282+
is_tilde_fence,
283+
fence_len,
284+
fence_indent,
285+
quote_depth,
286+
&mut at_line_start,
287+
) {
288+
CodeContentLoopAction::Break => break,
289+
CodeContentLoopAction::Continue => continue,
290+
CodeContentLoopAction::ConsumeText => {}
291+
}
290292

291-
if p.at(T![>]) {
292-
p.parse_as_skipped_trivia_tokens(|p| p.bump(T![>]));
293-
} else if p.at(MD_TEXTUAL_LITERAL) && p.cur_text() == ">" {
294-
p.parse_as_skipped_trivia_tokens(|p| p.bump_remap(T![>]));
295-
} else {
296-
ok = false;
297-
break;
298-
}
293+
consume_code_textual(p);
294+
at_line_start = false;
295+
}
299296

300-
if p.at(MD_TEXTUAL_LITERAL) {
301-
let text = p.cur_text();
302-
if text == " " || text == "\t" {
303-
p.parse_as_skipped_trivia_tokens(|p| p.bump(MD_TEXTUAL_LITERAL));
304-
}
305-
}
306-
}
297+
m.complete(p, MD_INLINE_ITEM_LIST);
298+
}
307299

308-
if !ok {
309-
break;
310-
}
311-
at_line_start = false;
312-
}
300+
/// Decision produced for one iteration of the fenced-code content loop,
/// telling the caller how to proceed with the current token.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum CodeContentLoopAction {
    /// Stop consuming code content and leave the loop.
    Break,
    /// The current token was already handled; start the next iteration.
    Continue,
    /// Consume the current token as plain code text.
    ConsumeText,
}
313305

314-
if p.at(NEWLINE) {
315-
// Preserve newlines as code content and reset virtual line start.
316-
let text_m = p.start();
317-
p.bump_remap(MD_TEXTUAL_LITERAL);
318-
text_m.complete(p, MD_TEXTUAL);
319-
p.set_virtual_line_start();
320-
at_line_start = true;
321-
continue;
306+
fn prepare_next_code_content_token(
307+
p: &mut MarkdownParser,
308+
is_tilde_fence: bool,
309+
fence_len: usize,
310+
fence_indent: usize,
311+
quote_depth: usize,
312+
at_line_start: &mut bool,
313+
) -> CodeContentLoopAction {
314+
if *at_line_start && quote_depth > 0 {
315+
if !consume_quote_prefixes_in_code_content(p, quote_depth) {
316+
return CodeContentLoopAction::Break;
322317
}
318+
*at_line_start = false;
319+
}
320+
321+
if consume_code_newline(p) {
322+
*at_line_start = true;
323+
return CodeContentLoopAction::Continue;
324+
}
325+
326+
if at_closing_fence(p, is_tilde_fence, fence_len) {
327+
return CodeContentLoopAction::Break;
328+
}
323329

330+
if *at_line_start && fence_indent > 0 {
331+
skip_fenced_content_indent(p, fence_indent);
324332
if at_closing_fence(p, is_tilde_fence, fence_len) {
325-
break;
333+
return CodeContentLoopAction::Break;
326334
}
335+
}
327336

328-
if at_line_start && fence_indent > 0 {
329-
skip_fenced_content_indent(p, fence_indent);
330-
if at_closing_fence(p, is_tilde_fence, fence_len) {
331-
break;
332-
}
337+
CodeContentLoopAction::ConsumeText
338+
}
339+
340+
fn consume_quote_prefixes_in_code_content(p: &mut MarkdownParser, quote_depth: usize) -> bool {
341+
let prev_virtual = p.state().virtual_line_start;
342+
p.state_mut().virtual_line_start = Some(p.cur_range().start());
343+
p.skip_line_indent(MAX_BLOCK_PREFIX_INDENT);
344+
p.state_mut().virtual_line_start = prev_virtual;
345+
346+
for _ in 0..quote_depth {
347+
if !consume_quote_prefix_in_code_content(p) {
348+
return false;
333349
}
350+
}
334351

335-
// Consume the token as code content (including NEWLINE tokens)
336-
let text_m = p.start();
337-
p.bump_remap(MD_TEXTUAL_LITERAL);
338-
text_m.complete(p, MD_TEXTUAL);
339-
at_line_start = false;
352+
true
353+
}
354+
355+
fn consume_quote_prefix_in_code_content(p: &mut MarkdownParser) -> bool {
356+
if p.at(MD_TEXTUAL_LITERAL) && p.cur_text().starts_with('>') {
357+
p.force_relex_regular();
340358
}
341359

342-
m.complete(p, MD_INLINE_ITEM_LIST);
360+
if !(p.at(T![>]) || (p.at(MD_TEXTUAL_LITERAL) && p.cur_text() == ">")) {
361+
return false;
362+
}
363+
364+
let prefix_m = p.start();
365+
366+
// Empty pre-marker indent list (initial indent handled by skip_line_indent).
367+
let indent_list_m = p.start();
368+
indent_list_m.complete(p, MD_QUOTE_INDENT_LIST);
369+
370+
debug_assert!(try_bump_quote_marker(p), "guard above guarantees marker present");
371+
372+
// Optional post-marker space
373+
if p.at(MD_TEXTUAL_LITERAL) {
374+
let text = p.cur_text();
375+
if text == " " || text == "\t" {
376+
p.bump_remap(MD_QUOTE_POST_MARKER_SPACE);
377+
}
378+
}
379+
380+
prefix_m.complete(p, MD_QUOTE_PREFIX);
381+
true
382+
}
383+
384+
fn consume_code_newline(p: &mut MarkdownParser) -> bool {
385+
if !p.at(NEWLINE) {
386+
return false;
387+
}
388+
389+
// Preserve newlines as code content and reset virtual line start.
390+
let text_m = p.start();
391+
p.bump_remap(MD_TEXTUAL_LITERAL);
392+
text_m.complete(p, MD_TEXTUAL);
393+
p.set_virtual_line_start();
394+
true
395+
}
396+
397+
fn consume_code_textual(p: &mut MarkdownParser) {
398+
// Consume the token as code content (including NEWLINE tokens).
399+
let text_m = p.start();
400+
p.bump_remap(MD_TEXTUAL_LITERAL);
401+
text_m.complete(p, MD_TEXTUAL);
343402
}
344403

345404
pub(crate) fn info_string_has_backtick(p: &mut MarkdownParser) -> bool {
@@ -390,7 +449,9 @@ fn skip_fenced_content_indent(p: &mut MarkdownParser, indent: usize) {
390449
}
391450

392451
consumed += width;
393-
p.parse_as_skipped_trivia_tokens(|p| p.bump(MD_TEXTUAL_LITERAL));
452+
let char_m = p.start();
453+
p.bump_remap(MD_INDENT_CHAR);
454+
char_m.complete(p, MD_INDENT_TOKEN);
394455
}
395456
}
396457

0 commit comments

Comments
 (0)