Skip to content

Commit 412a08d

Browse files
authored
refactor(markdown-parser): deduplicate link scan/parse with BumpMode (#9192)
1 parent 84935a4 commit 412a08d

File tree

1 file changed

+28
-137
lines changed
  • crates/biome_markdown_parser/src/syntax/inline

1 file changed

+28
-137
lines changed

crates/biome_markdown_parser/src/syntax/inline/links.rs

Lines changed: 28 additions & 137 deletions
Original file line number | Diff line number | Diff line change
@@ -496,6 +496,7 @@ fn bump_textual_link_def(p: &mut MarkdownParser) {
496496
p.bump_remap_with_context(MD_TEXTUAL_LITERAL, MarkdownLexContext::LinkDefinition);
497497
item.complete(p, MD_TEXTUAL);
498498
}
499+
499500
fn is_whitespace_token(p: &MarkdownParser) -> bool {
500501
let text = p.cur_text();
501502
!text.is_empty() && text.chars().all(|c| c == ' ' || c == '\t')
@@ -531,9 +532,14 @@ fn inline_link_is_valid(p: &mut MarkdownParser) -> InlineLinkValidation {
531532
}
532533

533534
p.bump(L_PAREN);
534-
p.re_lex_link_definition();
535535

536-
let destination_result = scan_inline_link_destination_tokens(p);
536+
// Skip leading whitespace before angle bracket check
537+
// (parse_inline_link_destination_tokens only skips whitespace in the raw path).
538+
while is_title_separator_token(p) {
539+
bump_link_def_separator(p);
540+
}
541+
542+
let destination_result = parse_inline_link_destination_tokens(p);
537543

538544
// If depth exceeded, link is valid but truncated - no need to check for closing paren
539545
if destination_result == DestinationScanResult::DepthExceeded {
@@ -546,20 +552,17 @@ fn inline_link_is_valid(p: &mut MarkdownParser) -> InlineLinkValidation {
546552

547553
let mut saw_separator = false;
548554
while is_title_separator_token(p) {
549-
skip_link_def_separator_tokens(p);
555+
bump_link_def_separator(p);
550556
saw_separator = true;
551557
}
552558
let has_title = saw_separator && get_title_close_char(p).is_some();
553-
while is_title_separator_token(p) {
554-
skip_link_def_separator_tokens(p);
555-
}
556559

557560
if has_title {
558-
scan_title_content(p, get_title_close_char(p));
561+
parse_title_content(p, get_title_close_char(p));
559562
}
560563

561564
while is_title_separator_token(p) {
562-
skip_link_def_separator_tokens(p);
565+
bump_link_def_separator(p);
563566
}
564567

565568
if p.at(R_PAREN) {
@@ -582,14 +585,14 @@ enum DestinationScanResult {
582585
DepthExceeded,
583586
}
584587

585-
fn scan_inline_link_destination_tokens(p: &mut MarkdownParser) -> DestinationScanResult {
588+
fn parse_inline_link_destination_tokens(p: &mut MarkdownParser) -> DestinationScanResult {
586589
const MAX_PAREN_DEPTH: i32 = MAX_LINK_DESTINATION_PAREN_DEPTH;
587-
// Skip leading whitespace to match parse_inline_link_destination_tokens behavior
588-
while is_title_separator_token(p) {
589-
skip_link_def_separator_tokens(p);
590-
}
590+
591+
p.re_lex_link_definition();
592+
593+
// Enclosed destination: <url>
591594
if p.at(L_ANGLE) {
592-
p.bump_link_definition();
595+
bump_textual_link_def(p);
593596
let mut pending_escape = false;
594597
loop {
595598
if p.at(EOF) || p.at(NEWLINE) {
@@ -604,10 +607,10 @@ fn scan_inline_link_destination_tokens(p: &mut MarkdownParser) -> DestinationSca
604607
) {
605608
return DestinationScanResult::Invalid;
606609
}
607-
p.bump_link_definition();
610+
bump_textual_link_def(p);
608611
continue;
609612
}
610-
p.bump_link_definition();
613+
bump_textual_link_def(p);
611614
return DestinationScanResult::Valid;
612615
}
613616
if !validate_link_destination_text(
@@ -617,12 +620,19 @@ fn scan_inline_link_destination_tokens(p: &mut MarkdownParser) -> DestinationSca
617620
) {
618621
return DestinationScanResult::Invalid;
619622
}
620-
p.bump_link_definition();
623+
bump_textual_link_def(p);
621624
}
622625
}
623626

627+
// Raw destination (no angle brackets)
624628
let mut paren_depth: i32 = 0;
625629
let mut pending_escape = false;
630+
631+
// Skip leading whitespace in raw path.
632+
while is_title_separator_token(p) {
633+
bump_link_def_separator(p);
634+
}
635+
626636
while !p.at(EOF) && !p.at(NEWLINE) {
627637
if is_whitespace_token(p) {
628638
break;
@@ -634,16 +644,12 @@ fn scan_inline_link_destination_tokens(p: &mut MarkdownParser) -> DestinationSca
634644
match try_update_paren_depth(text, paren_depth, MAX_PAREN_DEPTH) {
635645
ParenDepthResult::Ok(next_depth) => {
636646
paren_depth = next_depth;
637-
p.bump_link_definition();
647+
bump_textual_link_def(p);
638648
}
639649
ParenDepthResult::DepthExceeded => {
640-
// Paren depth exceeded - destination is truncated at this point.
641-
// Per CommonMark/cmark, the link is still valid but closed here.
642650
return DestinationScanResult::DepthExceeded;
643651
}
644652
ParenDepthResult::UnmatchedClose => {
645-
// Unmatched closing paren - destination ends here normally.
646-
// The `)` belongs to the enclosing construct (inline link closer).
647653
break;
648654
}
649655
}
@@ -661,41 +667,6 @@ fn scan_inline_link_destination_tokens(p: &mut MarkdownParser) -> DestinationSca
661667
DestinationScanResult::Valid
662668
}
663669

664-
fn scan_title_content(p: &mut MarkdownParser, close_char: Option<char>) {
665-
let Some(close_char) = close_char else {
666-
return;
667-
};
668-
669-
let text = p.cur_text();
670-
let is_complete = text.len() >= 2 && ends_with_unescaped_close(text, close_char);
671-
672-
p.bump_link_definition();
673-
if is_complete {
674-
return;
675-
}
676-
677-
loop {
678-
// Stop on EOF or blank line (titles cannot span blank lines per CommonMark)
679-
if p.at(EOF) || p.at_blank_line() {
680-
return;
681-
}
682-
683-
// Continue through single newlines (titles can span non-blank lines)
684-
if p.at(NEWLINE) {
685-
skip_link_def_separator_tokens(p);
686-
continue;
687-
}
688-
689-
let text = p.cur_text();
690-
if ends_with_unescaped_close(text, close_char) {
691-
p.bump_link_definition();
692-
return;
693-
}
694-
695-
p.bump_link_definition();
696-
}
697-
}
698-
699670
fn skip_link_def_separator_tokens(p: &mut MarkdownParser) {
700671
if p.at(NEWLINE) {
701672
p.bump(NEWLINE);
@@ -718,86 +689,6 @@ fn bump_link_def_separator(p: &mut MarkdownParser) {
718689
}
719690
}
720691

721-
fn parse_inline_link_destination_tokens(p: &mut MarkdownParser) -> DestinationScanResult {
722-
p.re_lex_link_definition();
723-
const MAX_PAREN_DEPTH: i32 = MAX_LINK_DESTINATION_PAREN_DEPTH;
724-
725-
if p.at(L_ANGLE) {
726-
bump_textual_link_def(p);
727-
let mut pending_escape = false;
728-
loop {
729-
if p.at(EOF) || p.at(NEWLINE) {
730-
return DestinationScanResult::Invalid;
731-
}
732-
if p.at(R_ANGLE) {
733-
if pending_escape {
734-
if !validate_link_destination_text(
735-
p.cur_text(),
736-
LinkDestinationKind::Enclosed,
737-
&mut pending_escape,
738-
) {
739-
return DestinationScanResult::Invalid;
740-
}
741-
bump_textual_link_def(p);
742-
continue;
743-
}
744-
bump_textual_link_def(p);
745-
return DestinationScanResult::Valid;
746-
}
747-
if !validate_link_destination_text(
748-
p.cur_text(),
749-
LinkDestinationKind::Enclosed,
750-
&mut pending_escape,
751-
) {
752-
return DestinationScanResult::Invalid;
753-
}
754-
bump_textual_link_def(p);
755-
}
756-
}
757-
758-
let mut paren_depth: i32 = 0;
759-
let mut pending_escape = false;
760-
while is_title_separator_token(p) {
761-
bump_link_def_separator(p);
762-
}
763-
while !p.at(EOF) && !p.at(NEWLINE) {
764-
if is_whitespace_token(p) {
765-
break;
766-
}
767-
768-
let text = p.cur_text();
769-
if !validate_link_destination_text(text, LinkDestinationKind::Raw, &mut pending_escape) {
770-
return DestinationScanResult::Invalid;
771-
}
772-
match try_update_paren_depth(text, paren_depth, MAX_PAREN_DEPTH) {
773-
ParenDepthResult::Ok(next_depth) => {
774-
paren_depth = next_depth;
775-
bump_textual_link_def(p);
776-
}
777-
ParenDepthResult::DepthExceeded => {
778-
// Paren depth exceeded - destination is truncated at this point.
779-
return DestinationScanResult::DepthExceeded;
780-
}
781-
ParenDepthResult::UnmatchedClose => {
782-
// Unmatched closing paren - destination ends here normally.
783-
// The `)` belongs to the enclosing construct (inline link closer).
784-
break;
785-
}
786-
}
787-
}
788-
if p.at(EOF) {
789-
return DestinationScanResult::Invalid;
790-
}
791-
if p.at(NEWLINE) {
792-
return if p.at_blank_line() {
793-
DestinationScanResult::Invalid
794-
} else {
795-
DestinationScanResult::Valid
796-
};
797-
}
798-
DestinationScanResult::Valid
799-
}
800-
801692
fn get_title_close_char(p: &MarkdownParser) -> Option<char> {
802693
let text = p.cur_text();
803694
if text.starts_with('"') {

0 commit comments

Comments
 (0)