Skip to content

Commit 4f77634

Browse files
committed
Fix GFM task list checkboxes followed by eol
Closes GH-24.
1 parent d5ae7bb commit 4f77634

File tree

3 files changed

+114
-46
lines changed

3 files changed

+114
-46
lines changed

src/to_mdast.rs

Lines changed: 47 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,6 @@ struct CompileContext<'a> {
9090
// compile markdown.
9191
character_reference_marker: u8,
9292
gfm_table_inside: bool,
93-
gfm_task_list_item_check_after: bool,
9493
hard_break_after: bool,
9594
heading_setext_text_after: bool,
9695
jsx_tag_stack: Vec<JsxTag>,
@@ -128,7 +127,6 @@ impl<'a> CompileContext<'a> {
128127
bytes,
129128
character_reference_marker: 0,
130129
gfm_table_inside: false,
131-
gfm_task_list_item_check_after: false,
132130
hard_break_after: false,
133131
heading_setext_text_after: false,
134132
jsx_tag_stack: vec![],
@@ -347,7 +345,6 @@ fn exit(context: &mut CompileContext) -> Result<(), String> {
347345
| Name::GfmTableRow
348346
| Name::GfmTableCell
349347
| Name::HeadingAtx
350-
| Name::ListItem
351348
| Name::ListOrdered
352349
| Name::ListUnordered
353350
| Name::Paragraph
@@ -358,6 +355,7 @@ fn exit(context: &mut CompileContext) -> Result<(), String> {
358355
Name::CharacterEscapeValue
359356
| Name::CodeFlowChunk
360357
| Name::CodeTextData
358+
| Name::Data
361359
| Name::FrontmatterChunk
362360
| Name::HtmlFlowData
363361
| Name::HtmlTextData
@@ -385,7 +383,6 @@ fn exit(context: &mut CompileContext) -> Result<(), String> {
385383
Name::CodeFenced | Name::MathFlow => on_exit_raw_flow(context)?,
386384
Name::CodeIndented => on_exit_code_indented(context)?,
387385
Name::CodeText | Name::MathText => on_exit_raw_text(context)?,
388-
Name::Data => on_exit_data_actual(context)?,
389386
Name::DefinitionDestinationString => on_exit_definition_destination_string(context),
390387
Name::DefinitionLabelString | Name::GfmFootnoteDefinitionLabelString => {
391388
on_exit_definition_id(context);
@@ -399,7 +396,6 @@ fn exit(context: &mut CompileContext) -> Result<(), String> {
399396
| Name::GfmAutolinkLiteralXmpp => on_exit_gfm_autolink_literal(context)?,
400397
Name::GfmFootnoteCall | Name::Image | Name::Link => on_exit_media(context)?,
401398
Name::GfmTable => on_exit_gfm_table(context)?,
402-
Name::GfmTaskListItemCheck => on_exit_gfm_task_list_item_check(context),
403399
Name::GfmTaskListItemValueUnchecked | Name::GfmTaskListItemValueChecked => {
404400
on_exit_gfm_task_list_item_value(context);
405401
}
@@ -411,6 +407,7 @@ fn exit(context: &mut CompileContext) -> Result<(), String> {
411407
Name::HtmlFlow | Name::HtmlText => on_exit_html(context)?,
412408
Name::LabelText => on_exit_label_text(context),
413409
Name::LineEnding => on_exit_line_ending(context)?,
410+
Name::ListItem => on_exit_list_item(context)?,
414411
Name::ListItemValue => on_exit_list_item_value(context),
415412
Name::MdxEsm | Name::MdxFlowExpression | Name::MdxTextExpression => {
416413
on_exit_mdx_esm_or_expression(context)?;
@@ -1089,29 +1086,6 @@ fn on_exit_data(context: &mut CompileContext) -> Result<(), String> {
10891086
Ok(())
10901087
}
10911088

1092-
/// Handle [`Exit`][Kind::Exit]:[`Data`][Name::Data] itself.
1093-
fn on_exit_data_actual(context: &mut CompileContext) -> Result<(), String> {
1094-
on_exit_data(context)?;
1095-
1096-
// This field is set when a check exits.
1097-
// When that’s the case, there’s always a `data` event right after it.
1098-
// That data event is the first child (after the check) of the paragraph.
1099-
// We update the text positional info (from the already fixed paragraph),
1100-
// and remove the first byte, which is always a space or tab.
1101-
if context.gfm_task_list_item_check_after {
1102-
let parent = context.tail_mut();
1103-
let start = parent.position().unwrap().start.clone();
1104-
let node = parent.children_mut().unwrap().last_mut().unwrap();
1105-
node.position_mut().unwrap().start = start;
1106-
if let Node::Text(node) = node {
1107-
node.value.remove(0);
1108-
}
1109-
context.gfm_task_list_item_check_after = false;
1110-
}
1111-
1112-
Ok(())
1113-
}
1114-
11151089
/// Handle [`Exit`][Kind::Exit]:[`DefinitionDestinationString`][Name::DefinitionDestinationString].
11161090
fn on_exit_definition_destination_string(context: &mut CompileContext) {
11171091
let value = context.resume().to_string();
@@ -1210,23 +1184,6 @@ fn on_exit_gfm_table(context: &mut CompileContext) -> Result<(), String> {
12101184
Ok(())
12111185
}
12121186

1213-
/// Handle [`Exit`][Kind::Exit]:[`GfmTaskListItemCheck`][Name::GfmTaskListItemCheck].
1214-
fn on_exit_gfm_task_list_item_check(context: &mut CompileContext) {
1215-
// This field is set when a check exits.
1216-
// When that’s the case, there’s always a `data` event right after it.
1217-
// That data event is the first child (after the check) of the paragraph.
1218-
// We update the paragraph positional info to start after the check.
1219-
let mut start = point_from_event(&context.events[context.index]);
1220-
debug_assert!(
1221-
matches!(context.bytes[start.offset], b'\t' | b' '),
1222-
"expected tab or space after check"
1223-
);
1224-
start.column += 1;
1225-
start.offset += 1;
1226-
context.tail_mut().position_mut().unwrap().start = start;
1227-
context.gfm_task_list_item_check_after = true;
1228-
}
1229-
12301187
/// Handle [`Exit`][Kind::Exit]:{[`GfmTaskListItemValueChecked`][Name::GfmTaskListItemValueChecked],[`GfmTaskListItemValueUnchecked`][Name::GfmTaskListItemValueUnchecked]}.
12311188
fn on_exit_gfm_task_list_item_value(context: &mut CompileContext) {
12321189
let checked = context.events[context.index].name == Name::GfmTaskListItemValueChecked;
@@ -1416,6 +1373,51 @@ fn on_exit_media(context: &mut CompileContext) -> Result<(), String> {
14161373
Ok(())
14171374
}
14181375

1376+
/// Handle [`Exit`][Kind::Exit]:[`ListItem`][Name::ListItem].
1377+
fn on_exit_list_item(context: &mut CompileContext) -> Result<(), String> {
1378+
if let Node::ListItem(item) = context.tail_mut() {
1379+
if item.checked.is_some() {
1380+
if let Some(Node::Paragraph(paragraph)) = item.children.first_mut() {
1381+
if let Some(Node::Text(text)) = paragraph.children.first_mut() {
1382+
let mut point = text.position.as_ref().unwrap().start.clone();
1383+
let bytes = text.value.as_bytes();
1384+
let mut start = 0;
1385+
1386+
// Move past eol.
1387+
if matches!(bytes[0], b'\t' | b' ') {
1388+
point.offset += 1;
1389+
point.column += 1;
1390+
start += 1;
1391+
} else if matches!(bytes[0], b'\r' | b'\n') {
1392+
point.line += 1;
1393+
point.column = 1;
1394+
point.offset += 1;
1395+
start += 1;
1396+
// Move past the LF of CRLF.
1397+
if bytes.len() > 1 && bytes[0] == b'\r' && bytes[1] == b'\n' {
1398+
point.offset += 1;
1399+
start += 1;
1400+
}
1401+
}
1402+
1403+
// The whole text is whitespace: update the text.
1404+
if start == bytes.len() {
1405+
paragraph.children.remove(0);
1406+
} else {
1407+
text.value = str::from_utf8(&bytes[start..]).unwrap().into();
1408+
text.position.as_mut().unwrap().start = point.clone();
1409+
}
1410+
paragraph.position.as_mut().unwrap().start = point;
1411+
}
1412+
}
1413+
}
1414+
}
1415+
1416+
on_exit(context)?;
1417+
1418+
Ok(())
1419+
}
1420+
14191421
/// Handle [`Exit`][Kind::Exit]:[`ListItemValue`][Name::ListItemValue].
14201422
fn on_exit_list_item_value(context: &mut CompileContext) {
14211423
let start = Slice::from_position(

tests/fuzz.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,5 +109,13 @@ fn fuzz() -> Result<(), String> {
109109
"10: attention in different links (GH-21)"
110110
);
111111

112+
assert!(
113+
matches!(
114+
to_mdast("* [ ]\na", &Default::default()),
115+
Ok(mdast::Node::Root(_))
116+
),
117+
"11: gfm task list items followed by eols (GH-24)"
118+
);
119+
112120
Ok(())
113121
}

tests/gfm_task_list_item.rs

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use markdown::{
2-
mdast::{List, ListItem, Node, Paragraph, Root, Text},
2+
mdast::{Emphasis, List, ListItem, Node, Paragraph, Root, Text},
33
to_html, to_html_with_options, to_mdast,
44
unist::Position,
55
Options, ParseOptions,
@@ -291,5 +291,63 @@ Text.</li>
291291
"should support task list items as `checked` fields on `ListItem`s in mdast"
292292
);
293293

294+
assert_eq!(
295+
to_mdast(
296+
"* [x]\r\n a\n* [ ] b\n* [x]\t \r*c*",
297+
&ParseOptions::gfm()
298+
)?,
299+
Node::Root(Root {
300+
children: vec![Node::List(List {
301+
ordered: false,
302+
spread: false,
303+
start: None,
304+
children: vec![
305+
Node::ListItem(ListItem {
306+
checked: Some(true),
307+
spread: false,
308+
children: vec![Node::Paragraph(Paragraph {
309+
children: vec![Node::Text(Text {
310+
value: "a".into(),
311+
position: Some(Position::new(2, 1, 7, 2, 4, 10))
312+
}),],
313+
position: Some(Position::new(2, 1, 7, 2, 4, 10))
314+
})],
315+
position: Some(Position::new(1, 1, 0, 2, 4, 10))
316+
}),
317+
Node::ListItem(ListItem {
318+
checked: Some(false),
319+
spread: false,
320+
children: vec![Node::Paragraph(Paragraph {
321+
children: vec![Node::Text(Text {
322+
value: " b".into(),
323+
position: Some(Position::new(3, 7, 17, 3, 10, 20))
324+
}),],
325+
position: Some(Position::new(3, 7, 17, 3, 10, 20))
326+
})],
327+
position: Some(Position::new(3, 1, 11, 3, 10, 20))
328+
}),
329+
Node::ListItem(ListItem {
330+
checked: Some(true),
331+
spread: false,
332+
children: vec![Node::Paragraph(Paragraph {
333+
children: vec![Node::Emphasis(Emphasis {
334+
children: vec![Node::Text(Text {
335+
value: "c".into(),
336+
position: Some(Position::new(5, 2, 30, 5, 3, 31))
337+
}),],
338+
position: Some(Position::new(5, 1, 29, 5, 4, 32))
339+
})],
340+
position: Some(Position::new(5, 1, 29, 5, 4, 32))
341+
})],
342+
position: Some(Position::new(4, 1, 21, 5, 4, 32))
343+
}),
344+
],
345+
position: Some(Position::new(1, 1, 0, 5, 4, 32))
346+
})],
347+
position: Some(Position::new(1, 1, 0, 5, 4, 32))
348+
}),
349+
"should handle lots of whitespace after checkbox, and non-text"
350+
);
351+
294352
Ok(())
295353
}

0 commit comments

Comments
 (0)