Skip to content

Commit 2258ceb

Browse files
cscheid and claude authored
Feature/6 (#49)
* Fix pipe table parsing with code spans containing pipes (#29)

  Added code span recognition to pipe table cell parsing. The block grammar now properly handles backtick code spans like `|` within table cells by repeating the inline grammar's code span parsing logic in the block context.

  Changes:
  - Added CODE_SPAN_START and CODE_SPAN_CLOSE external tokens to block grammar
  - Implemented parse_code_span() in block scanner with lookahead for matching delimiters
  - Added _pipe_table_code_span rule to parse code spans within table cells
  - Updated scanner state serialization to track code span delimiter length

  🤖 Generated with [Claude Code](https://claude.com/claude-code)

  Co-Authored-By: Claude <[email protected]>

* Add tests for pipe tables with code spans containing pipes (issue #29)

  These tests verify that the parser correctly handles code spans containing pipe characters within pipe tables. The fix in the block parser now properly parses code spans to avoid treating pipes inside backticks as table delimiters.

  Test cases cover:
  - Simple code span with single pipe
  - Multiple code spans with pipes in different cells
  - Mixed backtick delimiters (double and triple backticks)

  All tests pass and match Pandoc's output.

  🤖 Generated with [Claude Code](https://claude.com/claude-code)

  Co-Authored-By: Claude <[email protected]>

* Add support for (@) lists (#6)

---------

Co-authored-by: Claude <[email protected]>
1 parent ab92d47 commit 2258ceb

File tree

18 files changed

+42303
-38669
lines changed

18 files changed

+42303
-38669
lines changed

crates/quarto-markdown-pandoc/src/pandoc/ast_context.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,31 +3,39 @@
33
* Copyright (c) 2025 Posit, PBC
44
*/
55

6+
use std::cell::Cell;
7+
68
/// Context passed through the parsing pipeline to provide information
79
/// about the current parse operation and manage string ownership.
810
/// The filenames vector will eventually be used to deduplicate strings
911
/// in the AST by storing indices instead of cloning strings.
1012
#[derive(Debug, Clone)]
1113
pub struct ASTContext {
1214
pub filenames: Vec<String>,
15+
/// Counter for example list numbering across the document
16+
/// Example lists continue numbering even when interrupted by other content
17+
pub example_list_counter: Cell<usize>,
1318
}
1419

1520
impl ASTContext {
1621
pub fn new() -> Self {
1722
ASTContext {
1823
filenames: Vec::new(),
24+
example_list_counter: Cell::new(1),
1925
}
2026
}
2127

2228
pub fn with_filename(filename: impl Into<String>) -> Self {
2329
ASTContext {
2430
filenames: vec![filename.into()],
31+
example_list_counter: Cell::new(1),
2532
}
2633
}
2734

2835
pub fn anonymous() -> Self {
2936
ASTContext {
3037
filenames: Vec::new(),
38+
example_list_counter: Cell::new(1),
3139
}
3240
}
3341

crates/quarto-markdown-pandoc/src/pandoc/list.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
77
pub enum ListNumberStyle {
88
Default,
9+
Example,
910
Decimal,
1011
LowerRoman,
1112
UpperRoman,

crates/quarto-markdown-pandoc/src/pandoc/treesitter.rs

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,10 @@ fn process_list(
112112
// this is a marker node, we don't need to do anything with it
113113
continue;
114114
}
115-
if node == "list_marker_parenthesis" || node == "list_marker_dot" {
115+
if node == "list_marker_parenthesis"
116+
|| node == "list_marker_dot"
117+
|| node == "list_marker_example"
118+
{
116119
// this is an ordered list, so we need to set the flag
117120
let PandocNativeIntermediate::IntermediateOrderedListMarker(marker_number, _) = child
118121
else {
@@ -121,10 +124,14 @@ fn process_list(
121124

122125
is_ordered_list = Some((
123126
marker_number,
124-
ListNumberStyle::Decimal,
127+
match node.as_str() {
128+
"list_marker_example" => ListNumberStyle::Example,
129+
_ => ListNumberStyle::Decimal,
130+
},
125131
match node.as_str() {
126132
"list_marker_parenthesis" => ListNumberDelim::OneParen,
127133
"list_marker_dot" => ListNumberDelim::Period,
134+
"list_marker_example" => ListNumberDelim::TwoParens,
128135
_ => panic!("Unexpected list marker node: {}", node),
129136
},
130137
));
@@ -244,7 +251,14 @@ fn process_list(
244251
};
245252

246253
match is_ordered_list {
247-
Some(attr) => {
254+
Some(mut attr) => {
255+
// For example lists, use and update the global counter
256+
if attr.1 == ListNumberStyle::Example {
257+
let start_num = context.example_list_counter.get();
258+
attr.0 = start_num;
259+
// Increment counter by the number of items in this list
260+
context.example_list_counter.set(start_num + content.len());
261+
}
248262
PandocNativeIntermediate::IntermediateBlock(Block::OrderedList(OrderedList {
249263
attr,
250264
content,
@@ -267,7 +281,10 @@ fn process_list_item(
267281
let children = children
268282
.into_iter()
269283
.filter_map(|(node, child)| {
270-
if node == "list_marker_dot" || node == "list_marker_parenthesis" {
284+
if node == "list_marker_dot"
285+
|| node == "list_marker_parenthesis"
286+
|| node == "list_marker_example"
287+
{
271288
// this is an ordered list, so we need to set the flag
272289
let PandocNativeIntermediate::IntermediateOrderedListMarker(marker_number, _) =
273290
child
@@ -276,10 +293,14 @@ fn process_list_item(
276293
};
277294
list_attr = Some((
278295
marker_number,
279-
ListNumberStyle::Decimal,
296+
match node.as_str() {
297+
"list_marker_example" => ListNumberStyle::Example,
298+
_ => ListNumberStyle::Decimal,
299+
},
280300
match node.as_str() {
281301
"list_marker_parenthesis" => ListNumberDelim::OneParen,
282302
"list_marker_dot" => ListNumberDelim::Period,
303+
"list_marker_example" => ListNumberDelim::TwoParens,
283304
_ => panic!("Unexpected list marker node: {}", node),
284305
},
285306
));
@@ -568,7 +589,7 @@ fn native_visitor<T: Write>(
568589
"shortcode_boolean" => process_shortcode_boolean(node, input_bytes, context),
569590
"shortcode_number" => process_shortcode_number(node, input_bytes, context),
570591
"code_fence_content" => process_code_fence_content(node, children, input_bytes, context),
571-
"list_marker_parenthesis" | "list_marker_dot" => {
592+
"list_marker_parenthesis" | "list_marker_dot" | "list_marker_example" => {
572593
process_list_marker(node, input_bytes, context)
573594
}
574595
// These are marker nodes, we don't need to do anything with it

crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/list_marker.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,19 @@ pub fn process_list_marker(
2020
// we trim both ends instead of just trim_end()
2121
// because the lexer might hand us a marker with tabs at the beginning,
2222
// as a result of weird mixed-spaces-and-tabs cases like "> \t1."
23-
.trim()
23+
.trim();
24+
25+
// Check if this is an example list marker (@)
26+
if marker_text == "(@)" {
27+
// For example lists, we use 1 as the starting number
28+
// The actual number is assigned later, in process_list, from context.example_list_counter
29+
return PandocNativeIntermediate::IntermediateOrderedListMarker(
30+
1,
31+
node_source_info_with_context(node, context).range,
32+
);
33+
}
34+
35+
let marker_text = marker_text
2436
.trim_end_matches('.')
2537
.trim_end_matches(')')
2638
.to_string();

crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/postprocess.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,6 @@ fn transform_definition_list_div(div: Div) -> Block {
259259
})
260260
}
261261

262-
263262
/// Apply post-processing transformations to the Pandoc AST
264263
pub fn postprocess(doc: Pandoc) -> Result<Pandoc, Vec<String>> {
265264
let mut errors = Vec::new();

crates/quarto-markdown-pandoc/src/readers/json.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -640,7 +640,10 @@ fn read_ast_context(value: &Value) -> Result<ASTContext> {
640640
})
641641
.collect::<Result<Vec<_>>>()?;
642642

643-
Ok(ASTContext { filenames })
643+
Ok(ASTContext {
644+
filenames,
645+
example_list_counter: std::cell::Cell::new(1),
646+
})
644647
}
645648

646649
pub fn read<R: std::io::Read>(reader: &mut R) -> Result<(Pandoc, ASTContext)> {

crates/quarto-markdown-pandoc/src/writers/json.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,7 @@ fn write_list_attributes(attr: &ListAttributes) -> Value {
192192
crate::pandoc::ListNumberStyle::UpperAlpha => json!({"t": "UpperAlpha"}),
193193
crate::pandoc::ListNumberStyle::LowerRoman => json!({"t": "LowerRoman"}),
194194
crate::pandoc::ListNumberStyle::UpperRoman => json!({"t": "UpperRoman"}),
195+
crate::pandoc::ListNumberStyle::Example => json!({"t": "Example"}),
195196
crate::pandoc::ListNumberStyle::Default => json!({"t": "Default"}),
196197
};
197198
let number_delimiter = match attr.2 {

crates/quarto-markdown-pandoc/src/writers/native.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,7 @@ fn write_list_number_style<T: std::io::Write>(
358358
crate::pandoc::ListNumberStyle::UpperAlpha => write!(buf, "UpperAlpha"),
359359
crate::pandoc::ListNumberStyle::LowerRoman => write!(buf, "LowerRoman"),
360360
crate::pandoc::ListNumberStyle::UpperRoman => write!(buf, "UpperRoman"),
361+
crate::pandoc::ListNumberStyle::Example => write!(buf, "Example"),
361362
crate::pandoc::ListNumberStyle::Default => write!(buf, "Decimal"), // Is this supposed to be the default?
362363
}
363364
}

crates/quarto-markdown-pandoc/src/writers/qmd.rs

Lines changed: 28 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
use crate::pandoc::attr::is_empty_attr;
77
use crate::pandoc::block::MetaBlock;
8-
use crate::pandoc::list::ListNumberDelim;
8+
use crate::pandoc::list::{ListNumberDelim, ListNumberStyle};
99
use crate::pandoc::meta::MetaValue;
1010
use crate::pandoc::table::{Alignment, Cell, Table};
1111
use crate::pandoc::{
@@ -101,12 +101,18 @@ struct OrderedListContext<'a, W: Write + ?Sized> {
101101
at_line_start: bool,
102102
is_first_line: bool,
103103
number: usize,
104+
number_style: ListNumberStyle,
104105
delimiter: ListNumberDelim,
105106
indent: String,
106107
}
107108

108109
impl<'a, W: Write + ?Sized> OrderedListContext<'a, W> {
109-
fn new(inner: &'a mut W, number: usize, delimiter: ListNumberDelim) -> Self {
110+
fn new(
111+
inner: &'a mut W,
112+
number: usize,
113+
number_style: ListNumberStyle,
114+
delimiter: ListNumberDelim,
115+
) -> Self {
110116
// Pandoc uses consistent spacing: for numbers < 10, uses two spaces after delimiter
111117
// For numbers >= 10, uses one space. Continuation lines always use 4 spaces indent.
112118
let indent = "    ".to_string(); // Always 4 spaces for continuation lines
@@ -116,6 +122,7 @@ impl<'a, W: Write + ?Sized> OrderedListContext<'a, W> {
116122
at_line_start: true,
117123
is_first_line: true,
118124
number,
125+
number_style,
119126
delimiter,
120127
indent,
121128
}
@@ -128,18 +135,23 @@ impl<'a, W: Write + ?Sized> Write for OrderedListContext<'a, W> {
128135
for &byte in buf {
129136
if self.at_line_start {
130137
if self.is_first_line {
131-
let delim_str = match self.delimiter {
132-
ListNumberDelim::Period => ".",
133-
ListNumberDelim::OneParen => ")",
134-
ListNumberDelim::TwoParens => ")",
135-
_ => ".",
136-
};
137-
// Pandoc style: numbers < 10 get two spaces after delimiter,
138-
// numbers >= 10 get one space
139-
if self.number < 10 {
140-
write!(self.inner, "{}{}  ", self.number, delim_str)?;
138+
// For example lists, always use (@) marker
139+
if matches!(self.number_style, ListNumberStyle::Example) {
140+
write!(self.inner, "(@) ")?;
141141
} else {
142-
write!(self.inner, "{}{} ", self.number, delim_str)?;
142+
let delim_str = match self.delimiter {
143+
ListNumberDelim::Period => ".",
144+
ListNumberDelim::OneParen => ")",
145+
ListNumberDelim::TwoParens => ")",
146+
_ => ".",
147+
};
148+
// Pandoc style: numbers < 10 get two spaces after delimiter,
149+
// numbers >= 10 get one space
150+
if self.number < 10 {
151+
write!(self.inner, "{}{}  ", self.number, delim_str)?;
152+
} else {
153+
write!(self.inner, "{}{} ", self.number, delim_str)?;
154+
}
143155
}
144156
self.is_first_line = false;
145157
} else {
@@ -331,7 +343,7 @@ fn write_orderedlist(
331343
orderedlist: &OrderedList,
332344
buf: &mut dyn std::io::Write,
333345
) -> std::io::Result<()> {
334-
let (start_num, _number_style, delimiter) = &orderedlist.attr;
346+
let (start_num, number_style, delimiter) = &orderedlist.attr;
335347

336348
// Determine if this is a tight list
337349
// A list is tight if the first block of all items is Plain (not Para)
@@ -346,7 +358,8 @@ fn write_orderedlist(
346358
writeln!(buf)?;
347359
}
348360
let current_num = start_num + i;
349-
let mut item_writer = OrderedListContext::new(buf, current_num, delimiter.clone());
361+
let mut item_writer =
362+
OrderedListContext::new(buf, current_num, number_style.clone(), delimiter.clone());
350363
for (j, block) in item.iter().enumerate() {
351364
if j > 0 && !is_tight {
352365
// Add a blank line between blocks within a list item in loose lists
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
(@) First item
2+
3+
(@) Second item
4+
5+
Some text in between.
6+
7+
(@) Third item
8+
9+
(@) Fourth item

0 commit comments

Comments
 (0)