Skip to content

Commit 171de7c

Browse files
committed
improve cite processing on the front-end (#12)
1 parent 178779c commit 171de7c

File tree

2 files changed

+111
-1
lines changed

2 files changed

+111
-1
lines changed

crates/quarto-markdown-pandoc/src/pandoc/treesitter.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ use crate::pandoc::treesitter_utils::pipe_table::{
3232
process_pipe_table, process_pipe_table_cell, process_pipe_table_delimiter_cell,
3333
process_pipe_table_delimiter_row, process_pipe_table_header_or_row,
3434
};
35-
use crate::pandoc::treesitter_utils::postprocess::{merge_strs, postprocess};
35+
use crate::pandoc::treesitter_utils::postprocess::{merge_strs, postprocess, split_cite_content_strings};
3636
use crate::pandoc::treesitter_utils::quoted_span::process_quoted_span;
3737
use crate::pandoc::treesitter_utils::raw_attribute::process_raw_attribute;
3838
use crate::pandoc::treesitter_utils::raw_specifier::process_raw_specifier;
@@ -753,5 +753,6 @@ pub fn treesitter_to_pandoc<T: Write>(
753753
};
754754
let result = postprocess(pandoc)?;
755755
let result = merge_strs(result);
756+
let result = split_cite_content_strings(result);
756757
Ok(result)
757758
}

crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/postprocess.rs

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -412,6 +412,64 @@ pub fn postprocess(doc: Pandoc) -> Result<Pandoc, Vec<String>> {
412412
if let Some(mut cite) = pending_cite.take() {
413413
// Add span content to the citation's suffix
414414
cite.citations[0].suffix = span.content.clone();
415+
416+
// Update the content field to include the rendered suffix with brackets
417+
// Pandoc breaks up the bracketed suffix text by spaces, with the opening
418+
// bracket attached to the first word and closing bracket to the last word
419+
// e.g., "@knuth [p. 33]" becomes: Str("@knuth"), Space, Str("[p."), Space, Str("33]")
420+
cite.content.push(Inline::Space(Space {
421+
source_info: SourceInfo::with_range(empty_range()),
422+
}));
423+
424+
// The span content may have been merged into a single string, so we need to
425+
// intelligently break it up to match Pandoc's behavior
426+
let mut bracketed_content: Vec<Inline> = vec![];
427+
for inline in &span.content {
428+
if let Inline::Str(s) = inline {
429+
// Split the string by spaces and create Str/Space inlines
430+
let words: Vec<&str> = s.text.split(' ').collect();
431+
for (i, word) in words.iter().enumerate() {
432+
if i > 0 {
433+
bracketed_content.push(Inline::Space(
434+
Space {
435+
source_info: SourceInfo::with_range(
436+
empty_range(),
437+
),
438+
},
439+
));
440+
}
441+
if !word.is_empty() {
442+
bracketed_content.push(Inline::Str(Str {
443+
text: word.to_string(),
444+
source_info: s.source_info.clone(),
445+
}));
446+
}
447+
}
448+
} else {
449+
bracketed_content.push(inline.clone());
450+
}
451+
}
452+
453+
// Now add brackets to the first and last Str elements
454+
if !bracketed_content.is_empty() {
455+
// Prepend "[" to the first Str element
456+
if let Some(Inline::Str(first_str)) =
457+
bracketed_content.first_mut()
458+
{
459+
first_str.text = format!("[{}", first_str.text);
460+
}
461+
// Append "]" to the last Str element (search from the end)
462+
for i in (0..bracketed_content.len()).rev() {
463+
if let Inline::Str(last_str) =
464+
&mut bracketed_content[i]
465+
{
466+
last_str.text = format!("{}]", last_str.text);
467+
break;
468+
}
469+
}
470+
}
471+
472+
cite.content.extend(bracketed_content);
415473
result.push(Inline::Cite(cite));
416474
}
417475
state = 0;
@@ -484,6 +542,57 @@ fn as_smart_str(s: String) -> String {
484542
}
485543
}
486544

545+
/// Re-split Cite content strings after merge_strs to match Pandoc's behavior
546+
/// Pandoc breaks up citation suffix text by spaces
547+
pub fn split_cite_content_strings(pandoc: Pandoc) -> Pandoc {
548+
topdown_traverse(
549+
pandoc,
550+
&mut Filter::new().with_cite(|mut cite| {
551+
// Split any merged strings in the cite content back into separate Str/Space inlines
552+
// Only split if the content contains spaces (i.e., hasn't been split yet)
553+
let mut needs_split = false;
554+
for inline in &cite.content {
555+
if let Inline::Str(s) = inline {
556+
// Check for regular space or non-breaking space (U+00A0)
557+
if s.text.contains(' ') || s.text.contains('\u{00A0}') {
558+
needs_split = true;
559+
break;
560+
}
561+
}
562+
}
563+
564+
if !needs_split {
565+
return Unchanged(cite);
566+
}
567+
568+
let mut new_content: Vec<Inline> = vec![];
569+
for inline in cite.content {
570+
if let Inline::Str(s) = inline {
571+
// Split by regular spaces and non-breaking spaces
572+
let words: Vec<&str> = s.text.split(|c| c == ' ' || c == '\u{00A0}').collect();
573+
for (i, word) in words.iter().enumerate() {
574+
if i > 0 {
575+
new_content.push(Inline::Space(Space {
576+
source_info: SourceInfo::with_range(empty_range()),
577+
}));
578+
}
579+
if !word.is_empty() {
580+
new_content.push(Inline::Str(Str {
581+
text: word.to_string(),
582+
source_info: s.source_info.clone(),
583+
}));
584+
}
585+
}
586+
} else {
587+
new_content.push(inline);
588+
}
589+
}
590+
cite.content = new_content;
591+
FilterResult(vec![Inline::Cite(cite)], true)
592+
}),
593+
)
594+
}
595+
487596
/// Merge consecutive Str inlines and apply smart typography
488597
pub fn merge_strs(pandoc: Pandoc) -> Pandoc {
489598
topdown_traverse(

0 commit comments

Comments
 (0)