Skip to content

Commit 86477f6

Browse files
committed
more capitalization and abbreviation fixes
1 parent fb948be commit 86477f6

File tree

7 files changed

+46
-21
lines changed

7 files changed

+46
-21
lines changed

crates/quarto-markdown-pandoc/src/pandoc/treesitter_utils/postprocess.rs

Lines changed: 40 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,39 @@ fn is_abbreviation(text: &str) -> bool {
8585
)
8686
}
8787

88+
/// Check whether `text` ends with a known abbreviation standing as its own word.
///
/// The abbreviation must either make up the whole of `text` or be preceded by
/// a non-alphanumeric character (opening punctuation such as `(`, a space, a
/// non-breaking space, ...). A plain suffix test is not enough: it would treat
/// ordinary sentence-final words like "stop." (suffix "p.") or "each."
/// (suffix "ch.") as abbreviations.
///
/// Matching is case-sensitive, so "dr." does not count as "Dr.".
///
/// Returns `true` if any entry of the abbreviation table matches at a word
/// boundary at the end of `text`, `false` otherwise (including for empty input).
fn ends_with_abbreviation(text: &str) -> bool {
    // NOTE(review): this list presumably mirrors the abbreviation set used
    // elsewhere in this file — confirm the two stay in sync.
    const ABBREVIATIONS: &[&str] = &[
        "Mr.", "Mrs.", "Ms.", "Capt.", "Dr.", "Prof.", "Gen.", "Gov.", "e.g.", "i.e.", "Sgt.",
        "St.", "vol.", "vs.", "Sen.", "Rep.", "Pres.", "Hon.", "Rev.", "Ph.D.", "M.D.", "M.A.",
        "p.", "pp.", "ch.", "chap.", "sec.", "cf.", "cp.",
    ];

    ABBREVIATIONS.iter().any(|abbreviation| {
        text.strip_suffix(abbreviation).map_or(false, |before| {
            // Word-boundary check: nothing precedes the abbreviation, or the
            // character right before it is not a letter or digit.
            before
                .chars()
                .next_back()
                .map_or(true, |c| !c.is_alphanumeric())
        })
    })
}
120+
88121
/// Coalesce Str nodes that end with abbreviations with following words
89122
/// This matches Pandoc's behavior of keeping abbreviations with the next word
90123
/// Returns (result, did_coalesce) tuple
@@ -100,33 +133,23 @@ pub fn coalesce_abbreviations(inlines: Vec<Inline>) -> (Vec<Inline>, bool) {
100133
let mut end_info = str_inline.source_info.clone();
101134
let mut j = i + 1;
102135

103-
// Check if current text is an abbreviation
104-
if is_abbreviation(&current_text) {
105-
// Coalesce with following Space + Str until we hit a capital letter
136+
// Check if current text ends with an abbreviation
137+
if ends_with_abbreviation(&current_text) {
138+
// Coalesce with following Space + Str
106139
while j + 1 < inlines.len() {
107140
if let (Inline::Space(_), Inline::Str(next_str)) =
108141
(&inlines[j], &inlines[j + 1])
109142
{
110-
// Stop before uppercase letters (potential sentence boundaries)
111-
if next_str
112-
.text
113-
.chars()
114-
.next()
115-
.map_or(false, |c| c.is_uppercase())
116-
{
117-
break;
118-
}
119-
120-
// Coalesce
121-
current_text.push(' ');
143+
// Coalesce with non-breaking space (U+00A0) to match Pandoc
144+
current_text.push('\u{00A0}');
122145
current_text.push_str(&next_str.text);
123146
end_info = next_str.source_info.clone();
124147
j += 2;
125148
did_coalesce = true;
126149

127-
// If this word is also an abbreviation, continue coalescing
150+
// If this word also ends with an abbreviation, continue coalescing
128151
// Otherwise, stop after this word
129-
if !is_abbreviation(&next_str.text) {
152+
if !ends_with_abbreviation(&current_text) {
130153
break;
131154
}
132155
} else {
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
(e.g. this fails)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Test with Dr. Smith today.
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
[ Para [Str "Blah", Space, Str "Blah", Space, Cite [Citation { citationId = "knuth1984", citationPrefix = [Str "see", Space], citationSuffix = [Str ",", Space, Str "pp. 33-35"], citationMode = NormalCitation, citationNoteNum = 1, citationHash = 0 }, Citation { citationId = "wickham2015", citationPrefix = [Space, Str "also", Space], citationSuffix = [Str ",", Space, Str "chap. 1"], citationMode = NormalCitation, citationNoteNum = 1, citationHash = 0 }] []] ]
1+
[ Para [Str "Blah", Space, Str "Blah", Space, Cite [Citation { citationId = "knuth1984", citationPrefix = [Str "see", Space], citationSuffix = [Str ",", Space, Str "pp. 33-35"], citationMode = NormalCitation, citationNoteNum = 1, citationHash = 0 }, Citation { citationId = "wickham2015", citationPrefix = [Space, Str "also", Space], citationSuffix = [Str ",", Space, Str "chap. 1"], citationMode = NormalCitation, citationNoteNum = 1, citationHash = 0 }] []] ]
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
[ Para [Str "Blah", Space, Str "Blah", Space, Cite [Citation { citationId = "knuth1984", citationPrefix = [], citationSuffix = [Str ",", Space, Str "pp. 33-35,", Space, Str "38-39", Space, Str "and", Space, Str "passim"], citationMode = NormalCitation, citationNoteNum = 1, citationHash = 0 }] []] ]
1+
[ Para [Str "Blah", Space, Str "Blah", Space, Cite [Citation { citationId = "knuth1984", citationPrefix = [], citationSuffix = [Str ",", Space, Str "pp. 33-35,", Space, Str "38-39", Space, Str "and", Space, Str "passim"], citationMode = NormalCitation, citationNoteNum = 1, citationHash = 0 }] []] ]
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
[ Para [Cite [Citation { citationId = "smith04", citationPrefix = [], citationSuffix = [Str "p. 33"], citationMode = AuthorInText, citationNoteNum = 1, citationHash = 0 }] [Str "@smith04"]] ]
1+
[ Para [Cite [Citation { citationId = "smith04", citationPrefix = [], citationSuffix = [Str "p. 33"], citationMode = AuthorInText, citationNoteNum = 1, citationHash = 0 }] [Str "@smith04"]] ]
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
[ Para [Str "i", Space, Str "think", Space, Str "e.g. this", Space, Str "is", Space, Str "good?", Space, Str "did", Space, Str "1–30", Space, Str "work?", Space, Str "wait—really—did", Space, Str "it?"] ]
1+
[ Para [Str "i", Space, Str "think", Space, Str "e.g. this", Space, Str "is", Space, Str "good?", Space, Str "did", Space, Str "1–30", Space, Str "work?", Space, Str "wait—really—did", Space, Str "it?"] ]

0 commit comments

Comments
 (0)