|
| 1 | +use super::Suggestion; |
| 2 | +use super::{Lint, LintKind, Linter}; |
| 3 | +use crate::document::Document; |
| 4 | +use crate::{Token, TokenStringExt}; |
| 5 | + |
/// A linter that checks for words that are capitalized but shouldn't be
/// (i.e., not at the start of a sentence/heading and not proper nouns).
///
/// The type carries no state, so `Default` (and the other common traits) are
/// derived rather than written by hand.
#[derive(Debug, Clone, Copy, Default)]
pub struct SentenceCasing;
| 15 | + |
| 16 | +impl SentenceCasing { |
| 17 | + /// Check a sequence of tokens for incorrect capitalization. |
| 18 | + /// `first_word_idx` is the index of the first word that should be capitalized. |
| 19 | + fn check_tokens(&self, tokens: &[Token], document: &Document, lints: &mut Vec<Lint>) { |
| 20 | + // Get the index of the first word in the sequence |
| 21 | + let first_word_idx = tokens.iter().position(|t| t.kind.is_word()); |
| 22 | + |
| 23 | + let Some(first_word_idx) = first_word_idx else { |
| 24 | + return; |
| 25 | + }; |
| 26 | + |
| 27 | + // Check all words after the first one |
| 28 | + for (idx, token) in tokens.iter().enumerate() { |
| 29 | + // Skip the first word (it should be capitalized) |
| 30 | + if idx <= first_word_idx { |
| 31 | + continue; |
| 32 | + } |
| 33 | + |
| 34 | + // Only check actual words |
| 35 | + if !token.kind.is_word() { |
| 36 | + continue; |
| 37 | + } |
| 38 | + |
| 39 | + // Check if the word is capitalized |
| 40 | + let word_chars = document.get_span_content(&token.span); |
| 41 | + let Some(first_char) = word_chars.first() else { |
| 42 | + continue; |
| 43 | + }; |
| 44 | + |
| 45 | + // Skip if not capitalized |
| 46 | + if !first_char.is_uppercase() { |
| 47 | + continue; |
| 48 | + } |
| 49 | + |
| 50 | + // Skip proper nouns - these should be capitalized |
| 51 | + if token.kind.is_proper_noun() { |
| 52 | + continue; |
| 53 | + } |
| 54 | + |
| 55 | + // Skip words that are all uppercase (likely acronyms/initialisms) |
| 56 | + if word_chars.iter().all(|c| !c.is_alphabetic() || c.is_uppercase()) { |
| 57 | + continue; |
| 58 | + } |
| 59 | + |
| 60 | + // Skip words after a colon (might be starting a new clause) |
| 61 | + if let Some(prev_non_ws) = tokens[..idx] |
| 62 | + .iter() |
| 63 | + .rev() |
| 64 | + .find(|t| !t.kind.is_whitespace()) |
| 65 | + { |
| 66 | + if prev_non_ws.kind.is_punctuation() { |
| 67 | + let prev_chars = document.get_span_content(&prev_non_ws.span); |
| 68 | + if prev_chars == [':'] { |
| 69 | + continue; |
| 70 | + } |
| 71 | + } |
| 72 | + } |
| 73 | + |
| 74 | + // Skip single-letter capitalizations (often used for proper context like "Plan A") |
| 75 | + if word_chars.len() == 1 { |
| 76 | + continue; |
| 77 | + } |
| 78 | + |
| 79 | + // Skip words after opening quotes (might be a quoted sentence start) |
| 80 | + if let Some(prev_non_ws) = tokens[..idx] |
| 81 | + .iter() |
| 82 | + .rev() |
| 83 | + .find(|t| !t.kind.is_whitespace()) |
| 84 | + { |
| 85 | + if prev_non_ws.kind.is_quote() { |
| 86 | + continue; |
| 87 | + } |
| 88 | + } |
| 89 | + |
| 90 | + // Check if this word follows a sentence terminator within the same sequence |
| 91 | + // (This handles cases where parsing might not have split sentences correctly) |
| 92 | + let has_terminator_before = tokens[first_word_idx + 1..idx] |
| 93 | + .iter() |
| 94 | + .any(|t| t.kind.is_sentence_terminator()); |
| 95 | + |
| 96 | + if has_terminator_before { |
| 97 | + continue; |
| 98 | + } |
| 99 | + |
| 100 | + // Create the lowercase suggestion |
| 101 | + let mut replacement_chars = word_chars.to_vec(); |
| 102 | + replacement_chars[0] = replacement_chars[0].to_ascii_lowercase(); |
| 103 | + |
| 104 | + lints.push(Lint { |
| 105 | + span: token.span, |
| 106 | + lint_kind: LintKind::Capitalization, |
| 107 | + suggestions: vec![Suggestion::ReplaceWith(replacement_chars)], |
| 108 | + priority: 63, |
| 109 | + message: "This word is capitalized but does not appear to be a proper noun. Consider using lowercase.".to_string(), |
| 110 | + }); |
| 111 | + } |
| 112 | + } |
| 113 | +} |
| 114 | + |
| 115 | +impl Linter for SentenceCasing { |
| 116 | + fn lint(&mut self, document: &Document) -> Vec<Lint> { |
| 117 | + let mut lints = Vec::new(); |
| 118 | + |
| 119 | + // Check headings |
| 120 | + for heading in document.iter_headings() { |
| 121 | + self.check_tokens(heading, document, &mut lints); |
| 122 | + } |
| 123 | + |
| 124 | + // Check regular sentences (but skip those in headings) |
| 125 | + for paragraph in document.iter_paragraphs() { |
| 126 | + // Skip paragraphs that are headings (they're already checked above) |
| 127 | + if paragraph.iter().any(|t| t.kind.is_heading_start()) { |
| 128 | + continue; |
| 129 | + } |
| 130 | + |
| 131 | + for sentence in paragraph.iter_sentences() { |
| 132 | + self.check_tokens(sentence, document, &mut lints); |
| 133 | + } |
| 134 | + } |
| 135 | + |
| 136 | + lints |
| 137 | + } |
| 138 | + |
| 139 | + fn description(&self) -> &'static str { |
| 140 | + "Flags words that are capitalized mid-sentence or mid-heading but are not proper nouns." |
| 141 | + } |
| 142 | +} |
| 143 | + |
/// Tests for `SentenceCasing`, driven through the shared lint test helpers:
/// `assert_lint_count` pins how many lints an input produces and
/// `assert_suggestion_result` pins the text after applying a suggestion.
#[cfg(test)]
mod tests {
    use super::super::tests::{assert_lint_count, assert_suggestion_result};
    use super::SentenceCasing;

    /// One capitalized common word in the middle of a sentence yields one lint.
    #[test]
    fn catches_mid_sentence_capital() {
        assert_lint_count("The quick Brown fox jumps over the lazy dog.", SentenceCasing, 1);
    }

    /// A capitalized place name mid-sentence is not reported.
    #[test]
    fn allows_proper_nouns() {
        assert_lint_count("I visited Paris last summer.", SentenceCasing, 0);
    }

    /// The capitalized first word of each of two sentences is not reported.
    #[test]
    fn allows_sentence_start() {
        assert_lint_count("The fox is quick. The dog is lazy.", SentenceCasing, 0);
    }

    /// A fully uppercase word is not reported.
    #[test]
    fn allows_acronyms() {
        assert_lint_count("The NASA mission was successful.", SentenceCasing, 0);
    }

    /// A capitalized word directly following a colon is not reported.
    #[test]
    fn allows_after_colon() {
        assert_lint_count("Here is the answer: True or false.", SentenceCasing, 0);
    }

    /// A single uppercase letter used as a word is not reported.
    #[test]
    fn allows_single_letter() {
        assert_lint_count("This is plan A for the mission.", SentenceCasing, 0);
    }

    /// Applying the suggestion lowercases only the first letter of the word.
    #[test]
    fn fixes_capitalization() {
        assert_suggestion_result(
            "The quick Brown fox.",
            SentenceCasing,
            "The quick brown fox.",
        );
    }

    /// A capitalized personal name mid-sentence is not reported.
    #[test]
    fn allows_names() {
        assert_lint_count("I talked to John yesterday.", SentenceCasing, 0);
    }

    /// Several capitalized words in one sentence are each reported.
    // NOTE(review): five capitalized words follow the first word here (Quick,
    // Brown, Fox, Lazy, Dog) but the expected count is 4 — presumably one of
    // them is tagged as a proper noun by the dictionary; confirm.
    #[test]
    fn multiple_errors() {
        assert_lint_count(
            "The Quick Brown Fox jumps over the Lazy Dog.",
            SentenceCasing,
            4,
        );
    }

    /// A capitalized word directly after an opening quote is not reported.
    #[test]
    fn allows_quoted_start() {
        assert_lint_count("She said \"Hello there\" to him.", SentenceCasing, 0);
    }

    // Heading tests

    /// Every capitalized word after the first word of a heading is reported.
    #[test]
    fn catches_heading_mid_word_capital() {
        // Markdown heading with incorrect capitalization
        assert_lint_count("# The Quick Brown Fox", SentenceCasing, 3);
    }

    /// A capitalized place name inside a heading is not reported.
    #[test]
    fn allows_heading_proper_nouns() {
        assert_lint_count("# A trip to Paris", SentenceCasing, 0);
    }

    /// The capitalized first word of a heading is not reported.
    #[test]
    fn allows_heading_start_capital() {
        assert_lint_count("# Introduction to the topic", SentenceCasing, 0);
    }

    /// Applying the suggestion inside a heading lowercases the flagged word.
    #[test]
    fn fixes_heading_capitalization() {
        assert_suggestion_result(
            "# The Quick fox",
            SentenceCasing,
            "# The quick fox",
        );
    }

    /// A heading with an all-caps word and a mixed-case name yields no lints.
    #[test]
    fn heading_with_acronym() {
        assert_lint_count("# Working with NASA and SpaceX", SentenceCasing, 0);
    }
}
| 239 | + |
0 commit comments