From f2d923b2ebc90c55059a168dd13cbae4cd12afbc Mon Sep 17 00:00:00 2001 From: Lucas Theis Date: Tue, 23 Dec 2025 10:40:52 +0700 Subject: [PATCH] feat(core): create rule to enforce sentence case --- harper-core/src/linting/lint_group.rs | 2 + harper-core/src/linting/mod.rs | 1 + harper-core/src/linting/sentence_casing.rs | 233 +++++++++++++++++++++ 3 files changed, 236 insertions(+) create mode 100644 harper-core/src/linting/sentence_casing.rs diff --git a/harper-core/src/linting/lint_group.rs b/harper-core/src/linting/lint_group.rs index 5be6be636..1ba9ef9f4 100644 --- a/harper-core/src/linting/lint_group.rs +++ b/harper-core/src/linting/lint_group.rs @@ -156,6 +156,7 @@ use super::safe_to_save::SafeToSave; use super::save_to_safe::SaveToSafe; use super::semicolon_apostrophe::SemicolonApostrophe; use super::sentence_capitalization::SentenceCapitalization; +use super::sentence_casing::SentenceCasing; use super::shoot_oneself_in_the_foot::ShootOneselfInTheFoot; use super::simple_past_to_past_participle::SimplePastToPastParticiple; use super::since_duration::SinceDuration; @@ -623,6 +624,7 @@ impl LintGroup { insert_expr_rule!(SafeToSave, true); insert_expr_rule!(SaveToSafe, true); insert_expr_rule!(SemicolonApostrophe, true); + insert_struct_rule!(SentenceCasing, false); insert_expr_rule!(ShootOneselfInTheFoot, true); insert_expr_rule!(SimplePastToPastParticiple, true); insert_expr_rule!(SinceDuration, true); diff --git a/harper-core/src/linting/mod.rs b/harper-core/src/linting/mod.rs index 1173f38cd..83cb50288 100644 --- a/harper-core/src/linting/mod.rs +++ b/harper-core/src/linting/mod.rs @@ -167,6 +167,7 @@ mod safe_to_save; mod save_to_safe; mod semicolon_apostrophe; mod sentence_capitalization; +mod sentence_casing; mod shoot_oneself_in_the_foot; mod simple_past_to_past_participle; mod since_duration; diff --git a/harper-core/src/linting/sentence_casing.rs b/harper-core/src/linting/sentence_casing.rs new file mode 100644 index 000000000..afd996c9a --- /dev/null +++ b/harper-core/src/linting/sentence_casing.rs @@ -0,0 +1,233 @@ +use super::Suggestion; +use super::{Lint, LintKind, Linter}; +use crate::document::Document; +use crate::{Token, TokenStringExt}; + +/// A linter that checks for words that are capitalized but shouldn't be +/// (i.e., not at the start of a sentence/heading and not proper nouns). +pub struct SentenceCasing; + +impl Default for SentenceCasing { + fn default() -> Self { + Self + } +} + +impl SentenceCasing { + /// Check a sequence of tokens for incorrect capitalization. + /// `first_word_idx` is the index of the first word that should be capitalized. + fn check_tokens(&self, tokens: &[Token], document: &Document, lints: &mut Vec) { + // Get the index of the first word in the sequence + let first_word_idx = tokens.iter().position(|t| t.kind.is_word()); + + let Some(first_word_idx) = first_word_idx else { + return; + }; + + // Check all words after the first one + for (idx, token) in tokens.iter().enumerate() { + // Skip the first word (it should be capitalized) + if idx <= first_word_idx { + continue; + } + + // Only check actual words + if !token.kind.is_word() { + continue; + } + + // Check if the word is capitalized + let word_chars = document.get_span_content(&token.span); + let Some(first_char) = word_chars.first() else { + continue; + }; + + // Skip if not capitalized + if !first_char.is_uppercase() { + continue; + } + + // Skip proper nouns - these should be capitalized + if token.kind.is_proper_noun() { + continue; + } + + // Skip words that are all uppercase (likely acronyms/initialisms) + if word_chars + .iter() + .all(|c| !c.is_alphabetic() || c.is_uppercase()) + { + continue; + } + + // Skip words after a colon (might be starting a new clause) + if let Some(prev_non_ws) = tokens[..idx].iter().rev().find(|t| !t.kind.is_whitespace()) + && prev_non_ws.kind.is_punctuation() + { + let prev_chars = document.get_span_content(&prev_non_ws.span); + if prev_chars == [':'] { + continue; + } + } + + // Skip single-letter capitalizations (often used for proper context like "Plan A") + if word_chars.len() == 1 { + continue; + } + + // Skip words after opening quotes (might be a quoted sentence start) + if let Some(prev_non_ws) = tokens[..idx].iter().rev().find(|t| !t.kind.is_whitespace()) + && prev_non_ws.kind.is_quote() + { + continue; + } + + // Check if this word follows a sentence terminator within the same sequence + // (This handles cases where parsing might not have split sentences correctly) + let has_terminator_before = tokens[first_word_idx + 1..idx] + .iter() + .any(|t| t.kind.is_sentence_terminator()); + + if has_terminator_before { + continue; + } + + // Create the lowercase suggestion + let mut replacement_chars = word_chars.to_vec(); + replacement_chars[0] = replacement_chars[0].to_ascii_lowercase(); + + lints.push(Lint { + span: token.span, + lint_kind: LintKind::Capitalization, + suggestions: vec![Suggestion::ReplaceWith(replacement_chars)], + priority: 63, + message: "This word is capitalized but does not appear to be a proper noun. Consider using lowercase.".to_string(), + }); + } + } +} + +impl Linter for SentenceCasing { + fn lint(&mut self, document: &Document) -> Vec { + let mut lints = Vec::new(); + + // Check headings + for heading in document.iter_headings() { + self.check_tokens(heading, document, &mut lints); + } + + // Check regular sentences (but skip those in headings) + for paragraph in document.iter_paragraphs() { + // Skip paragraphs that are headings (they're already checked above) + if paragraph.iter().any(|t| t.kind.is_heading_start()) { + continue; + } + + for sentence in paragraph.iter_sentences() { + self.check_tokens(sentence, document, &mut lints); + } + } + + lints + } + + fn description(&self) -> &'static str { + "Flags words that are capitalized mid-sentence or mid-heading but are not proper nouns." + } +} + +#[cfg(test)] +mod tests { + use super::super::tests::{assert_lint_count, assert_suggestion_result}; + use super::SentenceCasing; + + #[test] + fn catches_mid_sentence_capital() { + assert_lint_count( + "The quick Brown fox jumps over the lazy dog.", + SentenceCasing, + 1, + ); + } + + #[test] + fn allows_proper_nouns() { + assert_lint_count("I visited Paris last summer.", SentenceCasing, 0); + } + + #[test] + fn allows_sentence_start() { + assert_lint_count("The fox is quick. The dog is lazy.", SentenceCasing, 0); + } + + #[test] + fn allows_acronyms() { + assert_lint_count("The NASA mission was successful.", SentenceCasing, 0); + } + + #[test] + fn allows_after_colon() { + assert_lint_count("Here is the answer: True or false.", SentenceCasing, 0); + } + + #[test] + fn allows_single_letter() { + assert_lint_count("This is plan A for the mission.", SentenceCasing, 0); + } + + #[test] + fn fixes_capitalization() { + assert_suggestion_result( + "The quick Brown fox.", + SentenceCasing, + "The quick brown fox.", + ); + } + + #[test] + fn allows_names() { + assert_lint_count("I talked to John yesterday.", SentenceCasing, 0); + } + + #[test] + fn multiple_errors() { + assert_lint_count( + "The Quick Brown Fox jumps over the Lazy Dog.", + SentenceCasing, + 4, + ); + } + + #[test] + fn allows_quoted_start() { + assert_lint_count("She said \"Hello there\" to him.", SentenceCasing, 0); + } + + // Heading tests + + #[test] + fn catches_heading_mid_word_capital() { + // Markdown heading with incorrect capitalization + assert_lint_count("# The Quick Brown Fox", SentenceCasing, 3); + } + + #[test] + fn allows_heading_proper_nouns() { + assert_lint_count("# A trip to Paris", SentenceCasing, 0); + } + + #[test] + fn allows_heading_start_capital() { + assert_lint_count("# Introduction to the topic", SentenceCasing, 0); + } + + #[test] + fn fixes_heading_capitalization() { + assert_suggestion_result("# The Quick fox", SentenceCasing, "# The quick fox"); + } + + #[test] + fn heading_with_acronym() { + assert_lint_count("# Working with NASA and SpaceX", SentenceCasing, 0); + } +}