Skip to content

Commit a49ee38

Browse files
committed
feat(core): create rule to enforce sentence case
1 parent 38c7a45 commit a49ee38

File tree

3 files changed

+242
-0
lines changed

3 files changed

+242
-0
lines changed

harper-core/src/linting/lint_group.rs

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -156,6 +156,7 @@ use super::safe_to_save::SafeToSave;
156156
use super::save_to_safe::SaveToSafe;
157157
use super::semicolon_apostrophe::SemicolonApostrophe;
158158
use super::sentence_capitalization::SentenceCapitalization;
159+
use super::sentence_casing::SentenceCasing;
159160
use super::shoot_oneself_in_the_foot::ShootOneselfInTheFoot;
160161
use super::simple_past_to_past_participle::SimplePastToPastParticiple;
161162
use super::since_duration::SinceDuration;
@@ -623,6 +624,7 @@ impl LintGroup {
623624
insert_expr_rule!(SafeToSave, true);
624625
insert_expr_rule!(SaveToSafe, true);
625626
insert_expr_rule!(SemicolonApostrophe, true);
627+
insert_struct_rule!(SentenceCasing, false);
626628
insert_expr_rule!(ShootOneselfInTheFoot, true);
627629
insert_expr_rule!(SimplePastToPastParticiple, true);
628630
insert_expr_rule!(SinceDuration, true);

harper-core/src/linting/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -167,6 +167,7 @@ mod safe_to_save;
167167
mod save_to_safe;
168168
mod semicolon_apostrophe;
169169
mod sentence_capitalization;
170+
mod sentence_casing;
170171
mod shoot_oneself_in_the_foot;
171172
mod simple_past_to_past_participle;
172173
mod since_duration;
harper-core/src/linting/sentence_casing.rs

Lines changed: 239 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,239 @@
1+
use super::Suggestion;
2+
use super::{Lint, LintKind, Linter};
3+
use crate::document::Document;
4+
use crate::{Token, TokenStringExt};
5+
6+
/// A linter that flags words that are capitalized but shouldn't be
/// (i.e., not at the start of a sentence/heading and not proper nouns).
///
/// The linter holds no state, so `Default` (and the other value traits) are
/// derived rather than written by hand.
#[derive(Debug, Clone, Copy, Default)]
pub struct SentenceCasing;
15+
16+
impl SentenceCasing {
17+
/// Check a sequence of tokens for incorrect capitalization.
18+
/// `first_word_idx` is the index of the first word that should be capitalized.
19+
fn check_tokens(&self, tokens: &[Token], document: &Document, lints: &mut Vec<Lint>) {
20+
// Get the index of the first word in the sequence
21+
let first_word_idx = tokens.iter().position(|t| t.kind.is_word());
22+
23+
let Some(first_word_idx) = first_word_idx else {
24+
return;
25+
};
26+
27+
// Check all words after the first one
28+
for (idx, token) in tokens.iter().enumerate() {
29+
// Skip the first word (it should be capitalized)
30+
if idx <= first_word_idx {
31+
continue;
32+
}
33+
34+
// Only check actual words
35+
if !token.kind.is_word() {
36+
continue;
37+
}
38+
39+
// Check if the word is capitalized
40+
let word_chars = document.get_span_content(&token.span);
41+
let Some(first_char) = word_chars.first() else {
42+
continue;
43+
};
44+
45+
// Skip if not capitalized
46+
if !first_char.is_uppercase() {
47+
continue;
48+
}
49+
50+
// Skip proper nouns - these should be capitalized
51+
if token.kind.is_proper_noun() {
52+
continue;
53+
}
54+
55+
// Skip words that are all uppercase (likely acronyms/initialisms)
56+
if word_chars.iter().all(|c| !c.is_alphabetic() || c.is_uppercase()) {
57+
continue;
58+
}
59+
60+
// Skip words after a colon (might be starting a new clause)
61+
if let Some(prev_non_ws) = tokens[..idx]
62+
.iter()
63+
.rev()
64+
.find(|t| !t.kind.is_whitespace())
65+
{
66+
if prev_non_ws.kind.is_punctuation() {
67+
let prev_chars = document.get_span_content(&prev_non_ws.span);
68+
if prev_chars == [':'] {
69+
continue;
70+
}
71+
}
72+
}
73+
74+
// Skip single-letter capitalizations (often used for proper context like "Plan A")
75+
if word_chars.len() == 1 {
76+
continue;
77+
}
78+
79+
// Skip words after opening quotes (might be a quoted sentence start)
80+
if let Some(prev_non_ws) = tokens[..idx]
81+
.iter()
82+
.rev()
83+
.find(|t| !t.kind.is_whitespace())
84+
{
85+
if prev_non_ws.kind.is_quote() {
86+
continue;
87+
}
88+
}
89+
90+
// Check if this word follows a sentence terminator within the same sequence
91+
// (This handles cases where parsing might not have split sentences correctly)
92+
let has_terminator_before = tokens[first_word_idx + 1..idx]
93+
.iter()
94+
.any(|t| t.kind.is_sentence_terminator());
95+
96+
if has_terminator_before {
97+
continue;
98+
}
99+
100+
// Create the lowercase suggestion
101+
let mut replacement_chars = word_chars.to_vec();
102+
replacement_chars[0] = replacement_chars[0].to_ascii_lowercase();
103+
104+
lints.push(Lint {
105+
span: token.span,
106+
lint_kind: LintKind::Capitalization,
107+
suggestions: vec![Suggestion::ReplaceWith(replacement_chars)],
108+
priority: 63,
109+
message: "This word is capitalized but does not appear to be a proper noun. Consider using lowercase.".to_string(),
110+
});
111+
}
112+
}
113+
}
114+
115+
impl Linter for SentenceCasing {
116+
fn lint(&mut self, document: &Document) -> Vec<Lint> {
117+
let mut lints = Vec::new();
118+
119+
// Check headings
120+
for heading in document.iter_headings() {
121+
self.check_tokens(heading, document, &mut lints);
122+
}
123+
124+
// Check regular sentences (but skip those in headings)
125+
for paragraph in document.iter_paragraphs() {
126+
// Skip paragraphs that are headings (they're already checked above)
127+
if paragraph.iter().any(|t| t.kind.is_heading_start()) {
128+
continue;
129+
}
130+
131+
for sentence in paragraph.iter_sentences() {
132+
self.check_tokens(sentence, document, &mut lints);
133+
}
134+
}
135+
136+
lints
137+
}
138+
139+
fn description(&self) -> &'static str {
140+
"Flags words that are capitalized mid-sentence or mid-heading but are not proper nouns."
141+
}
142+
}
143+
144+
#[cfg(test)]
mod tests {
    use super::super::tests::{assert_lint_count, assert_suggestion_result};
    use super::SentenceCasing;

    // ---- Sentence tests ----

    /// A stray capital in the middle of a sentence is reported.
    #[test]
    fn catches_mid_sentence_capital() {
        assert_lint_count(
            "The quick Brown fox jumps over the lazy dog.",
            SentenceCasing,
            1,
        );
    }

    /// Place names are left alone.
    #[test]
    fn allows_proper_nouns() {
        assert_lint_count("I visited Paris last summer.", SentenceCasing, 0);
    }

    /// The first word of each sentence may be capitalized.
    #[test]
    fn allows_sentence_start() {
        assert_lint_count(
            "The fox is quick. The dog is lazy.",
            SentenceCasing,
            0,
        );
    }

    /// All-uppercase words are treated as acronyms.
    #[test]
    fn allows_acronyms() {
        assert_lint_count("The NASA mission was successful.", SentenceCasing, 0);
    }

    /// A capital directly after a colon is accepted.
    #[test]
    fn allows_after_colon() {
        assert_lint_count(
            "Here is the answer: True or false.",
            SentenceCasing,
            0,
        );
    }

    /// Single capital letters are accepted.
    #[test]
    fn allows_single_letter() {
        assert_lint_count("This is plan A for the mission.", SentenceCasing, 0);
    }

    /// The suggestion lowercases only the first letter of the flagged word.
    #[test]
    fn fixes_capitalization() {
        assert_suggestion_result("The quick Brown fox.", SentenceCasing, "The quick brown fox.");
    }

    /// Personal names are left alone.
    #[test]
    fn allows_names() {
        assert_lint_count("I talked to John yesterday.", SentenceCasing, 0);
    }

    /// Several offenders in one sentence are each reported.
    #[test]
    fn multiple_errors() {
        assert_lint_count("The Quick Brown Fox jumps over the Lazy Dog.", SentenceCasing, 4);
    }

    /// A capital directly after an opening quote is accepted.
    #[test]
    fn allows_quoted_start() {
        assert_lint_count("She said \"Hello there\" to him.", SentenceCasing, 0);
    }

    // ---- Heading tests ----

    /// Title-cased Markdown headings are reported word by word.
    #[test]
    fn catches_heading_mid_word_capital() {
        assert_lint_count("# The Quick Brown Fox", SentenceCasing, 3);
    }

    /// Proper nouns inside a heading are left alone.
    #[test]
    fn allows_heading_proper_nouns() {
        assert_lint_count("# A trip to Paris", SentenceCasing, 0);
    }

    /// The first word of a heading may be capitalized.
    #[test]
    fn allows_heading_start_capital() {
        assert_lint_count("# Introduction to the topic", SentenceCasing, 0);
    }

    /// The suggestion also applies inside headings.
    #[test]
    fn fixes_heading_capitalization() {
        assert_suggestion_result("# The Quick fox", SentenceCasing, "# The quick fox");
    }

    /// Acronyms and brand names in headings are left alone.
    #[test]
    fn heading_with_acronym() {
        assert_lint_count("# Working with NASA and SpaceX", SentenceCasing, 0);
    }
}
239+

0 commit comments

Comments
 (0)