From 008c53428698ea062fc7a5ca8dd74b067798a45b Mon Sep 17 00:00:00 2001 From: 86xsk <200443667+86xsk@users.noreply.github.com> Date: Tue, 20 Jan 2026 01:12:40 -0600 Subject: [PATCH 01/18] test(core): add failing test --- harper-core/src/linting/spell_check.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/harper-core/src/linting/spell_check.rs b/harper-core/src/linting/spell_check.rs index b1a476848..54faf6621 100644 --- a/harper-core/src/linting/spell_check.rs +++ b/harper-core/src/linting/spell_check.rs @@ -513,4 +513,12 @@ mod tests { SpellCheck::new(FstDictionary::curated(), Dialect::American), ); } + + #[test] + fn dont_flag_mb() { + assert_no_lints( + "MB", + SpellCheck::new(FstDictionary::curated(), Dialect::American), + ); + } } From a5f4a1b533ca660a661e799c823e60552528f266 Mon Sep 17 00:00:00 2001 From: 86xsk <200443667+86xsk@users.noreply.github.com> Date: Tue, 27 Jan 2026 19:12:00 -0600 Subject: [PATCH 02/18] test(comments): don't expect 'lin' to be marked as a spelling error --- harper-comments/tests/language_support.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harper-comments/tests/language_support.rs b/harper-comments/tests/language_support.rs index 8a0683f6c..372e63ae3 100644 --- a/harper-comments/tests/language_support.rs +++ b/harper-comments/tests/language_support.rs @@ -52,7 +52,7 @@ create_test!(jsdoc.ts, 4); create_test!(issue_96.lua, 0); create_test!(merged_lines.ts, 1); create_test!(javadoc_clean_simple.java, 0); -create_test!(javadoc_complex.java, 5); +create_test!(javadoc_complex.java, 4); create_test!(issue_132.rs, 1); create_test!(laravel_app.php, 2); create_test!(ignore_shebang_1.sh, 0); From 5face2350122724172c58bffe73bcdd90525a933 Mon Sep 17 00:00:00 2001 From: 86xsk <200443667+86xsk@users.noreply.github.com> Date: Tue, 27 Jan 2026 19:28:07 -0600 Subject: [PATCH 03/18] test(core): move tests `SpellCheck` shouldn't handle capitalization if `OrthographicConsistency` is going to do it anyway. 
--- .../src/linting/orthographic_consistency.rs | 20 ++++++++++++++++++- harper-core/src/linting/spell_check.rs | 20 ------------------- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/harper-core/src/linting/orthographic_consistency.rs b/harper-core/src/linting/orthographic_consistency.rs index 3034b1c0e..5a685fb24 100644 --- a/harper-core/src/linting/orthographic_consistency.rs +++ b/harper-core/src/linting/orthographic_consistency.rs @@ -144,7 +144,7 @@ fn alphabetic_differs(a: &[char], b: &[char]) -> bool { #[cfg(test)] mod tests { - use crate::linting::tests::{assert_no_lints, assert_suggestion_result}; + use crate::linting::tests::{assert_lint_count, assert_no_lints, assert_suggestion_result}; use super::OrthographicConsistency; @@ -157,6 +157,24 @@ mod tests { ); } + #[test] + fn america_capitalized() { + assert_suggestion_result( + "The word america should be capitalized.", + OrthographicConsistency::default(), + "The word America should be capitalized.", + ); + } + + #[test] + fn harper_automattic_capitalized() { + assert_lint_count( + "So should harper and automattic.", + OrthographicConsistency::default(), + 2, + ); + } + #[test] fn ikea_should_be_all_caps() { assert_suggestion_result( diff --git a/harper-core/src/linting/spell_check.rs b/harper-core/src/linting/spell_check.rs index 54faf6621..e52bf7ede 100644 --- a/harper-core/src/linting/spell_check.rs +++ b/harper-core/src/linting/spell_check.rs @@ -144,28 +144,8 @@ mod tests { }; use crate::{DictWordMetadata, Document}; - // Capitalization tests - - #[test] - fn america_capitalized() { - assert_suggestion_result( - "The word america should be capitalized.", - SpellCheck::new(FstDictionary::curated(), Dialect::American), - "The word America should be capitalized.", - ); - } - // Dialect tests - #[test] - fn harper_automattic_capitalized() { - assert_lint_count( - "So should harper and automattic.", - SpellCheck::new(FstDictionary::curated(), Dialect::American), - 2, - ); - } - #[test] 
fn american_color_in_british_dialect() { assert_lint_count( From fe37ba62565d02952cfab96928c2740a521d5d54 Mon Sep 17 00:00:00 2001 From: 86xsk <200443667+86xsk@users.noreply.github.com> Date: Thu, 29 Jan 2026 00:03:42 -0600 Subject: [PATCH 04/18] test(core): don't expect `SpellCheck` to mark capitalization issues --- harper-core/tests/run_tests.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/harper-core/tests/run_tests.rs b/harper-core/tests/run_tests.rs index 38f7d0a3b..4c9fbe455 100644 --- a/harper-core/tests/run_tests.rs +++ b/harper-core/tests/run_tests.rs @@ -88,7 +88,7 @@ create_test!(misc_closed_compound_clean.md, 0, Dialect::American); create_test!(statist_localist.md, 0, Dialect::American); create_test!(yogurt_british_clean.md, 0, Dialect::British); create_test!(issue_1581.md, 0, Dialect::British); -create_test!(issue_2054.md, 6, Dialect::British); +create_test!(issue_2054.md, 3, Dialect::British); create_test!(issue_1988.md, 0, Dialect::American); create_test!(issue_2054_clean.md, 0, Dialect::British); create_test!(issue_1873.md, 0, Dialect::British); @@ -98,10 +98,10 @@ create_test!(title_case_clean.md, 0, Dialect::American); create_test!(issue_2233.md, 0, Dialect::American); create_test!(issue_2240.md, 0, Dialect::American); // It just matters that it is > 1 -create_test!(issue_2151.md, 4, Dialect::British); +create_test!(issue_2151.md, 2, Dialect::British); // Make sure it doesn't panic create_test!(lukas_homework.md, 4, Dialect::American); // Org mode tests -create_org_test!(index.org, 49, Dialect::American); +create_org_test!(index.org, 47, Dialect::American); From 6f02acd97121b2a7d30f3626f3f1292423cf8206 Mon Sep 17 00:00:00 2001 From: 86xsk <200443667+86xsk@users.noreply.github.com> Date: Thu, 29 Jan 2026 15:50:12 -0600 Subject: [PATCH 05/18] deps(core): add `indexmap` --- Cargo.lock | 1 + harper-core/Cargo.toml | 1 + 2 files changed, 2 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 6a577274c..550eccfde 100644 
--- a/Cargo.lock +++ b/Cargo.lock @@ -2773,6 +2773,7 @@ dependencies = [ "harper-brill", "harper-thesaurus", "hashbrown 0.16.1", + "indexmap", "is-macro", "itertools 0.14.0", "lazy_static", diff --git a/harper-core/Cargo.toml b/harper-core/Cargo.toml index c3efed4d0..de8467ba4 100644 --- a/harper-core/Cargo.toml +++ b/harper-core/Cargo.toml @@ -35,6 +35,7 @@ harper-brill = { path = "../harper-brill", version = "1.0.0" } harper-thesaurus = { path = "../harper-thesaurus", version = "1.4.1", optional = true } bitflags = { version = "2.10.0", features = ["serde"] } trie-rs = "0.4.2" +indexmap = "2.12.1" [dev-dependencies] criterion = { version = "0.8.1", default-features = false } From 03b1f5b531d0c6842a7a77fcd9c46c431ca5291d Mon Sep 17 00:00:00 2001 From: 86xsk <200443667+86xsk@users.noreply.github.com> Date: Thu, 29 Jan 2026 15:53:28 -0600 Subject: [PATCH 06/18] feat(core)!: more explicit handling of case-sensitivity in dictionaries - Splits `WordId` into `CanonicalWordId` and `CaseFoldedWordId`. - Updates dictionary functions to more explicitly handle casing. There are now functions to get a specific word case-sensitively, to get all matching words case-insensitively, and to get them case-insensitively with all metadata merged into one entry (which was the old behavior). - Fixes issue where `SpellCheck` would sometimes mark words as incorrect if an identical entry with different casing existed in the dictionary (e.g. OS, PR, etc.). - Makes `SpellCheck` no longer care about casing, since that is handled by `OrthographicConsistency`. 
--- harper-cli/src/main.rs | 19 ++- harper-core/src/dict_word_metadata.rs | 16 ++- harper-core/src/document.rs | 2 +- harper-core/src/expr/mergeable_words.rs | 6 +- .../src/linting/inflected_verb_after_to.rs | 4 +- .../src/linting/mass_nouns/mass_plurals.rs | 4 +- harper-core/src/linting/more_adjective.rs | 2 +- .../src/linting/one_of_the_singular.rs | 8 +- .../src/linting/orthographic_consistency.rs | 14 +- .../linting/phrasal_verb_as_compound_noun.rs | 4 +- .../src/linting/pronoun_verb_agreement.rs | 4 +- .../src/linting/sentence_capitalization.rs | 8 +- harper-core/src/linting/spell_check.rs | 13 +- harper-core/src/linting/split_words.rs | 4 +- harper-core/src/linting/transposed_space.rs | 17 ++- harper-core/src/patterns/derived_from.rs | 61 ++++++--- harper-core/src/spell/dictionary.rs | 51 ++++++-- harper-core/src/spell/fst_dictionary.rs | 60 +++++---- harper-core/src/spell/merged_dictionary.rs | 48 ++++--- harper-core/src/spell/mod.rs | 2 +- harper-core/src/spell/mutable_dictionary.rs | 105 +++++++++------ harper-core/src/spell/rune/attribute_list.rs | 25 ++-- harper-core/src/spell/rune/mod.rs | 9 +- harper-core/src/spell/trie_dictionary.rs | 20 +-- harper-core/src/spell/word_id.rs | 114 ++++++++++++++++- harper-core/src/spell/word_map.rs | 120 +++++++++++------- harper-core/src/thesaurus_helper.rs | 2 +- harper-core/src/title_case.rs | 7 +- 28 files changed, 510 insertions(+), 239 deletions(-) diff --git a/harper-cli/src/main.rs b/harper-cli/src/main.rs index 6d55e5ade..3a655fc0a 100644 --- a/harper-cli/src/main.rs +++ b/harper-cli/src/main.rs @@ -1,6 +1,6 @@ #![doc = include_str!("../README.md")] -use harper_core::spell::{Dictionary, FstDictionary, MutableDictionary, WordId}; +use harper_core::spell::{CanonicalWordId, Dictionary, FstDictionary, MutableDictionary}; use hashbrown::HashMap; use std::collections::BTreeMap; use std::fs::File; @@ -349,7 +349,7 @@ fn main() -> anyhow::Result<()> { ]; for word in words { - let meta = 
curated_dictionary.get_word_metadata_str(&word); + let meta = curated_dictionary.get_word_metadata_str_exact(&word); let (flags, emojis) = meta.as_ref().map_or_else( || (String::new(), String::new()), |md| { @@ -845,7 +845,7 @@ fn main() -> anyhow::Result<()> { let mut processed_words = HashMap::new(); let mut longest_word = 0; for word in curated_dictionary.words_iter() { - if let Some(metadata) = curated_dictionary.get_word_metadata(word) { + if let Some(metadata) = curated_dictionary.get_word_metadata_exact(word) { let orth = metadata.orth_info; let bits = orth.bits() & case_bitmask.bits(); @@ -947,11 +947,16 @@ fn line_to_parts(line: &str) -> (String, String) { fn print_word_derivations(word: &str, annot: &str, dictionary: &impl Dictionary) { println!("{word}/{annot}"); - let id = WordId::from_word_str(word); + let id = CanonicalWordId::from_word_str(word); - let children = dictionary - .words_iter() - .filter(|e| dictionary.get_word_metadata(e).unwrap().derived_from == Some(id)); + let children = dictionary.words_iter().filter(|e| { + dictionary + .get_word_metadata_exact(e) + .unwrap() + .derived_from + .map(|derived_from| derived_from.canonical()) + == Some(id) + }); println!(" - {word}"); diff --git a/harper-core/src/dict_word_metadata.rs b/harper-core/src/dict_word_metadata.rs index a4d2ab59f..0e7191ad2 100644 --- a/harper-core/src/dict_word_metadata.rs +++ b/harper-core/src/dict_word_metadata.rs @@ -10,7 +10,7 @@ use strum_macros::{Display, EnumCount, EnumIter, EnumString, VariantArray}; use std::convert::TryFrom; use crate::dict_word_metadata_orthography::OrthFlags; -use crate::spell::WordId; +use crate::spell::WordIdPair; use crate::{Document, TokenKind, TokenStringExt}; /// This represents a "lexeme" or "headword" which is case-folded but affix-expanded. 
@@ -45,7 +45,7 @@ pub struct DictWordMetadata { #[serde(default = "default_false")] pub common: bool, #[serde(default = "default_none")] - pub derived_from: Option, + pub derived_from: Option, /// Generated by a chunker. Declares whether the word is a member of a nominal phrase. Using /// this should be preferred over the similarly named `Pattern`. /// @@ -1199,15 +1199,19 @@ impl Default for DialectFlags { #[cfg(test)] pub mod tests { + use std::borrow::Cow; + use std::sync::{Arc, LazyLock}; + use crate::DictWordMetadata; use crate::spell::{Dictionary, FstDictionary}; // Helper function to get metadata from the curated dictionary - pub fn md(word: &str) -> DictWordMetadata { - FstDictionary::curated() - .get_word_metadata_str(word) + pub fn md(word: &str) -> Cow<'_, DictWordMetadata> { + static CURATED_DICT: LazyLock> = LazyLock::new(FstDictionary::curated); + + CURATED_DICT + .get_word_metadata_combined_str(word) .unwrap_or_else(|| panic!("Word '{word}' not found in dictionary")) - .into_owned() } mod dialect { diff --git a/harper-core/src/document.rs b/harper-core/src/document.rs index 174366c7a..24df866a6 100644 --- a/harper-core/src/document.rs +++ b/harper-core/src/document.rs @@ -180,7 +180,7 @@ impl Document { if let TokenKind::Word(meta) = &mut token.kind { let word_source = token.span.get_content(&self.source); let mut found_meta = dictionary - .get_word_metadata(word_source) + .get_word_metadata_combined(word_source) .map(|c| c.into_owned()); if let Some(inner) = &mut found_meta { diff --git a/harper-core/src/expr/mergeable_words.rs b/harper-core/src/expr/mergeable_words.rs index a37ff5f09..73a6d17a0 100644 --- a/harper-core/src/expr/mergeable_words.rs +++ b/harper-core/src/expr/mergeable_words.rs @@ -49,13 +49,13 @@ impl MergeableWords { let mut compound = a_chars.clone(); compound.push(' '); compound.extend_from_slice(&b_chars); - let meta_open = self.dict.get_word_metadata(&compound); + let meta_open = 
self.dict.get_word_metadata(&compound).first().copied(); // Then check if the closed compound exists in the dictionary compound.remove(a_chars.len()); - let meta_closed = self.dict.get_word_metadata(&compound); + let meta_closed = self.dict.get_word_metadata(&compound).first().copied(); - if (self.predicate)(meta_closed.as_deref(), meta_open.as_deref()) { + if (self.predicate)(meta_closed, meta_open) { return Some(compound); } diff --git a/harper-core/src/linting/inflected_verb_after_to.rs b/harper-core/src/linting/inflected_verb_after_to.rs index 41ab1f1c5..afc6c3e28 100644 --- a/harper-core/src/linting/inflected_verb_after_to.rs +++ b/harper-core/src/linting/inflected_verb_after_to.rs @@ -42,7 +42,7 @@ impl Linter for InflectedVerbAfterTo { } let check_stem = |stem: &[char]| { - if let Some(metadata) = self.dictionary.get_word_metadata(stem) + if let Some(metadata) = self.dictionary.get_word_metadata_combined(stem) && metadata.is_verb() && !metadata.is_noun() { @@ -79,7 +79,7 @@ impl Linter for InflectedVerbAfterTo { let ed_specific_heuristics = || { if let Some(prev) = document.get_next_word_from_offset(pi, -1) { let prev_chars = document.get_span_content(&prev.span); - if let Some(metadata) = self.dictionary.get_word_metadata(prev_chars) { + if let Some(metadata) = self.dictionary.get_word_metadata_combined(prev_chars) { // adj: "able to" expects an infinitive verb // verb: "have/had/has/having to" expect an infinitive verb if metadata.is_adjective() || metadata.is_verb() { diff --git a/harper-core/src/linting/mass_nouns/mass_plurals.rs b/harper-core/src/linting/mass_nouns/mass_plurals.rs index 757016e66..c4b4e510b 100644 --- a/harper-core/src/linting/mass_nouns/mass_plurals.rs +++ b/harper-core/src/linting/mass_nouns/mass_plurals.rs @@ -43,13 +43,13 @@ where fn is_mass_noun_in_dictionary(&self, chars: &[char]) -> bool { self.dict - .get_word_metadata(chars) + .get_word_metadata_combined(chars) .is_some_and(|wmd| wmd.is_mass_noun_only()) } fn 
is_mass_noun_in_dictionary_str(&self, s: &str) -> bool { self.dict - .get_word_metadata_str(s) + .get_word_metadata_combined_str(s) .is_some_and(|wmd| wmd.is_mass_noun_only()) } } diff --git a/harper-core/src/linting/more_adjective.rs b/harper-core/src/linting/more_adjective.rs index 97b0fd3f9..c96aa3b61 100644 --- a/harper-core/src/linting/more_adjective.rs +++ b/harper-core/src/linting/more_adjective.rs @@ -28,7 +28,7 @@ where } fn add_valid_candidate(&self, candidates: &mut Vec, candidate: String) -> bool { - if let Some(metadata) = self.dict.get_word_metadata_str(&candidate) + if let Some(metadata) = self.dict.get_word_metadata_str_exact(&candidate) && (metadata.is_comparative_adjective() || metadata.is_superlative_adjective()) { candidates.push(candidate); diff --git a/harper-core/src/linting/one_of_the_singular.rs b/harper-core/src/linting/one_of_the_singular.rs index 5b209d5e9..1dc8564ca 100644 --- a/harper-core/src/linting/one_of_the_singular.rs +++ b/harper-core/src/linting/one_of_the_singular.rs @@ -98,14 +98,14 @@ impl ExprLinter for OneOfTheSingular { if self .dict - .get_word_metadata(&plural_s) + .get_word_metadata_combined(&plural_s) .is_some_and(|m| m.is_plural_noun()) { suggestions.push(Suggestion::replace_with_match_case(plural_s, singular)); } if self .dict - .get_word_metadata(&plural_es) + .get_word_metadata_combined(&plural_es) .is_some_and(|m| m.is_plural_noun()) { suggestions.push(Suggestion::replace_with_match_case(plural_es, singular)); @@ -117,7 +117,7 @@ impl ExprLinter for OneOfTheSingular { plural_ies.extend(['i', 'e', 's']); if self .dict - .get_word_metadata(&plural_ies) + .get_word_metadata_combined(&plural_ies) .is_some_and(|m| m.is_plural_noun()) { suggestions.push(Suggestion::replace_with_match_case(plural_ies, singular)); @@ -130,7 +130,7 @@ impl ExprLinter for OneOfTheSingular { plural_ves.extend(['v', 'e', 's']); if self .dict - .get_word_metadata(&plural_ves) + .get_word_metadata_combined(&plural_ves) .is_some_and(|m| 
m.is_plural_noun()) { suggestions.push(Suggestion::replace_with_match_case(plural_ves, singular)); diff --git a/harper-core/src/linting/orthographic_consistency.rs b/harper-core/src/linting/orthographic_consistency.rs index 5a685fb24..72509b19e 100644 --- a/harper-core/src/linting/orthographic_consistency.rs +++ b/harper-core/src/linting/orthographic_consistency.rs @@ -1,3 +1,5 @@ +use itertools::Itertools; + use crate::linting::{LintKind, Suggestion}; use std::sync::Arc; @@ -98,7 +100,11 @@ impl ExprLinter for OrthographicConsistency { .filter(|flag| canonical_flags.contains(*flag) != cur_flags.contains(*flag)) .count() == 1 - && let Some(canonical) = self.dict.get_correct_capitalization_of(chars) + && let Ok(canonical) = self + .dict + .get_correct_capitalization_of(chars) + .into_iter() + .exactly_one() && alphabetic_differs(canonical, chars) { return Some(Lint { @@ -115,7 +121,11 @@ impl ExprLinter for OrthographicConsistency { if metadata.is_titlecase() && cur_flags.contains(OrthFlags::LOWERCASE) - && let Some(canonical) = self.dict.get_correct_capitalization_of(chars) + && let Ok(canonical) = self + .dict + .get_correct_capitalization_of(chars) + .into_iter() + .exactly_one() && alphabetic_differs(canonical, chars) { return Some(Lint { diff --git a/harper-core/src/linting/phrasal_verb_as_compound_noun.rs b/harper-core/src/linting/phrasal_verb_as_compound_noun.rs index a739e5bb3..874d0aba4 100644 --- a/harper-core/src/linting/phrasal_verb_as_compound_noun.rs +++ b/harper-core/src/linting/phrasal_verb_as_compound_noun.rs @@ -103,10 +103,10 @@ impl Linter for PhrasalVerbAsCompoundNoun { // So far we only have a small number of phrasal verbs in the dictionary. 
let (verb_part_is_verb, phrasal_verb_is_verb) = ( self.dict - .get_word_metadata(verb_part) + .get_word_metadata_combined(verb_part) .is_some_and(|md| md.verb.is_some()), self.dict - .get_word_metadata_str(&phrasal_verb) + .get_word_metadata_combined_str(&phrasal_verb) .is_some_and(|md| md.verb.is_some()), ); diff --git a/harper-core/src/linting/pronoun_verb_agreement.rs b/harper-core/src/linting/pronoun_verb_agreement.rs index d57bda90c..4eddcdd60 100644 --- a/harper-core/src/linting/pronoun_verb_agreement.rs +++ b/harper-core/src/linting/pronoun_verb_agreement.rs @@ -129,7 +129,7 @@ where .iter() .filter(|&w| { self.dict - .get_word_metadata(w) + .get_word_metadata_exact(w) .is_some_and(|md| md.is_verb_lemma()) }) .map(|w| w.to_vec()) @@ -161,7 +161,7 @@ where .iter() .filter(|&w| { self.dict - .get_word_metadata(w) + .get_word_metadata_exact(w) .is_some_and(|md| md.is_verb_third_person_singular_present_form()) }) .map(|w| w.to_vec()) diff --git a/harper-core/src/linting/sentence_capitalization.rs b/harper-core/src/linting/sentence_capitalization.rs index 13805a297..936004f52 100644 --- a/harper-core/src/linting/sentence_capitalization.rs +++ b/harper-core/src/linting/sentence_capitalization.rs @@ -37,7 +37,7 @@ impl Linter for SentenceCapitalization { } } - for sentence in paragraph.iter_sentences() { + 'sentence: for sentence in paragraph.iter_sentences() { if !is_full_sentence(sentence) { continue; } @@ -53,12 +53,12 @@ impl Linter for SentenceCapitalization { && first_char.is_alphabetic() && !first_char.is_uppercase() { - if let Some(canonical_spelling) = + for canonical_spelling in self.dictionary.get_correct_capitalization_of(word_chars) { // Skip if it's a proper noun or contains uppercase letters before a separator if first_word.kind.is_proper_noun() { - continue; + continue 'sentence; } // Check for uppercase letters in the rest of the word before any separators @@ -68,7 +68,7 @@ impl Linter for SentenceCapitalization { .take_while(|&c| 
!c.is_whitespace() && *c != '-' && *c != '\'') .any(|&c| c.is_uppercase()) { - continue; + continue 'sentence; } } diff --git a/harper-core/src/linting/spell_check.rs b/harper-core/src/linting/spell_check.rs index e52bf7ede..ff54a9fba 100644 --- a/harper-core/src/linting/spell_check.rs +++ b/harper-core/src/linting/spell_check.rs @@ -7,7 +7,7 @@ use super::Suggestion; use super::{Lint, LintKind, Linter}; use crate::document::Document; use crate::spell::{Dictionary, suggest_correct_spelling}; -use crate::{CharString, CharStringExt, Dialect, TokenStringExt}; +use crate::{CharString, Dialect, TokenStringExt}; pub struct SpellCheck where @@ -47,10 +47,10 @@ impl SpellCheck { .filter(|v| { // Ignore entries outside the configured dialect self.dictionary - .get_word_metadata(v) - .unwrap() - .dialects - .is_dialect_enabled(self.dialect) + .get_word_metadata_combined(v) + .is_some_and(|word_meta| { + word_meta.dialects.is_dialect_enabled(self.dialect) + }) }) .map(|v| v.to_smallvec()) .take(Self::MAX_SUGGESTIONS) @@ -75,8 +75,7 @@ impl Linter for SpellCheck { if let Some(metadata) = word.kind.as_word().unwrap() && metadata.dialects.is_dialect_enabled(self.dialect) - && (self.dictionary.contains_exact_word(word_chars) - || self.dictionary.contains_exact_word(&word_chars.to_lower())) + && self.dictionary.contains_word(word_chars) { continue; }; diff --git a/harper-core/src/linting/split_words.rs b/harper-core/src/linting/split_words.rs index 26fc5caa3..dc4935c94 100644 --- a/harper-core/src/linting/split_words.rs +++ b/harper-core/src/linting/split_words.rs @@ -57,14 +57,14 @@ impl ExprLinter for SplitWords { continue; } - let cand_meta = self.dict.get_word_metadata(&candidate).unwrap(); + let cand_meta = self.dict.get_word_metadata_exact(&candidate).unwrap(); if !cand_meta.common { continue; } // The potential word that completes the compound let remainder = &chars[candidate.len()..]; - if let Some(rem_meta) = self.dict.get_word_metadata(remainder) + if let Some(rem_meta) 
= self.dict.get_word_metadata_exact(remainder) && rem_meta.common { let candidate_chars = candidate.as_ref(); diff --git a/harper-core/src/linting/transposed_space.rs b/harper-core/src/linting/transposed_space.rs index 232ad2b0c..068209969 100644 --- a/harper-core/src/linting/transposed_space.rs +++ b/harper-core/src/linting/transposed_space.rs @@ -78,13 +78,14 @@ impl ExprLinter for TransposedSpace { // "thec" "at" -> "the cat" if self.dict.contains_word(w1_start) && self.dict.contains_word(&w1_last_plus_w2) { let maybe_canon_w2 = self.dict.get_correct_capitalization_of(&w1_last_plus_w2); - if let Some(canon_w1) = self.dict.get_correct_capitalization_of(w1_start) { - if let Some(canon_w2) = maybe_canon_w2 { + + if let Some(canon_w1) = self.dict.get_correct_capitalization_of(w1_start).first() { + if let Some(canon_w2) = maybe_canon_w2.first() { keep_unique(&mut values, canon_w1, canon_w2); } else { keep_unique(&mut values, canon_w1, &w1_last_plus_w2); } - } else if let Some(canon_w2) = maybe_canon_w2 { + } else if let Some(canon_w2) = maybe_canon_w2.first() { keep_unique(&mut values, w1_start, canon_w2); } @@ -94,13 +95,17 @@ impl ExprLinter for TransposedSpace { // "th" "ecat" -> "the cat" if self.dict.contains_word(&w1_plus_w2_first) && self.dict.contains_word(w2_end) { let maybe_canon_w2 = self.dict.get_correct_capitalization_of(w2_end); - if let Some(canon_w1) = self.dict.get_correct_capitalization_of(&w1_plus_w2_first) { - if let Some(canon_w2) = maybe_canon_w2 { + if let Some(canon_w1) = self + .dict + .get_correct_capitalization_of(&w1_plus_w2_first) + .first() + { + if let Some(canon_w2) = maybe_canon_w2.first() { keep_unique(&mut values, canon_w1, canon_w2); } else { keep_unique(&mut values, canon_w1, w2_end); } - } else if let Some(canon_w2) = maybe_canon_w2 { + } else if let Some(canon_w2) = maybe_canon_w2.first() { keep_unique(&mut values, &w1_plus_w2_first, canon_w2); } diff --git a/harper-core/src/patterns/derived_from.rs 
b/harper-core/src/patterns/derived_from.rs index f1d818665..240917dab 100644 --- a/harper-core/src/patterns/derived_from.rs +++ b/harper-core/src/patterns/derived_from.rs @@ -1,4 +1,6 @@ -use crate::spell::WordId; +use crate::spell::{ + CanonicalWordId, CaseFoldedWordId, Dictionary, EitherWordId, MutableDictionary, +}; use super::Pattern; @@ -7,19 +9,27 @@ use super::Pattern; /// /// For example, this will match "call" as well as "recall", "calling", etc. pub struct DerivedFrom { - word_id: WordId, + word_id: EitherWordId, } impl DerivedFrom { pub fn new_from_str(word: &str) -> DerivedFrom { - Self::new(WordId::from_word_str(word)) + Self::new(EitherWordId::from_str_case_folded(word)) } pub fn new_from_chars(word: &[char]) -> DerivedFrom { - Self::new(WordId::from_word_chars(word)) + Self::new(EitherWordId::from_chars_case_folded(word)) } - pub fn new(word_id: WordId) -> Self { + pub fn new_from_str_exact(word: &str) -> DerivedFrom { + Self::new(EitherWordId::from_str_canonical(word)) + } + + pub fn new_from_chars_exact(word: &[char]) -> DerivedFrom { + Self::new(EitherWordId::from_chars_canonical(word)) + } + + pub fn new(word_id: EitherWordId) -> Self { Self { word_id } } } @@ -27,19 +37,40 @@ impl DerivedFrom { impl Pattern for DerivedFrom { fn matches(&self, tokens: &[crate::Token], source: &[char]) -> Option { let tok = tokens.first()?; - let metadata = tok.kind.as_word()?.as_ref()?; + let chars = tok.span.get_content(source); - if metadata.derived_from == Some(self.word_id) { - return Some(1); - } + match self.word_id { + EitherWordId::Canonical(canonical_word_id) => { + let tok_derived_from_canonical = tok + .kind + .as_word()? + .as_ref() + .and_then(|meta| meta.derived_from)? 
+ .canonical(); - let chars = tok.span.get_content(source); - let word_id = WordId::from_word_chars(chars); + if CanonicalWordId::from_word_chars(chars) == canonical_word_id + || tok_derived_from_canonical == canonical_word_id + { + Some(1) + } else { + None + } + } + EitherWordId::CaseFolded(case_folded_word_id) => { + let dict = MutableDictionary::curated(); - if word_id == self.word_id { - return Some(1); + if CaseFoldedWordId::from_word_chars(chars) == case_folded_word_id + || dict + .get_word_metadata(chars) + .into_iter() + .filter_map(|word_meta| word_meta.derived_from) + .any(|word_id_pair| word_id_pair.case_folded() == case_folded_word_id) + { + Some(1) + } else { + None + } + } } - - None } } diff --git a/harper-core/src/spell/dictionary.rs b/harper-core/src/spell/dictionary.rs index f06a8f92b..a616a2dd5 100644 --- a/harper-core/src/spell/dictionary.rs +++ b/harper-core/src/spell/dictionary.rs @@ -1,8 +1,8 @@ -use blanket::blanket; use std::borrow::Cow; +use blanket::blanket; + use super::FuzzyMatchResult; -use super::WordId; use crate::DictWordMetadata; /// An in-memory database that contains everything necessary to parse and analyze English text. @@ -32,13 +32,29 @@ pub trait Dictionary: Send + Sync { max_distance: u8, max_results: usize, ) -> Vec>; - fn get_correct_capitalization_of(&self, word: &[char]) -> Option<&'_ [char]>; + + /// Get the correct canonical capitalizations for the given word. + fn get_correct_capitalization_of(&self, word: &[char]) -> Vec<&'_ [char]>; + /// Get the associated [`DictWordMetadata`] for any capitalization of a given word. - fn get_word_metadata(&self, word: &[char]) -> Option>; + /// + /// Since the dictionary might contain words that differ only in capitalization, this may + /// return multiple entries. + fn get_word_metadata(&self, word: &[char]) -> Vec<&DictWordMetadata>; + + /// Get the associated [`DictWordMetadata`] for this specific capitalization of the given word. 
+ fn get_word_metadata_exact(&self, word: &[char]) -> Option<&DictWordMetadata>; + /// Get the associated [`DictWordMetadata`] for any capitalization of a given word. /// If the word isn't in the dictionary, the resulting metadata will be /// empty. - fn get_word_metadata_str(&self, word: &str) -> Option>; + /// + /// Since the dictionary might contain words that differ only in capitalization, this may + /// return multiple entries. + fn get_word_metadata_str(&self, word: &str) -> Vec<&DictWordMetadata>; + + /// Get the associated [`DictWordMetadata`] for this specific capitalization of the given word. + fn get_word_metadata_str_exact(&self, word: &str) -> Option<&DictWordMetadata>; /// Iterate over the words in the dictionary. fn words_iter(&self) -> Box + Send + '_>; @@ -46,12 +62,31 @@ pub trait Dictionary: Send + Sync { /// The number of words in the dictionary. fn word_count(&self) -> usize; - /// Returns the correct capitalization of the word with the given ID. - fn get_word_from_id(&self, id: &WordId) -> Option<&[char]>; - /// Look for words with a specific prefix fn find_words_with_prefix(&self, prefix: &[char]) -> Vec>; /// Look for words that share a prefix with the provided word fn find_words_with_common_prefix(&self, word: &[char]) -> Vec>; + + /// Search for a word's metadata case-insensitively, then merge all the results into one + /// [`DictWordMetadata`]. + fn get_word_metadata_combined(&self, word: &[char]) -> Option> { + let found_words = self.get_word_metadata(word); + + match found_words.len() { + 0 => None, + 1 => Some(Cow::Borrowed(found_words[0])), + _ => Some(Cow::Owned({ + found_words + .iter() + .fold(found_words[0].to_owned(), |acc, word| acc.or(word)) + })), + } + } + + /// Search for a word's metadata case-insensitively, then merge all the results into one + /// [`DictWordMetadata`]. 
+ fn get_word_metadata_combined_str(&self, word: &str) -> Option> { + self.get_word_metadata_combined(&word.chars().collect::>()) + } } diff --git a/harper-core/src/spell/fst_dictionary.rs b/harper-core/src/spell/fst_dictionary.rs index fc7aa9914..d2e52338e 100644 --- a/harper-core/src/spell/fst_dictionary.rs +++ b/harper-core/src/spell/fst_dictionary.rs @@ -1,4 +1,4 @@ -use super::{MutableDictionary, WordId}; +use super::MutableDictionary; use fst::{IntoStreamer, Map as FstMap, Streamer, map::StreamWithState}; use hashbrown::HashMap; use lazy_static::lazy_static; @@ -122,14 +122,22 @@ impl Dictionary for FstDictionary { self.mutable_dict.contains_word_str(word) } - fn get_word_metadata(&self, word: &[char]) -> Option> { + fn get_word_metadata(&self, word: &[char]) -> Vec<&DictWordMetadata> { self.mutable_dict.get_word_metadata(word) } - fn get_word_metadata_str(&self, word: &str) -> Option> { + fn get_word_metadata_exact(&self, word: &[char]) -> Option<&DictWordMetadata> { + self.mutable_dict.get_word_metadata_exact(word) + } + + fn get_word_metadata_str(&self, word: &str) -> Vec<&DictWordMetadata> { self.mutable_dict.get_word_metadata_str(word) } + fn get_word_metadata_str_exact(&self, word: &str) -> Option<&DictWordMetadata> { + self.mutable_dict.get_word_metadata_str_exact(word) + } + fn fuzzy_match( &'_ self, word: &[char], @@ -198,6 +206,10 @@ impl Dictionary for FstDictionary { ) } + fn get_correct_capitalization_of(&self, word: &[char]) -> Vec<&'_ [char]> { + self.mutable_dict.get_correct_capitalization_of(word) + } + fn words_iter(&self) -> Box + Send + '_> { self.mutable_dict.words_iter() } @@ -214,14 +226,6 @@ impl Dictionary for FstDictionary { self.mutable_dict.contains_exact_word_str(word) } - fn get_correct_capitalization_of(&self, word: &[char]) -> Option<&'_ [char]> { - self.mutable_dict.get_correct_capitalization_of(word) - } - - fn get_word_from_id(&self, id: &WordId) -> Option<&[char]> { - self.mutable_dict.get_word_from_id(id) - } - fn 
find_words_with_prefix(&self, prefix: &[char]) -> Vec> { self.mutable_dict.find_words_with_prefix(prefix) } @@ -236,7 +240,7 @@ mod tests { use itertools::Itertools; use crate::CharStringExt; - use crate::spell::{Dictionary, WordId}; + use crate::spell::{CanonicalWordId, Dictionary}; use super::FstDictionary; @@ -296,7 +300,7 @@ mod tests { fn on_is_not_nominal() { let dict = FstDictionary::curated(); - assert!(!dict.get_word_metadata_str("on").unwrap().is_nominal()); + assert!(!dict.get_word_metadata_str_exact("on").unwrap().is_nominal()); } #[test] @@ -329,7 +333,7 @@ mod tests { for contraction in contractions { dbg!(contraction); assert!( - dict.get_word_metadata_str(contraction) + dict.get_word_metadata_str_exact(contraction) .unwrap() .derived_from .is_none() @@ -342,11 +346,12 @@ mod tests { let dict = FstDictionary::curated(); assert_eq!( - dict.get_word_metadata_str("llamas") + dict.get_word_metadata_str_exact("llamas") .unwrap() .derived_from - .unwrap(), - WordId::from_word_str("llama") + .unwrap() + .canonical(), + CanonicalWordId::from_word_str("llama") ) } @@ -355,11 +360,12 @@ mod tests { let dict = FstDictionary::curated(); assert_eq!( - dict.get_word_metadata_str("cats") + dict.get_word_metadata_str_exact("cats") .unwrap() .derived_from - .unwrap(), - WordId::from_word_str("cat") + .unwrap() + .canonical(), + CanonicalWordId::from_word_str("cat") ); } @@ -368,11 +374,12 @@ mod tests { let dict = FstDictionary::curated(); assert_eq!( - dict.get_word_metadata_str("unhappy") + dict.get_word_metadata_str_exact("unhappy") .unwrap() .derived_from - .unwrap(), - WordId::from_word_str("happy") + .unwrap() + .canonical(), + CanonicalWordId::from_word_str("happy") ); } @@ -381,11 +388,12 @@ mod tests { let dict = FstDictionary::curated(); assert_eq!( - dict.get_word_metadata_str("quickly") + dict.get_word_metadata_str_exact("quickly") .unwrap() .derived_from - .unwrap(), - WordId::from_word_str("quick") + .unwrap() + .canonical(), + 
CanonicalWordId::from_word_str("quick") ); } } diff --git a/harper-core/src/spell/merged_dictionary.rs b/harper-core/src/spell/merged_dictionary.rs index 3c144e176..35d18c323 100644 --- a/harper-core/src/spell/merged_dictionary.rs +++ b/harper-core/src/spell/merged_dictionary.rs @@ -5,7 +5,7 @@ use std::sync::Arc; use foldhash::quality::FixedState; use itertools::Itertools; -use super::{FstDictionary, WordId}; +use super::FstDictionary; use super::{FuzzyMatchResult, dictionary::Dictionary}; use crate::{CharString, DictWordMetadata}; @@ -67,15 +67,6 @@ impl Default for MergedDictionary { } impl Dictionary for MergedDictionary { - fn get_correct_capitalization_of(&self, word: &[char]) -> Option<&'_ [char]> { - for child in &self.children { - if let Some(word) = child.get_correct_capitalization_of(word) { - return Some(word); - } - } - None - } - fn contains_word(&self, word: &[char]) -> bool { for child in &self.children { if child.contains_word(word) { @@ -94,11 +85,20 @@ impl Dictionary for MergedDictionary { false } - fn get_word_metadata(&self, word: &[char]) -> Option> { + fn get_word_metadata(&self, word: &[char]) -> Vec<&DictWordMetadata> { self.children .iter() - .filter_map(|d| d.get_word_metadata(word)) - .reduce(|acc, md| Cow::Owned(acc.or(&md))) + .flat_map(|d| d.get_word_metadata(word)) + .collect() + } + + fn get_word_metadata_exact(&self, word: &[char]) -> Option<&DictWordMetadata> { + for child in &self.children { + if let Some(dict_word_metadata) = child.get_word_metadata_exact(word) { + return Some(dict_word_metadata); + } + } + None } fn words_iter(&self) -> Box + Send + '_> { @@ -112,14 +112,19 @@ impl Dictionary for MergedDictionary { fn contains_exact_word_str(&self, word: &str) -> bool { let chars: CharString = word.chars().collect(); - self.contains_word(&chars) + self.contains_exact_word(&chars) } - fn get_word_metadata_str(&self, word: &str) -> Option> { + fn get_word_metadata_str(&self, word: &str) -> Vec<&DictWordMetadata> { let chars: 
CharString = word.chars().collect(); self.get_word_metadata(&chars) } + fn get_word_metadata_str_exact(&self, word: &str) -> Option<&DictWordMetadata> { + let chars: CharString = word.chars().collect(); + self.get_word_metadata_exact(&chars) + } + fn fuzzy_match( &'_ self, word: &[char], @@ -152,14 +157,15 @@ impl Dictionary for MergedDictionary { .collect() } - fn word_count(&self) -> usize { - self.children.iter().map(|d| d.word_count()).sum() - } - - fn get_word_from_id(&self, id: &WordId) -> Option<&[char]> { + fn get_correct_capitalization_of(&self, word: &[char]) -> Vec<&'_ [char]> { self.children .iter() - .find_map(|dict| dict.get_word_from_id(id)) + .flat_map(|child| child.get_correct_capitalization_of(word)) + .collect() + } + + fn word_count(&self) -> usize { + self.children.iter().map(|d| d.word_count()).sum() } fn find_words_with_prefix(&self, prefix: &[char]) -> Vec> { diff --git a/harper-core/src/spell/mod.rs b/harper-core/src/spell/mod.rs index bb5fccd83..8d63a6dca 100644 --- a/harper-core/src/spell/mod.rs +++ b/harper-core/src/spell/mod.rs @@ -10,7 +10,7 @@ pub use self::fst_dictionary::FstDictionary; pub use self::merged_dictionary::MergedDictionary; pub use self::mutable_dictionary::MutableDictionary; pub use self::trie_dictionary::TrieDictionary; -pub use self::word_id::WordId; +pub use self::word_id::{CanonicalWordId, CaseFoldedWordId, EitherWordId, WordIdPair}; mod dictionary; mod fst_dictionary; diff --git a/harper-core/src/spell/mutable_dictionary.rs b/harper-core/src/spell/mutable_dictionary.rs index debf7afb8..25a155360 100644 --- a/harper-core/src/spell/mutable_dictionary.rs +++ b/harper-core/src/spell/mutable_dictionary.rs @@ -1,9 +1,9 @@ use super::{ - FstDictionary, WordId, + CanonicalWordId, FstDictionary, rune::{self, AttributeList, parse_word_list}, word_map::{WordMap, WordMapEntry}, }; -use crate::edit_distance::edit_distance_min_alloc; +use crate::{edit_distance::edit_distance_min_alloc, spell::CaseFoldedWordId}; use 
itertools::Itertools; use lazy_static::lazy_static; use std::borrow::Cow; @@ -108,14 +108,22 @@ impl Default for MutableDictionary { } impl Dictionary for MutableDictionary { - fn get_word_metadata(&self, word: &[char]) -> Option> { + fn get_word_metadata(&self, word: &[char]) -> Vec<&DictWordMetadata> { self.word_map - .get_with_chars(word) - .map(|v| Cow::Borrowed(&v.metadata)) + .get_case_folded(CaseFoldedWordId::from_word_chars(word)) + .map(|v| &v.metadata) + .collect() + } + + fn get_word_metadata_exact(&self, word: &[char]) -> Option<&DictWordMetadata> { + self.word_map + .get_canonical(CanonicalWordId::from_word_chars(word)) + .map(|word_map_entry| &word_map_entry.metadata) } fn contains_word(&self, word: &[char]) -> bool { - self.word_map.contains_chars(word) + self.word_map + .contains_case_folded(CaseFoldedWordId::from_word_chars(word)) } fn contains_word_str(&self, word: &str) -> bool { @@ -123,15 +131,14 @@ impl Dictionary for MutableDictionary { self.contains_word(&chars) } - fn get_word_metadata_str(&self, word: &str) -> Option> { + fn get_word_metadata_str(&self, word: &str) -> Vec<&DictWordMetadata> { let chars: CharString = word.chars().collect(); self.get_word_metadata(&chars) } - fn get_correct_capitalization_of(&self, word: &[char]) -> Option<&'_ [char]> { - self.word_map - .get_with_chars(word) - .map(|v| v.canonical_spelling.as_slice()) + fn get_word_metadata_str_exact(&self, word: &str) -> Option<&DictWordMetadata> { + let chars: CharString = word.chars().collect(); + self.get_word_metadata_exact(&chars) } /// Suggest a correct spelling for a given misspelled word. 
@@ -188,7 +195,7 @@ impl Dictionary for MutableDictionary { .map(|(word, edit_distance)| FuzzyMatchResult { word, edit_distance, - metadata: self.get_word_metadata(word).unwrap(), + metadata: Cow::Borrowed(self.get_word_metadata_exact(word).unwrap()), }) .collect() } @@ -203,6 +210,13 @@ impl Dictionary for MutableDictionary { self.fuzzy_match(&word, max_distance, max_results) } + fn get_correct_capitalization_of(&self, word: &[char]) -> Vec<&'_ [char]> { + self.word_map + .get_case_folded(CaseFoldedWordId::from_word_chars(word)) + .map(|word_map_entry| word_map_entry.canonical_spelling.as_slice()) + .collect() + } + fn words_iter(&self) -> Box + Send + '_> { Box::new( self.word_map @@ -216,15 +230,8 @@ impl Dictionary for MutableDictionary { } fn contains_exact_word(&self, word: &[char]) -> bool { - let normalized = word.normalized(); - - if let Some(found) = self.word_map.get_with_chars(normalized.as_ref()) - && found.canonical_spelling.as_ref() == normalized.as_ref() - { - return true; - } - - false + self.word_map + .contains_canonical(CanonicalWordId::from_word_chars(word)) } fn contains_exact_word_str(&self, word: &str) -> bool { @@ -232,10 +239,6 @@ impl Dictionary for MutableDictionary { self.contains_exact_word(word.as_ref()) } - fn get_word_from_id(&self, id: &WordId) -> Option<&[char]> { - self.word_map.get(id).map(|w| w.canonical_spelling.as_ref()) - } - fn find_words_with_prefix(&self, prefix: &[char]) -> Vec> { let mut found = Vec::new(); @@ -306,15 +309,18 @@ mod tests { #[test] fn this_is_determiner() { let dict = MutableDictionary::curated(); - assert!(dict.get_word_metadata_str("this").unwrap().is_determiner()); - assert!(dict.get_word_metadata_str("This").unwrap().is_determiner()); + assert!( + dict.get_word_metadata_str_exact("this") + .unwrap() + .is_determiner() + ); } #[test] fn several_is_quantifier() { let dict = MutableDictionary::curated(); assert!( - dict.get_word_metadata_str("several") + dict.get_word_metadata_str_exact("several") 
.unwrap() .is_quantifier() ); @@ -323,27 +329,41 @@ mod tests { #[test] fn few_is_quantifier() { let dict = MutableDictionary::curated(); - assert!(dict.get_word_metadata_str("few").unwrap().is_quantifier()); + assert!( + dict.get_word_metadata_str_exact("few") + .unwrap() + .is_quantifier() + ); } #[test] fn fewer_is_quantifier() { let dict = MutableDictionary::curated(); - assert!(dict.get_word_metadata_str("fewer").unwrap().is_quantifier()); + assert!( + dict.get_word_metadata_str_exact("fewer") + .unwrap() + .is_quantifier() + ); } #[test] fn than_is_conjunction() { let dict = MutableDictionary::curated(); - assert!(dict.get_word_metadata_str("than").unwrap().is_conjunction()); - assert!(dict.get_word_metadata_str("Than").unwrap().is_conjunction()); + assert!( + dict.get_word_metadata_str_exact("than") + .unwrap() + .is_conjunction() + ); } #[test] fn herself_is_pronoun() { let dict = MutableDictionary::curated(); - assert!(dict.get_word_metadata_str("herself").unwrap().is_pronoun()); - assert!(dict.get_word_metadata_str("Herself").unwrap().is_pronoun()); + assert!( + dict.get_word_metadata_str_exact("herself") + .unwrap() + .is_pronoun() + ); } #[test] @@ -355,7 +375,7 @@ mod tests { #[test] fn im_is_common() { let dict = MutableDictionary::curated(); - assert!(dict.get_word_metadata_str("I'm").unwrap().common); + assert!(dict.get_word_metadata_str_exact("I'm").unwrap().common); } #[test] @@ -375,9 +395,10 @@ mod tests { #[test] fn there_is_not_a_pronoun() { let dict = MutableDictionary::curated(); + let there_meta = dict.get_word_metadata_str_exact("there").unwrap(); - assert!(!dict.get_word_metadata_str("there").unwrap().is_nominal()); - assert!(!dict.get_word_metadata_str("there").unwrap().is_pronoun()); + assert!(!there_meta.is_nominal()); + assert!(!there_meta.is_pronoun()); } #[test] @@ -403,7 +424,7 @@ mod tests { fn curated_contains_possessive_abandonment() { assert!( MutableDictionary::curated() - .get_word_metadata_str("abandonment's") + 
.get_word_metadata_str_exact("abandonment's") .unwrap() .is_possessive_noun() ) @@ -413,7 +434,7 @@ mod tests { fn has_is_not_a_nominal() { let dict = MutableDictionary::curated(); - let has = dict.get_word_metadata_str("has"); + let has = dict.get_word_metadata_str_exact("has"); assert!(has.is_some()); assert!(!has.unwrap().is_nominal()) @@ -423,7 +444,7 @@ mod tests { fn is_is_linking_verb() { let dict = MutableDictionary::curated(); - let is = dict.get_word_metadata_str("is"); + let is = dict.get_word_metadata_str_exact("is"); assert!(is.is_some()); assert!(is.unwrap().is_linking_verb()); @@ -447,14 +468,14 @@ mod tests { fn apart_is_not_noun() { let dict = MutableDictionary::curated(); - assert!(!dict.get_word_metadata_str("apart").unwrap().is_noun()); + assert!(!dict.get_word_metadata_str_exact("apart").unwrap().is_noun()); } #[test] fn be_is_verb_lemma() { let dict = MutableDictionary::curated(); - let is = dict.get_word_metadata_str("be"); + let is = dict.get_word_metadata_str_exact("be"); assert!(is.is_some()); assert!(is.unwrap().is_verb_lemma()); diff --git a/harper-core/src/spell/rune/attribute_list.rs b/harper-core/src/spell/rune/attribute_list.rs index 575b739a1..0a10a4fb3 100644 --- a/harper-core/src/spell/rune/attribute_list.rs +++ b/harper-core/src/spell/rune/attribute_list.rs @@ -13,7 +13,7 @@ use super::expansion::{ }; use super::word_list::AnnotatedWord; use crate::dict_word_metadata_orthography::OrthFlags; -use crate::spell::WordId; +use crate::spell::{CanonicalWordId, WordIdPair}; use crate::{CharString, DictWordMetadata, Span}; #[derive(Debug, Clone)] @@ -149,17 +149,21 @@ impl AttributeList { word_map, ); // Update the metadata of the expanded word - let target_metadata = word_map.get_metadata_mut_chars(&new_word).unwrap(); + let target_metadata = word_map + .get_metadata_mut_canonical(CanonicalWordId::from_word_chars(new_word)) + .unwrap(); target_metadata.append(&metadata); target_metadata.derived_from = - 
Some(WordId::from_word_chars(&annotated_word.letters)); + Some(WordIdPair::from_word_chars(&annotated_word.letters)); } } else { // Simple case: no cross-product expansion needed for (key, mut value) in new_words.into_iter() { - value.derived_from = Some(WordId::from_word_chars(&annotated_word.letters)); + value.derived_from = Some(WordIdPair::from_word_chars(&annotated_word.letters)); - if let Some(existing_metadata) = word_map.get_metadata_mut_chars(&key) { + if let Some(existing_metadata) = + word_map.get_metadata_mut_canonical(CanonicalWordId::from_word_chars(&key)) + { // Merge with existing metadata existing_metadata.append(&value); } else { @@ -177,7 +181,9 @@ impl AttributeList { let mut full_metadata = base_metadata; // Merge with any existing metadata for this word - if let Some(existing_metadata) = word_map.get_with_chars(&annotated_word.letters) { + if let Some(existing_metadata) = + word_map.get_canonical(CanonicalWordId::from_word_chars(&annotated_word.letters)) + { full_metadata.append(&existing_metadata.metadata); } @@ -197,7 +203,7 @@ impl AttributeList { // Apply the conditional metadata word_map - .get_metadata_mut_chars(&letters) + .get_metadata_mut_canonical(CanonicalWordId::from_word_chars(letters)) .unwrap() .append(&metadata); } @@ -300,14 +306,15 @@ mod tests { #[test] fn proper_noun_property_propagates_to_plurals() { let fst_dict = FstDictionary::curated(); - if let Some(vw_plural) = fst_dict.get_word_metadata_str("Volkswagens") { + if let Some(vw_plural) = fst_dict.get_word_metadata_str_exact("Volkswagens") { assert!(vw_plural.is_proper_noun()); } } #[test] fn proper_noun_propagates_to_possessives_2327() { - if let Some(vw_possessive) = FstDictionary::curated().get_word_metadata_str("Volkswagen's") + if let Some(vw_possessive) = + FstDictionary::curated().get_word_metadata_str_exact("Volkswagen's") { assert!(vw_possessive.is_possessive_noun()); } diff --git a/harper-core/src/spell/rune/mod.rs b/harper-core/src/spell/rune/mod.rs index 
7fc48dd55..67db6fbed 100644 --- a/harper-core/src/spell/rune/mod.rs +++ b/harper-core/src/spell/rune/mod.rs @@ -19,6 +19,7 @@ mod tests { use super::super::word_map::WordMap; use super::word_list::parse_word_list; use crate::CharStringExt; + use crate::spell::CanonicalWordId; use crate::spell::rune::AttributeList; pub const TEST_WORD_LIST: &str = "4\nhello\ntry/B\nwork/AB\nblank/"; @@ -208,10 +209,14 @@ mod tests { attributes.expand_annotated_words(words, &mut expanded); - let giant_data = expanded.get_with_str("giant").unwrap(); + let giant_data = expanded + .get_canonical(CanonicalWordId::from_word_str("giant")) + .unwrap(); assert!(giant_data.metadata.is_noun()); - let giants_data = expanded.get_with_str("giants").unwrap(); + let giants_data = expanded + .get_canonical(CanonicalWordId::from_word_str("giants")) + .unwrap(); assert!(giants_data.metadata.is_plural_noun()); } } diff --git a/harper-core/src/spell/trie_dictionary.rs b/harper-core/src/spell/trie_dictionary.rs index f7f6b62a6..eba45f858 100644 --- a/harper-core/src/spell/trie_dictionary.rs +++ b/harper-core/src/spell/trie_dictionary.rs @@ -7,7 +7,7 @@ use trie_rs::iter::{Keys, PrefixIter, SearchIter}; use crate::DictWordMetadata; -use super::{Dictionary, FstDictionary, FuzzyMatchResult, WordId}; +use super::{Dictionary, FstDictionary, FuzzyMatchResult}; /// A [`Dictionary`] optimized for pre- and postfix search. /// Wraps another dictionary to implement other operations. 
@@ -72,18 +72,26 @@ impl Dictionary for TrieDictionary { self.inner.fuzzy_match_str(word, max_distance, max_results) } - fn get_correct_capitalization_of(&self, word: &[char]) -> Option<&'_ [char]> { + fn get_correct_capitalization_of(&self, word: &[char]) -> Vec<&'_ [char]> { self.inner.get_correct_capitalization_of(word) } - fn get_word_metadata(&self, word: &[char]) -> Option> { + fn get_word_metadata(&self, word: &[char]) -> Vec<&DictWordMetadata> { self.inner.get_word_metadata(word) } - fn get_word_metadata_str(&self, word: &str) -> Option> { + fn get_word_metadata_exact(&self, word: &[char]) -> Option<&DictWordMetadata> { + self.inner.get_word_metadata_exact(word) + } + + fn get_word_metadata_str(&self, word: &str) -> Vec<&DictWordMetadata> { self.inner.get_word_metadata_str(word) } + fn get_word_metadata_str_exact(&self, word: &str) -> Option<&DictWordMetadata> { + self.inner.get_word_metadata_str_exact(word) + } + fn words_iter(&self) -> Box + Send + '_> { self.inner.words_iter() } @@ -92,10 +100,6 @@ impl Dictionary for TrieDictionary { self.inner.word_count() } - fn get_word_from_id(&self, id: &WordId) -> Option<&[char]> { - self.inner.get_word_from_id(id) - } - fn find_words_with_prefix(&self, prefix: &[char]) -> Vec> { let results: Keys, _>> = self.trie.predictive_search(prefix); diff --git a/harper-core/src/spell/word_id.rs b/harper-core/src/spell/word_id.rs index 9140a3911..91c23c421 100644 --- a/harper-core/src/spell/word_id.rs +++ b/harper-core/src/spell/word_id.rs @@ -1,3 +1,8 @@ +//! Identifiers for a words. +//! +//! These are meant for situations where you need to refer to a word (or a collection of words), +//! without storing all of accompanying data (like spelling or metadata). + use std::hash::BuildHasher; use foldhash::fast::FixedState; @@ -5,18 +10,41 @@ use serde::{Deserialize, Serialize}; use crate::{CharString, CharStringExt}; -/// An identifier for a particular word. +/// An identifier for a particular word with canonical casing. 
+#[derive(Hash, Copy, Clone, PartialEq, Eq, PartialOrd, Debug, Serialize, Deserialize)] +pub struct CanonicalWordId { + hash: u64, +} + +impl CanonicalWordId { + /// Create a Word ID from a character slice. + pub fn from_word_chars(chars: impl AsRef<[char]>) -> Self { + let hash = FixedState::default().hash_one(chars.as_ref()); + + Self { hash } + } + + /// Create a word ID from a string. + /// Requires allocation, so use sparingly. + pub fn from_word_str(text: impl AsRef) -> Self { + let chars: CharString = text.as_ref().chars().collect(); + Self::from_word_chars(chars) + } +} + +/// An identifier for a particular word with case-folded casing. /// -/// It works by hashing the word it represents, normalized to lowercase. -/// It is meant for situations where you need to refer to a word (or a collection of words), -/// without storing all of accompanying data (like spelling or metadata). +/// This does not usually point to a specific word, but rather a group of words that are identical +/// when lowercased. #[derive(Hash, Copy, Clone, PartialEq, Eq, PartialOrd, Debug, Serialize, Deserialize)] -pub struct WordId { +pub struct CaseFoldedWordId { hash: u64, } -impl WordId { +impl CaseFoldedWordId { /// Create a Word ID from a character slice. + /// + /// This will case-fold and normalize the input before calculating the word ID. pub fn from_word_chars(chars: impl AsRef<[char]>) -> Self { let normalized = chars.as_ref().normalized(); let lower = normalized.to_lower(); @@ -27,8 +55,82 @@ impl WordId { /// Create a word ID from a string. /// Requires allocation, so use sparingly. + /// + /// This will case-fold and normalize the input before calculating the word ID. pub fn from_word_str(text: impl AsRef) -> Self { let chars: CharString = text.as_ref().chars().collect(); Self::from_word_chars(chars) } } + +/// A pair containing both [`CanonicalWordId`] and [`CaseFoldedWordId`] for a given word. 
+#[derive(Hash, Copy, Clone, PartialEq, Eq, PartialOrd, Debug, Serialize, Deserialize)] +pub struct WordIdPair { + canonical: CanonicalWordId, + case_folded: CaseFoldedWordId, +} + +impl WordIdPair { + /// Create a Word ID pair from a character slice. + /// + /// Calculates both the canonical and case-folded word ID for the provided word. + pub fn from_word_chars(chars: impl AsRef<[char]>) -> Self { + Self { + canonical: CanonicalWordId::from_word_chars(&chars), + case_folded: CaseFoldedWordId::from_word_chars(&chars), + } + } + + /// Create a word ID pair from a string. + /// Requires allocation, so use sparingly. + /// + /// Calculates both the canonical and case-folded word ID for the provided word. + pub fn from_word_str(text: impl AsRef) -> Self { + let chars: CharString = text.as_ref().chars().collect(); + Self::from_word_chars(chars) + } + + /// The canonical ID of the word. + pub fn canonical(&self) -> CanonicalWordId { + self.canonical + } + + /// The case-folded ID of the word. + pub fn case_folded(&self) -> CaseFoldedWordId { + self.case_folded + } +} + +/// Represents either a canonical or case-folded word ID. +#[derive(Hash, Copy, Clone, PartialEq, Eq, PartialOrd, Debug, Serialize, Deserialize)] +pub enum EitherWordId { + Canonical(CanonicalWordId), + CaseFolded(CaseFoldedWordId), +} +impl EitherWordId { + /// Create a canonical Word ID from a character slice. + pub fn from_chars_canonical(chars: impl AsRef<[char]>) -> Self { + Self::Canonical(CanonicalWordId::from_word_chars(chars)) + } + + /// Create a canonical word ID from a string. + /// Requires allocation, so use sparingly. + pub fn from_str_canonical(text: impl AsRef) -> Self { + Self::Canonical(CanonicalWordId::from_word_str(text)) + } + + /// Create a case-folded Word ID from a character slice. + /// + /// This will case-fold and normalize the input before calculating the word ID. 
+ pub fn from_chars_case_folded(chars: impl AsRef<[char]>) -> Self { + Self::CaseFolded(CaseFoldedWordId::from_word_chars(chars)) + } + + /// Create a case-folded word ID from a string. + /// Requires allocation, so use sparingly. + /// + /// This will case-fold and normalize the input before calculating the word ID. + pub fn from_str_case_folded(text: impl AsRef) -> Self { + Self::CaseFolded(CaseFoldedWordId::from_word_str(text)) + } +} diff --git a/harper-core/src/spell/word_map.rs b/harper-core/src/spell/word_map.rs index 8f97ec884..c6b49c202 100644 --- a/harper-core/src/spell/word_map.rs +++ b/harper-core/src/spell/word_map.rs @@ -1,13 +1,22 @@ -use hashbrown::{HashMap, hash_map::IntoValues}; +use hashbrown::{DefaultHashBuilder, HashMap}; +use indexmap::IndexMap; -use crate::{CharString, DictWordMetadata}; - -use super::WordId; +use crate::{ + CharString, DictWordMetadata, + spell::{ + WordIdPair, + word_id::{CanonicalWordId, CaseFoldedWordId}, + }, +}; /// The underlying data structure for the `MutableDictionary`. #[derive(Debug, Clone, Eq, PartialEq, Default)] pub struct WordMap { - inner: HashMap, + /// Underlying container for the entries in the word map. + canonical: IndexMap, + /// A map containing indices into `canonical` for a specific `CaseFoldedWordId`. This is used for + /// case-folded lookups in the word map. + case_folded: HashMap>, } #[derive(Debug, Clone, Eq, PartialEq, Hash)] @@ -17,88 +26,103 @@ pub struct WordMapEntry { } impl WordMap { - /// Get an entry from the word map using raw chars. 
- pub fn get_with_str(&self, string: &str) -> Option<&WordMapEntry> { - let chars: CharString = string.chars().collect(); - let id = WordId::from_word_chars(chars); - - self.get(&id) - } - - pub fn contains_str(&self, string: &str) -> bool { - self.get_with_str(string).is_some() - } - - pub fn contains_chars(&self, chars: impl AsRef<[char]>) -> bool { - self.get_with_chars(chars).is_some() + pub fn contains_canonical(&self, id: CanonicalWordId) -> bool { + self.get_canonical(id).is_some() } - pub fn contains(&self, id: &WordId) -> bool { - self.get(id).is_some() + pub fn contains_case_folded(&self, id: CaseFoldedWordId) -> bool { + !self.get_canonical_indices_from_case_folded(id).is_empty() } - /// Get an entry from the word map using raw chars. - pub fn get_with_chars(&self, chars: impl AsRef<[char]>) -> Option<&WordMapEntry> { - let id = WordId::from_word_chars(chars); - - self.get(&id) + /// Get an entry from the word map using a word identifier. + pub fn get_canonical(&self, id: CanonicalWordId) -> Option<&WordMapEntry> { + self.canonical.get(&id) } - /// Get an entry from the word map using a word identifier. 
- pub fn get(&self, id: &WordId) -> Option<&WordMapEntry> { - self.inner.get(id) + pub fn get_case_folded( + &self, + id: CaseFoldedWordId, + ) -> impl ExactSizeIterator { + self.get_canonical_indices_from_case_folded(id) + .iter() + .map(|canonical_index| self.get_by_canonical_index(*canonical_index).unwrap()) } /// Borrow a word's metadata mutably - pub fn get_metadata_mut_chars( + pub fn get_metadata_mut_canonical( &mut self, - chars: impl AsRef<[char]>, + id: CanonicalWordId, ) -> Option<&mut DictWordMetadata> { - let id = WordId::from_word_chars(chars); - - self.get_metadata_mut(&id) - } - - /// Borrow a word's metadata mutably - pub fn get_metadata_mut(&mut self, id: &WordId) -> Option<&mut DictWordMetadata> { - self.inner.get_mut(id).map(|v| &mut v.metadata) + self.canonical.get_mut(&id).map(|v| &mut v.metadata) } pub fn insert(&mut self, entry: WordMapEntry) { - let id = WordId::from_word_chars(&entry.canonical_spelling); - - self.inner.insert(id, entry); + let word_ids = WordIdPair::from_word_chars(&entry.canonical_spelling); + + if let Some(existing_entry) = self.canonical.get_mut(&word_ids.canonical()) { + // An existing word with the same canonical ID exists; update its entry. + existing_entry.metadata = existing_entry.metadata.or(&entry.metadata); + } else { + // An existing word with the same canonical ID does NOT exist; insert it. + let (canonical_idx, _) = self.canonical.insert_full(word_ids.canonical(), entry); + let case_folded_id = word_ids.case_folded(); + if let Some(existing_case_folded_entry) = self.case_folded.get_mut(&case_folded_id) { + // `case_folded` already has a canonical ID list for this word; append to it, if + // the same entry does not already exist. + if !existing_case_folded_entry.contains(&canonical_idx) { + existing_case_folded_entry.push(canonical_idx); + } + } else { + // `case_folded` does NOT have a canonical ID list for this word; initialize one. 
+ self.case_folded.insert(case_folded_id, vec![canonical_idx]); + } + } } /// Reserves capacity for at least `additional` more elements to be inserted /// in the `WordMap`. The collection may reserve more space to avoid /// frequent reallocations. pub fn reserve(&mut self, additional: usize) { - self.inner.reserve(additional); + self.canonical.reserve(additional); } /// Iterate through the canonical spellings of the words in the map. pub fn iter(&self) -> impl Iterator { - self.inner.values() + self.canonical.values() } pub fn len(&self) -> usize { - self.inner.len() + self.canonical.len() } pub fn with_capacity(capacity: usize) -> Self { Self { - inner: HashMap::with_capacity(capacity), + canonical: IndexMap::with_capacity_and_hasher(capacity, DefaultHashBuilder::default()), + case_folded: HashMap::new(), } } + + /// Get a [`WordMapEntry`] by its canonical ID. + fn get_by_canonical_index(&self, index: usize) -> Option<&WordMapEntry> { + self.canonical + .get_index(index) + .map(|(_, word_map_entry)| word_map_entry) + } + + /// Get indices into [`Self::canonical`] using the provided [`CaseFoldedWordId`]. 
+ fn get_canonical_indices_from_case_folded(&self, id: CaseFoldedWordId) -> &[usize] { + self.case_folded + .get(&id) + .map_or(&[], |canonical_indices| canonical_indices) + } } impl IntoIterator for WordMap { type Item = WordMapEntry; fn into_iter(self) -> Self::IntoIter { - self.inner.into_values() + self.canonical.into_values() } - type IntoIter = IntoValues; + type IntoIter = indexmap::map::IntoValues; } diff --git a/harper-core/src/thesaurus_helper.rs b/harper-core/src/thesaurus_helper.rs index 73dfdec49..f435b3694 100644 --- a/harper-core/src/thesaurus_helper.rs +++ b/harper-core/src/thesaurus_helper.rs @@ -34,7 +34,7 @@ pub fn get_synonyms_sorted(_word: &str, _token: &TokenKind) -> Option Date: Thu, 29 Jan 2026 16:24:31 -0600 Subject: [PATCH 07/18] chore: update snapshots --- .../Alice's Adventures in Wonderland.snap.yml | 131 +------ .../text/linters/Computer science.snap.yml | 27 +- .../text/linters/Difficult sentences.snap.yml | 9 - .../linters/Part-of-speech tagging.snap.yml | 30 +- .../tests/text/linters/Spell.US.snap.yml | 2 +- harper-core/tests/text/linters/Spell.snap.yml | 11 - ...Constitution of the United States.snap.yml | 19 +- .../text/linters/The Great Gatsby.snap.yml | 347 +++--------------- .../Alice's Adventures in Wonderland.md | 4 +- .../tests/text/tagged/The Great Gatsby.md | 6 +- 10 files changed, 75 insertions(+), 511 deletions(-) diff --git a/harper-core/tests/text/linters/Alice's Adventures in Wonderland.snap.yml b/harper-core/tests/text/linters/Alice's Adventures in Wonderland.snap.yml index 35dc1481a..ec2f89faa 100644 --- a/harper-core/tests/text/linters/Alice's Adventures in Wonderland.snap.yml +++ b/harper-core/tests/text/linters/Alice's Adventures in Wonderland.snap.yml @@ -681,17 +681,6 @@ Suggest: -Lint: Spelling (63 priority) -Message: | - 320 | she began again: “Où est ma chatte?” which was the first sentence in her French - | ^~~ Did you mean to spell `est` this way? 
-Suggest: - - Replace with: “east” - - Replace with: “eat” - - Replace with: “esp” - - - Lint: Spelling (63 priority) Message: | 320 | she began again: “Où est ma chatte?” which was the first sentence in her French @@ -1231,16 +1220,6 @@ Message: | -Lint: Capitalization (31 priority) -Message: | - 692 | below!” (a loud crash)—“Now, who did that?—It was Bill, I fancy—Who’s to go down - | ^~~~~ The canonical dictionary spelling is `who's`. - 693 | the chimney?—Nay, I shan’t! You do it!—That I won’t, then!—Bill’s to go -Suggest: - - Replace with: “who's” - - - Lint: Capitalization (31 priority) Message: | 694 | down—Here, Bill! the master says you’re to go down the chimney!” @@ -1658,15 +1637,6 @@ Suggest: -Lint: Capitalization (127 priority) -Message: | - 1059 | ## CHAPTER VI: Pig and Pepper - | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings. -Suggest: - - Replace with: “## CHAPTER Vi: Pig and Pepper” - - - Lint: Readability (127 priority) Message: | 1061 | For a minute or two she stood looking at the house, and wondering what to do @@ -2000,15 +1970,6 @@ Suggest: -Lint: Capitalization (31 priority) -Message: | - 1582 | “Who’s making personal remarks now?” the Hatter asked triumphantly. - | ^~~~~ The canonical dictionary spelling is `who's`. -Suggest: - - Replace with: “who's” - - - Lint: Readability (127 priority) Message: | 1637 | The Dormouse had closed its eyes by this time, and was going off into a doze; @@ -2586,7 +2547,7 @@ Message: | Suggest: - Replace with: “Hacker” - Replace with: “Hickory” - - Replace with: “Hackers” + - Replace with: “Hickory” @@ -3219,17 +3180,6 @@ Suggest: -Lint: Spelling (63 priority) -Message: | - 2477 | eagerly that the Gryphon said, in a rather offended tone, “Hm! No accounting for - | ^~ Did you mean to spell `Hm` this way? 
-Suggest: - - Replace with: “Ha” - - Replace with: “Ham” - - Replace with: “He” - - - Lint: Formatting (255 priority) Message: | 2483 | > “Beautiful Soup, so rich and green, Waiting in a hot tureen! Who for such @@ -3268,17 +3218,6 @@ Suggest: -Lint: Spelling (63 priority) -Message: | - 2485 | > evening, beautiful Soup! Beau—ootiful Soo—oop! Beau—ootiful Soo—oop! Soo—oop - | ^~~ Did you mean to spell `oop` this way? -Suggest: - - Replace with: “oops” - - Replace with: “op” - - Replace with: “opp” - - - Lint: Spelling (63 priority) Message: | 2485 | > evening, beautiful Soup! Beau—ootiful Soo—oop! Beau—ootiful Soo—oop! Soo—oop @@ -3310,17 +3249,6 @@ Suggest: -Lint: Spelling (63 priority) -Message: | - 2485 | > evening, beautiful Soup! Beau—ootiful Soo—oop! Beau—ootiful Soo—oop! Soo—oop - | ^~~ Did you mean to spell `oop` this way? -Suggest: - - Replace with: “oops” - - Replace with: “op” - - Replace with: “opp” - - - Lint: Spelling (63 priority) Message: | 2485 | > evening, beautiful Soup! Beau—ootiful Soo—oop! Beau—ootiful Soo—oop! Soo—oop @@ -3343,18 +3271,6 @@ Suggest: -Lint: Spelling (63 priority) -Message: | - 2485 | > evening, beautiful Soup! Beau—ootiful Soo—oop! Beau—ootiful Soo—oop! Soo—oop - | ^~~ Did you mean to spell `oop` this way? - 2486 | > of the e—e—evening, Beautiful, beautiful Soup! -Suggest: - - Replace with: “oops” - - Replace with: “op” - - Replace with: “opp” - - - Lint: WordChoice (63 priority) Message: | 2488 | > “Beautiful Soup! Who cares for fish, Game, or any other dish? Who would not @@ -3406,17 +3322,6 @@ Suggest: -Lint: Spelling (63 priority) -Message: | - 2490 | > beautiful Soup? Beau—ootiful Soo—oop! Beau—ootiful Soo—oop! Soo—oop of the - | ^~~ Did you mean to spell `oop` this way? -Suggest: - - Replace with: “oops” - - Replace with: “op” - - Replace with: “opp” - - - Lint: Spelling (63 priority) Message: | 2490 | > beautiful Soup? Beau—ootiful Soo—oop! Beau—ootiful Soo—oop! 
Soo—oop of the @@ -3448,17 +3353,6 @@ Suggest: -Lint: Spelling (63 priority) -Message: | - 2490 | > beautiful Soup? Beau—ootiful Soo—oop! Beau—ootiful Soo—oop! Soo—oop of the - | ^~~ Did you mean to spell `oop` this way? -Suggest: - - Replace with: “oops” - - Replace with: “op” - - Replace with: “opp” - - - Lint: Spelling (63 priority) Message: | 2490 | > beautiful Soup? Beau—ootiful Soo—oop! Beau—ootiful Soo—oop! Soo—oop of the @@ -3481,18 +3375,6 @@ Suggest: -Lint: Spelling (63 priority) -Message: | - 2490 | > beautiful Soup? Beau—ootiful Soo—oop! Beau—ootiful Soo—oop! Soo—oop of the - | ^~~ Did you mean to spell `oop` this way? - 2491 | > e—e—evening, Beautiful, beauti—FUL SOUP!” -Suggest: - - Replace with: “oops” - - Replace with: “op” - - Replace with: “opp” - - - Lint: Spelling (63 priority) Message: | 2490 | > beautiful Soup? Beau—ootiful Soo—oop! Beau—ootiful Soo—oop! Soo—oop of the @@ -3575,17 +3457,6 @@ Suggest: -Lint: Spelling (63 priority) -Message: | - 2503 | > “Soo—oop of the e—e—evening, Beautiful, beautiful Soup!” - | ^~~ Did you mean to spell `oop` this way? -Suggest: - - Replace with: “oops” - - Replace with: “op” - - Replace with: “opp” - - - Lint: Readability (127 priority) Message: | 2507 | The King and Queen of Hearts were seated on their throne when they arrived, with diff --git a/harper-core/tests/text/linters/Computer science.snap.yml b/harper-core/tests/text/linters/Computer science.snap.yml index ab735bb63..e96e96404 100644 --- a/harper-core/tests/text/linters/Computer science.snap.yml +++ b/harper-core/tests/text/linters/Computer science.snap.yml @@ -47,7 +47,7 @@ Message: | Suggest: - Replace with: “Reckoned” - Replace with: “Recover” - - Replace with: “Rickover” + - Replace with: “Reasoner” @@ -567,9 +567,9 @@ Message: | 156 | von Neumann, Rózsa Péter and Alonzo Church and there continues to be a useful | ^~~~~ Did you mean to spell `Péter` this way? 
Suggest: + - Replace with: “Peter” - Replace with: “Peter” - Replace with: “Pother” - - Replace with: “Paper” @@ -608,7 +608,7 @@ Message: | Suggest: - Replace with: “Paras” - Replace with: “Parkas” - - Replace with: “Parana's” + - Replace with: “Pan's” @@ -889,17 +889,6 @@ Suggest: -Lint: Spelling (63 priority) -Message: | - 262 | The famous P = NP? problem, one of the Millennium Prize Problems, is an open - | ^~ Did you mean to spell `NP` this way? -Suggest: - - Replace with: “Nap” - - Replace with: “Nip” - - Replace with: “No” - - - Lint: Capitalization (31 priority) Message: | 262 | The famous P = NP? problem, one of the Millennium Prize Problems, is an open @@ -1061,8 +1050,8 @@ Message: | | ^~~ Did you mean to spell `HCI` this way? Suggest: - Replace with: “Hi” + - Replace with: “HI” - Replace with: “Chi” - - Replace with: “Sci” @@ -1073,8 +1062,8 @@ Message: | 349 | that focus on the relationship between emotions, social behavior and brain Suggest: - Replace with: “Hi” + - Replace with: “HI” - Replace with: “Chi” - - Replace with: “Sci” @@ -1130,7 +1119,7 @@ Message: | Suggest: - Replace with: “RI” - Replace with: “Ra” - - Replace with: “Ru” + - Replace with: “Re” @@ -1141,7 +1130,7 @@ Message: | Suggest: - Replace with: “Pa” - Replace with: “Pi” - - Replace with: “PE” + - Replace with: “PA” @@ -1331,9 +1320,9 @@ Message: | 469 | goto (which means it is more elementary than structured programming). | ^~~~ Did you mean to spell `goto` this way? Suggest: + - Replace with: “goth” - Replace with: “goo” - Replace with: “got” - - Replace with: “goths” diff --git a/harper-core/tests/text/linters/Difficult sentences.snap.yml b/harper-core/tests/text/linters/Difficult sentences.snap.yml index 7b8503edc..834842080 100644 --- a/harper-core/tests/text/linters/Difficult sentences.snap.yml +++ b/harper-core/tests/text/linters/Difficult sentences.snap.yml @@ -66,15 +66,6 @@ Suggest: -Lint: Capitalization (31 priority) -Message: | - 126 | Who's for ice-cream? 
- | ^~~~~ The canonical dictionary spelling is `who's`. -Suggest: - - Replace with: “who's” - - - Lint: Capitalization (31 priority) Message: | 160 | to account for one's whereabouts. diff --git a/harper-core/tests/text/linters/Part-of-speech tagging.snap.yml b/harper-core/tests/text/linters/Part-of-speech tagging.snap.yml index 0d558a225..369c12593 100644 --- a/harper-core/tests/text/linters/Part-of-speech tagging.snap.yml +++ b/harper-core/tests/text/linters/Part-of-speech tagging.snap.yml @@ -30,18 +30,6 @@ Suggest: -Lint: Spelling (63 priority) -Message: | - 8 | In corpus linguistics, part-of-speech tagging (POS tagging or PoS tagging or - | ^~~ Did you mean to spell `PoS` this way? - 9 | POST), also called grammatical tagging is the process of marking up a word in a -Suggest: - - Replace with: “Pod” - - Replace with: “Poi” - - Replace with: “Pol” - - - Lint: Spelling (63 priority) Message: | 18 | two distinctive groups: rule-based and stochastic. E. Brill's tagger, one of the @@ -59,9 +47,9 @@ Message: | | ^~~~~~~ Did you mean to spell `Brill's` this way? 19 | first and most widely used English POS-taggers, employs rule-based algorithms. Suggest: + - Replace with: “Bill's” - Replace with: “Brillo's” - Replace with: “Bill's” - - Replace with: “Drill's” @@ -131,18 +119,6 @@ Suggest: -Lint: Spelling (63 priority) -Message: | - 49 | tags. For example, NN for singular common nouns, NNS for plural common nouns, NP - | ^~ Did you mean to spell `NP` this way? - 50 | for singular proper nouns (see the POS tags used in the Brown Corpus). Other -Suggest: - - Replace with: “Nap” - - Replace with: “Nip” - - Replace with: “No” - - - Lint: Spelling (63 priority) Message: | 55 | 150 separate parts of speech for English. Work on stochastic methods for tagging @@ -611,9 +587,9 @@ Message: | | ^~~~~ Did you mean to spell `Welch` this way? 202 | known as the forward-backward algorithm). 
Hidden Markov model and visible Markov Suggest: + - Replace with: “Welsh” - Replace with: “Welsh” - Replace with: “Wench” - - Replace with: “Watch” @@ -635,7 +611,7 @@ Message: | Suggest: - Replace with: “Sim” - Replace with: “Sum” - - Replace with: “SCM” + - Replace with: “SAM” diff --git a/harper-core/tests/text/linters/Spell.US.snap.yml b/harper-core/tests/text/linters/Spell.US.snap.yml index 2f2d3e2e5..ea8d24192 100644 --- a/harper-core/tests/text/linters/Spell.US.snap.yml +++ b/harper-core/tests/text/linters/Spell.US.snap.yml @@ -69,9 +69,9 @@ Message: | 15 | - Grey. | ^~~~ Did you mean to spell `Grey` this way? Suggest: + - Replace with: “Gray” - Replace with: “Gray” - Replace with: “Grew” - - Replace with: “Gorey” diff --git a/harper-core/tests/text/linters/Spell.snap.yml b/harper-core/tests/text/linters/Spell.snap.yml index 7b257f50b..984bdc0d8 100644 --- a/harper-core/tests/text/linters/Spell.snap.yml +++ b/harper-core/tests/text/linters/Spell.snap.yml @@ -17,17 +17,6 @@ Suggest: -Lint: Spelling (63 priority) -Message: | - 7 | My favourite color is blu. - | ^~~ Did you mean to spell `blu` this way? -Suggest: - - Replace with: “bl” - - Replace with: “blue” - - Replace with: “blur” - - - Lint: Spelling (63 priority) Message: | 8 | I must defend my honour! diff --git a/harper-core/tests/text/linters/The Constitution of the United States.snap.yml b/harper-core/tests/text/linters/The Constitution of the United States.snap.yml index 168a3e042..67a6e6fa9 100644 --- a/harper-core/tests/text/linters/The Constitution of the United States.snap.yml +++ b/harper-core/tests/text/linters/The Constitution of the United States.snap.yml @@ -60,9 +60,9 @@ Message: | 11 | ## Article. I. | ^~ Did you mean to spell `I.` this way? 
Suggest: + - Replace with: “I” - Replace with: “I” - Replace with: “Id” - - Replace with: “If” @@ -1658,9 +1658,9 @@ Message: | 628 | or Labour in one State, under the Laws thereof, escaping into another, shall, | ^~~~~~ Did you mean to spell `Labour` this way? Suggest: + - Replace with: “Labor” - Replace with: “Labor” - Replace with: “Laborer” - - Replace with: “Layout” @@ -1670,9 +1670,9 @@ Message: | 630 | Service or Labour, but shall be delivered up on Claim of the Party to whom such | ^~~~~~ Did you mean to spell `Labour` this way? Suggest: + - Replace with: “Labor” - Replace with: “Labor” - Replace with: “Laborer” - - Replace with: “Layout” @@ -1682,9 +1682,9 @@ Message: | 631 | Service or Labour may be due. | ^~~~~~ Did you mean to spell `Labour` this way? Suggest: + - Replace with: “Labor” - Replace with: “Labor” - Replace with: “Laborer” - - Replace with: “Layout” @@ -1770,7 +1770,7 @@ Message: | Suggest: - Replace with: “Vi” - Replace with: “VA” - - Replace with: “Vb” + - Replace with: “VI” @@ -1813,15 +1813,6 @@ Suggest: -Lint: Capitalization (127 priority) -Message: | - 677 | ## Article. VI. - | ^~~~~~~~~~~~~~~ Try to use title case in headings. -Suggest: - - Replace with: “## Article. Vi.” - - - Lint: Readability (127 priority) Message: | 683 | This Constitution, and the Laws of the United States which shall be made in diff --git a/harper-core/tests/text/linters/The Great Gatsby.snap.yml b/harper-core/tests/text/linters/The Great Gatsby.snap.yml index 9f950d3e2..7b47a65ad 100644 --- a/harper-core/tests/text/linters/The Great Gatsby.snap.yml +++ b/harper-core/tests/text/linters/The Great Gatsby.snap.yml @@ -9,26 +9,6 @@ Suggest: -Lint: Spelling (63 priority) -Message: | - 3 | BY F. SCOTT FITZGERALD - | ^~~~~ Did you mean to spell `SCOTT` this way? -Suggest: - - Replace with: “Scout” - - Replace with: “Scott” - - Replace with: “Scoot” - - - -Lint: Spelling (63 priority) -Message: | - 3 | BY F. SCOTT FITZGERALD - | ^~~~~~~~~~ Did you mean `Fitzgerald`? 
-Suggest: - - Replace with: “Fitzgerald” - - - Lint: Spelling (63 priority) Message: | 10 | “Whenever you feel like criticising any one,” he told me, “just remember that @@ -318,7 +298,7 @@ Message: | Suggest: - Replace with: “Vile” - Replace with: “Villa” - - Replace with: “Villi” + - Replace with: “Villa” @@ -978,7 +958,7 @@ Message: | Suggest: - Replace with: “Do” - Replace with: “DA” - - Replace with: “Di” + - Replace with: “DE” @@ -994,10 +974,11 @@ Suggest: Lint: Spelling (63 priority) Message: | 825 | “No, you don’t,” interposed Tom quickly. “Myrtle’ll be hurt if you don’t come up - | ^~~~~~~~~ Did you mean `Myrtle's`? + | ^~~~~~~~~ Did you mean to spell `Myrtle’ll` this way? 826 | to the apartment. Won’t you, Myrtle?” Suggest: - Replace with: “Myrtle's” + - Replace with: “Myrtle's” @@ -2008,9 +1989,10 @@ Suggest: Lint: Spelling (63 priority) Message: | 1710 | “Wonder’ff tell me where there’s a gas’line station?” - | ^~~~~~~~~ Did you mean `Wonder's`? + | ^~~~~~~~~ Did you mean to spell `Wonder’ff` this way? Suggest: - Replace with: “Wonder's” + - Replace with: “Wonder's” @@ -2316,7 +2298,7 @@ Message: | Suggest: - Replace with: “RI” - Replace with: “Ra” - - Replace with: “Ru” + - Replace with: “Re” @@ -2327,7 +2309,7 @@ Message: | Suggest: - Replace with: “Pa” - Replace with: “Pi” - - Replace with: “PE” + - Replace with: “PA” @@ -2367,9 +2349,9 @@ Message: | 1861 | the gravel drive that Mrs. Ulysses Swett’s automobile ran over his right hand. | ^~~~~~~ Did you mean to spell `Swett’s` this way? Suggest: + - Replace with: “Sweet's” - Replace with: “Sweat's” - Replace with: “Sweet's” - - Replace with: “Seth's” @@ -2391,7 +2373,7 @@ Message: | Suggest: - Replace with: “So” - Replace with: “SA” - - Replace with: “Se” + - Replace with: “SE” @@ -2495,7 +2477,7 @@ Message: | Suggest: - Replace with: “So” - Replace with: “SA” - - Replace with: “Se” + - Replace with: “SE” @@ -2541,8 +2523,8 @@ Message: | | ^~ Did you mean to spell `G.` this way? 
Suggest: - Replace with: “Go” - - Replace with: “GI” - - Replace with: “GU” + - Replace with: “GA” + - Replace with: “GE” @@ -2575,17 +2557,6 @@ Suggest: -Lint: Spelling (63 priority) -Message: | - 1871 | afterward strangled his wife. Da Fontano the promoter came there, and Ed Legros - | ^~ Did you mean to spell `Da` this way? -Suggest: - - Replace with: “Dab” - - Replace with: “Dad” - - Replace with: “Dag” - - - Lint: Readability (127 priority) Message: | 1871 | afterward strangled his wife. Da Fontano the promoter came there, and Ed Legros @@ -2618,7 +2589,7 @@ Message: | Suggest: - Replace with: “Lear's” - Replace with: “Leger's” - - Replace with: “Leer's” + - Replace with: “Lego's” @@ -2642,7 +2613,7 @@ Message: | Suggest: - Replace with: “Jon's” - Replace with: “Jonas” - - Replace with: “Jogs” + - Replace with: “Jones” @@ -2782,7 +2753,7 @@ Message: | Suggest: - Replace with: “So” - Replace with: “SA” - - Replace with: “Se” + - Replace with: “SE” @@ -2838,9 +2809,9 @@ Message: | 1882 | and Henry L. Palmetto, who killed himself by jumping in front of a subway train | ^~ Did you mean to spell `L.` this way? Suggest: + - Replace with: “LA” + - Replace with: “La” - Replace with: “Le” - - Replace with: “Li” - - Replace with: “Lu” @@ -2966,7 +2937,7 @@ Message: | Suggest: - Replace with: “Pa” - Replace with: “Pi” - - Replace with: “PE” + - Replace with: “PA” @@ -2975,9 +2946,9 @@ Message: | 1896 | Fitz-Peters and Mr. P. Jewett, once head of the American Legion, and Miss | ^~~~~~ Did you mean to spell `Jewett` this way? Suggest: + - Replace with: “Jewel” - Replace with: “Jewel” - Replace with: “Jewell” - - Replace with: “Jewess” @@ -3014,8 +2985,8 @@ Message: | 1982 | de Boulogne. Suggest: - Replace with: “Boss” + - Replace with: “Boas” - Replace with: “Boris” - - Replace with: “Bios” @@ -3025,6 +2996,7 @@ Message: | 1982 | de Boulogne. | ^~~~~~~~ Did you mean to spell `Boulogne` this way? 
Suggest: + - Replace with: “Bologna” - Replace with: “Bologna” - Replace with: “Cologne” @@ -3062,18 +3034,6 @@ Suggest: -Lint: Spelling (63 priority) -Message: | - 2006 | To my astonishment, the thing had an authentic look. “Orderi di Danilo,” ran the - | ^~ Did you mean to spell `di` this way? - 2007 | circular legend, “Montenegro, Nicolas Rex.” -Suggest: - - Replace with: “db” - - Replace with: “dc” - - Replace with: “dd” - - - Lint: Spelling (63 priority) Message: | 2006 | To my astonishment, the thing had an authentic look. “Orderi di Danilo,” ran the @@ -3184,18 +3144,6 @@ Suggest: -Lint: Spelling (63 priority) -Message: | - 2078 | driven by a white chauffeur, in which sat three modish negroes, two bucks and a - | ^~~~~~~ Did you mean to spell `negroes` this way? - 2079 | girl. I laughed aloud as the yolks of their eyeballs rolled toward us in haughty -Suggest: - - Replace with: “Negroes” - - Replace with: “Negro's” - - Replace with: “Negros” - - - Lint: Spelling (63 priority) Message: | 2091 | “Mr. Carraway, this is my friend Mr. Wolfshiem.” @@ -3244,17 +3192,6 @@ Suggest: -Lint: Spelling (63 priority) -Message: | - 2105 | “I handed the money to Katspaugh and I sid: ‘All right, Katspaugh, don’t pay him - | ^~~ Did you mean to spell `sid` this way? -Suggest: - - Replace with: “sad” - - Replace with: “said” - - Replace with: “sic” - - - Lint: Spelling (63 priority) Message: | 2105 | “I handed the money to Katspaugh and I sid: ‘All right, Katspaugh, don’t pay him @@ -3452,27 +3389,30 @@ Suggest: Lint: Spelling (63 priority) Message: | 2188 | “He’s an Oggsford man.” - | ^~~~~~~~ Did you mean `Oxford`? + | ^~~~~~~~ Did you mean to spell `Oggsford` this way? Suggest: - Replace with: “Oxford” + - Replace with: “Oxford” Lint: Spelling (63 priority) Message: | 2192 | “He went to Oggsford College in England. You know Oggsford College?” - | ^~~~~~~~ Did you mean `Oxford`? + | ^~~~~~~~ Did you mean to spell `Oggsford` this way? 
Suggest: - Replace with: “Oxford” + - Replace with: “Oxford” Lint: Spelling (63 priority) Message: | 2192 | “He went to Oggsford College in England. You know Oggsford College?” - | ^~~~~~~~ Did you mean `Oxford`? + | ^~~~~~~~ Did you mean to spell `Oggsford` this way? Suggest: - Replace with: “Oxford” + - Replace with: “Oxford” @@ -3753,18 +3693,6 @@ Suggest: -Lint: Spelling (63 priority) -Message: | - 2395 | When Jordan Baker had finished telling all this we had left the Plaza for half - 2396 | an hour and were driving in a victoria through Central Park. The sun had gone - | ^~~~~~~~ Did you mean to spell `victoria` this way? -Suggest: - - Replace with: “Victoria” - - Replace with: “victor” - - Replace with: “victor's” - - - Lint: Spelling (63 priority) Message: | 2401 | > “I’m the Sheik of Araby. Your love belongs to me. At night when you’re asleep @@ -4231,15 +4159,6 @@ Message: | -Lint: Capitalization (31 priority) -Message: | - 2905 | “Who’s this?” - | ^~~~~ The canonical dictionary spelling is `who's`. -Suggest: - - Replace with: “who's” - - - Lint: Spelling (63 priority) Message: | 2940 | “I know what we'll do,” said Gatsby, “we'll have Klipspringer play the piano.” @@ -4318,15 +4237,6 @@ Suggest: -Lint: Capitalization (127 priority) -Message: | - 3003 | ## CHAPTER VI - | ^~~~~~~~~~~~~ Try to use title case in headings. -Suggest: - - Replace with: “## CHAPTER Vi” - - - Lint: Readability (127 priority) Message: | 3020 | short of being news. 
Contemporary legends such as the “underground pipe-line to @@ -5427,7 +5337,7 @@ Message: | Suggest: - Replace with: “Biko's” - Replace with: “Bilbo's” - - Replace with: “Biro's” + - Replace with: “Bill's” @@ -5438,7 +5348,7 @@ Message: | Suggest: - Replace with: “Biko's” - Replace with: “Bilbo's” - - Replace with: “Biro's” + - Replace with: “Bill's” @@ -5449,7 +5359,7 @@ Message: | Suggest: - Replace with: “Biko's” - Replace with: “Bilbo's” - - Replace with: “Biro's” + - Replace with: “Bill's” @@ -5461,7 +5371,7 @@ Message: | Suggest: - Replace with: “Biko's” - Replace with: “Bilbo's” - - Replace with: “Biro's” + - Replace with: “Bill's” @@ -5479,7 +5389,7 @@ Message: | Suggest: - Replace with: “Biko's” - Replace with: “Bilbo's” - - Replace with: “Biro's” + - Replace with: “Bill's” @@ -5501,7 +5411,7 @@ Message: | Suggest: - Replace with: “Biko's” - Replace with: “Bilbo's” - - Replace with: “Biro's” + - Replace with: “Bill's” @@ -5519,7 +5429,7 @@ Message: | Suggest: - Replace with: “Biko's” - Replace with: “Bilbo's” - - Replace with: “Biro's” + - Replace with: “Bill's” @@ -5542,7 +5452,7 @@ Message: | Suggest: - Replace with: “Biko's” - Replace with: “Bilbo's” - - Replace with: “Biro's” + - Replace with: “Bill's” @@ -5553,7 +5463,7 @@ Message: | Suggest: - Replace with: “Biko's” - Replace with: “Bilbo's” - - Replace with: “Biro's” + - Replace with: “Bill's” @@ -5916,50 +5826,6 @@ Suggest: -Lint: Spelling (63 priority) -Message: | - 4479 | “Oh, my Ga-od! Oh, my Ga-od! Oh, Ga-od! Oh, my Ga-od!” - | ^~ Did you mean to spell `od` this way? -Suggest: - - Replace with: “odd” - - Replace with: “ode” - - Replace with: “of” - - - -Lint: Spelling (63 priority) -Message: | - 4479 | “Oh, my Ga-od! Oh, my Ga-od! Oh, Ga-od! Oh, my Ga-od!” - | ^~ Did you mean to spell `od` this way? -Suggest: - - Replace with: “odd” - - Replace with: “ode” - - Replace with: “of” - - - -Lint: Spelling (63 priority) -Message: | - 4479 | “Oh, my Ga-od! Oh, my Ga-od! Oh, Ga-od! 
Oh, my Ga-od!” - | ^~ Did you mean to spell `od` this way? -Suggest: - - Replace with: “odd” - - Replace with: “ode” - - Replace with: “of” - - - -Lint: Spelling (63 priority) -Message: | - 4479 | “Oh, my Ga-od! Oh, my Ga-od! Oh, Ga-od! Oh, my Ga-od!” - | ^~ Did you mean to spell `od` this way? -Suggest: - - Replace with: “odd” - - Replace with: “ode” - - Replace with: “of” - - - Lint: Capitalization (127 priority) Message: | 4484 | “M-a-v—” the policeman was saying, “—o———” @@ -5969,17 +5835,6 @@ Suggest: -Lint: Spelling (63 priority) -Message: | - 4484 | “M-a-v—” the policeman was saying, “—o———” - | ^ Did you mean to spell `o` this way? -Suggest: - - Replace with: “of” - - Replace with: “oh” - - Replace with: “oi” - - - Lint: Capitalization (127 priority) Message: | 4486 | “No, r—” corrected the man, “M-a-v-r-o———” @@ -5989,39 +5844,6 @@ Suggest: -Lint: Spelling (63 priority) -Message: | - 4486 | “No, r—” corrected the man, “M-a-v-r-o———” - | ^ Did you mean to spell `r` this way? -Suggest: - - Replace with: “re” - - Replace with: “a” - - Replace with: “e” - - - -Lint: Spelling (63 priority) -Message: | - 4486 | “No, r—” corrected the man, “M-a-v-r-o———” - | ^ Did you mean to spell `r` this way? -Suggest: - - Replace with: “re” - - Replace with: “a” - - Replace with: “e” - - - -Lint: Spelling (63 priority) -Message: | - 4486 | “No, r—” corrected the man, “M-a-v-r-o———” - | ^ Did you mean to spell `o` this way? -Suggest: - - Replace with: “of” - - Replace with: “oh” - - Replace with: “oi” - - - Lint: Capitalization (127 priority) Message: | 4490 | “r—” said the policeman, “o———” @@ -6031,17 +5853,6 @@ Suggest: -Lint: Spelling (63 priority) -Message: | - 4490 | “r—” said the policeman, “o———” - | ^ Did you mean to spell `r` this way? 
-Suggest: - - Replace with: “re” - - Replace with: “a” - - Replace with: “e” - - - Lint: Capitalization (127 priority) Message: | 4490 | “r—” said the policeman, “o———” @@ -6051,17 +5862,6 @@ Suggest: -Lint: Spelling (63 priority) -Message: | - 4490 | “r—” said the policeman, “o———” - | ^ Did you mean to spell `o` this way? -Suggest: - - Replace with: “of” - - Replace with: “oh” - - Replace with: “oi” - - - Lint: Spelling (63 priority) Message: | 4499 | “Auto hit her. Ins’antly killed.” @@ -6081,17 +5881,6 @@ Suggest: -Lint: Spelling (63 priority) -Message: | - 4503 | “She ran out ina road. Son-of-a-bitch didn’t even stopus car.” - | ^~~ Did you mean to spell `ina` this way? -Suggest: - - Replace with: “in” - - Replace with: “inc” - - Replace with: “ind” - - - Lint: Spelling (63 priority) Message: | 4503 | “She ran out ina road. Son-of-a-bitch didn’t even stopus car.” @@ -6514,9 +6303,10 @@ Suggest: Lint: Spelling (63 priority) Message: | 4943 | “I suppose Daisy’ll call too.” He looked at me anxiously, as if he hoped I’d - | ^~~~~~~~ Did you mean `Daisy's`? + | ^~~~~~~~ Did you mean to spell `Daisy’ll` this way? Suggest: - Replace with: “Daisy's” + - Replace with: “Daisy's” @@ -6741,18 +6531,6 @@ Suggest: -Lint: Spelling (63 priority) -Message: | - 5121 | Wilson shook his head. His eyes narrowed and his mouth widened slightly with the - 5122 | ghost of a superior “Hm!” - | ^~ Did you mean to spell `Hm` this way? -Suggest: - - Replace with: “Ha” - - Replace with: “Ham” - - Replace with: “He” - - - Lint: Spelling (63 priority) Message: | 5128 | Michaelis had seen this too, but it hadn’t occurred to him that there was any @@ -6896,9 +6674,9 @@ Message: | 5172 | Roosevelt and then to Gad’s Hill, where he bought a sandwich that he didn’t eat, | ^~~~~ Did you mean to spell `Gad’s` this way? 
Suggest: - - Replace with: “Gab's” - - Replace with: “Gag's” - - Replace with: “Gal's” + - Replace with: “Gap's” + - Replace with: “Gay's” + - Replace with: “God's” @@ -6908,9 +6686,9 @@ Message: | 5174 | reach Gad’s Hill until noon. Thus far there was no difficulty in accounting for | ^~~~~ Did you mean to spell `Gad’s` this way? Suggest: - - Replace with: “Gab's” - - Replace with: “Gag's” - - Replace with: “Gal's” + - Replace with: “Gap's” + - Replace with: “Gay's” + - Replace with: “God's” @@ -7186,9 +6964,9 @@ Message: | 5341 | “Young Parke’s in trouble,” he said rapidly. “They picked him up when he handed | ^~~~~~~ Did you mean to spell `Parke’s` this way? Suggest: + - Replace with: “Park's” - Replace with: “Parker's” - Replace with: “Parks's” - - Replace with: “Park's” @@ -7228,7 +7006,7 @@ Message: | Suggest: - Replace with: “Co” - Replace with: “Cu” - - Replace with: “CI” + - Replace with: “CA” @@ -7473,10 +7251,11 @@ Suggest: Lint: Spelling (63 priority) Message: | 5507 | First time I saw him was when he come into Winebrenner’s poolroom at Forty-third - | ^~~~~~~~~~~~~ Did you mean `Windbreaker's`? + | ^~~~~~~~~~~~~ Did you mean to spell `Winebrenner’s` this way? 5508 | Street and asked for a job. He hadn’t eat anything for a couple of days. ‘Come Suggest: - Replace with: “Windbreaker's” + - Replace with: “Windbreaker's” @@ -7490,25 +7269,14 @@ Suggest: -Lint: Spelling (63 priority) -Message: | - 5508 | Street and asked for a job. He hadn’t eat anything for a couple of days. ‘Come - 5509 | on have some lunch with me,’ I sid. He ate more than four dollars’ worth of food - | ^~~ Did you mean to spell `sid` this way? -Suggest: - - Replace with: “sad” - - Replace with: “said” - - Replace with: “sic” - - - Lint: Spelling (63 priority) Message: | 5519 | was a fine-appearing, gentlemanly young man, and when he told me he was an 5520 | Oggsford I knew I could use him good. I got him to join up in the American - | ^~~~~~~~ Did you mean `Oxford`? 
+ | ^~~~~~~~ Did you mean to spell `Oggsford` this way? Suggest: - Replace with: “Oxford” + - Replace with: “Oxford” @@ -7604,8 +7372,8 @@ Message: | | ^~~~~~~~ Did you mean to spell `Hopalong` this way? Suggest: - Replace with: “Hopping” + - Replace with: “Halon” - Replace with: “Haling” - - Replace with: “Haloing” @@ -7652,17 +7420,6 @@ Suggest: -Lint: Spelling (63 priority) -Message: | - 5613 | great for that. He told me I et like a hog once, and I beat him for it.” - | ^~ Did you mean to spell `et` this way? -Suggest: - - Replace with: “e” - - Replace with: “ea” - - Replace with: “eat” - - - Lint: Spelling (63 priority) Message: | 5629 | then Mr. Gatz and the minister and I in the limousine, and a little later four diff --git a/harper-core/tests/text/tagged/Alice's Adventures in Wonderland.md b/harper-core/tests/text/tagged/Alice's Adventures in Wonderland.md index b9b3a1e1a..d02cc2ebd 100644 --- a/harper-core/tests/text/tagged/Alice's Adventures in Wonderland.md +++ b/harper-core/tests/text/tagged/Alice's Adventures in Wonderland.md @@ -1161,7 +1161,7 @@ > RABBIT , ” engraved upon it . She went in without knocking , and hurried upstairs , # NSg/VB+ . . VP/J P NPr/ISg+ . ISg+ NSg/VPt NPr/J/R/P C/P Nᴹ/Vg/J . VB/C VP/J NSg/J . > in great fear lest she should meet the real Mary Ann , and be turned out of the -# NPr/J/R/P NSg/J+ N🅪Sg/VB+ JS ISg+ VXB NSg/VB/J D+ NSg/J+ NPr+ NPr/J+ . VB/C NSg/VXB VP/J NSg/VB/J/R/P P D+ +# NPr/J/R/P NSg/J+ N🅪Sg/VB+ W? ISg+ VXB NSg/VB/J D+ NSg/J+ NPr+ NPr/J+ . VB/C NSg/VXB VP/J NSg/VB/J/R/P P D+ > house before she had found the fan and gloves . # NPr/VB+ C/P ISg+ VP NSg/VP D NSg/VB VB/C NPl/V3+ . > @@ -5699,7 +5699,7 @@ > must have meant some mischief , or else you’d have signed your name like an # NSg/VXB NSg/VXB VP I/J/R/Dq+ NSg/VB+ . NPr/C NSg/J/C K NSg/VXB VP/J D$+ NSg/VB+ NSg/VB/J/C/P D/P > honest man . ” -# VB/JS NPr/VB/J+ . . +# VB/J NPr/VB/J+ . . 
> # > There was a general clapping of hands at this : it was the first really clever diff --git a/harper-core/tests/text/tagged/The Great Gatsby.md b/harper-core/tests/text/tagged/The Great Gatsby.md index c267072cb..912caee04 100644 --- a/harper-core/tests/text/tagged/The Great Gatsby.md +++ b/harper-core/tests/text/tagged/The Great Gatsby.md @@ -811,7 +811,7 @@ > # > “ Well , he wasn’t always a butler ; he used to be the silver polisher for some -# . NSg/VB/J/R . NPr/ISg+ VPt R D/P NPr/VB . NPr/ISg+ VP/J P NSg/VXB D Nᴹ/VB/J+ NSg/JC R/C/P I/J/R/Dq +# . NSg/VB/J/R . NPr/ISg+ VPt R D/P NPr/VB . NPr/ISg+ VP/J P NSg/VXB D Nᴹ/VB/J+ NSg R/C/P I/J/R/Dq > people in New York that had a silver service for two hundred people . He had to # NPl/VB+ NPr/J/R/P NSg/J NPr+ NSg/I/C/Ddem+ VP D/P Nᴹ/VB/J+ NSg/VB+ R/C/P NSg NSg NPl/VB+ . NPr/ISg+ VP P > polish it from morning till night , until finally it began to affect his nose — — — ” @@ -3653,7 +3653,7 @@ > Every one suspects himself of at least one of the cardinal virtues , and this is # Dq NSg/I/J NPl/V3 ISg+ P NSg/P NSg/J/Dq NSg/I/J P D+ NSg/J+ NPl+ . VB/C I/Ddem+ VL3 > mine : I am one of the few honest people that I have ever known . -# NSg/I/VB+ . ISg/#r+ NPr/VB/J NSg/I/J P D+ NSg/I/Dq+ VB/JS+ NPl/VB+ NSg/I/C/Ddem+ ISg/#r+ NSg/VXB J/R VPp/J . +# NSg/I/VB+ . ISg/#r+ NPr/VB/J NSg/I/J P D+ NSg/I/Dq+ VB/J+ NPl/VB+ NSg/I/C/Ddem+ ISg/#r+ NSg/VXB J/R VPp/J . > # > CHAPTER IV @@ -11463,7 +11463,7 @@ > met another bad driver , didn’t I ? I mean it was careless of me to make such a # VP I/D+ NSg/VB/J+ NSg+ . VXPt ISg/#r+ . ISg/#r+ NSg/VB/J NPr/ISg+ VPt J P NPr/ISg+ P NSg/VB NSg/I D/P > wrong guess . I thought you were rather an honest , straightforward person . I -# NSg/VB/J/R NSg/VB+ . ISg/#r+ N🅪Sg/VP ISgPl+ NSg/VPt NPr/VB/J/R D/P VB/JS . J+ NSg/VB+ . ISg/#r+ +# NSg/VB/J/R NSg/VB+ . ISg/#r+ N🅪Sg/VP ISgPl+ NSg/VPt NPr/VB/J/R D/P VB/J . J+ NSg/VB+ . ISg/#r+ > thought it was your secret pride . 
” # N🅪Sg/VP NPr/ISg+ VPt D$+ NSg/VB/J+ Nᴹ/VB+ . . > From 4443a4e231d72e5bec5cbd260f509bc1ee84ba7c Mon Sep 17 00:00:00 2001 From: 86xsk <200443667+86xsk@users.noreply.github.com> Date: Thu, 29 Jan 2026 16:35:19 -0600 Subject: [PATCH 08/18] Partially revert "fix(core): PR getting flagged as 'misspelled' (#2476)" This partially reverts commit 5230d6ad8cca579d615d6bd092534224ac44d4bf. Returns the word to the dictionary, since removing it should no longer be necessary. --- harper-core/dictionary.dict | 1 + 1 file changed, 1 insertion(+) diff --git a/harper-core/dictionary.dict b/harper-core/dictionary.dict index 1b793049e..fb65001b1 100644 --- a/harper-core/dictionary.dict +++ b/harper-core/dictionary.dict @@ -8252,6 +8252,7 @@ PowerPoint/ONgV Powers/NOg Powhatan/NOg Poznan/Og +Pr/ # Praseodymium Prada/g Prado/Og Praetorian/Ng From e982f23c6913e5b17451453d3f81d626bc598eda Mon Sep 17 00:00:00 2001 From: 86xsk <200443667+86xsk@users.noreply.github.com> Date: Thu, 29 Jan 2026 16:46:29 -0600 Subject: [PATCH 09/18] test(core): merge tests and add test --- harper-core/src/linting/spell_check.rs | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/harper-core/src/linting/spell_check.rs b/harper-core/src/linting/spell_check.rs index ff54a9fba..62413971f 100644 --- a/harper-core/src/linting/spell_check.rs +++ b/harper-core/src/linting/spell_check.rs @@ -486,18 +486,11 @@ mod tests { } #[test] - fn dont_flag_pr() { - assert_no_lints( - "PR", - SpellCheck::new(FstDictionary::curated(), Dialect::American), - ); - } + fn dont_flag_certain_entries_with_multiple_case_variants_in_dict() { + let dict = FstDictionary::curated(); - #[test] - fn dont_flag_mb() { - assert_no_lints( - "MB", - SpellCheck::new(FstDictionary::curated(), Dialect::American), - ); + assert_no_lints("PR", SpellCheck::new(&dict, Dialect::American)); + assert_no_lints("MB", SpellCheck::new(&dict, Dialect::American)); + assert_no_lints("OS", SpellCheck::new(&dict, Dialect::American)); 
// Issue #2585 } } From 0a1e2d4e729703728927c040458f8632f77d5458 Mon Sep 17 00:00:00 2001 From: 86xsk <200443667+86xsk@users.noreply.github.com> Date: Thu, 29 Jan 2026 18:09:08 -0600 Subject: [PATCH 10/18] test(core): move test Since casing-related issues are now handled by `OrthographicConsistency`, not `SpellCheck`. --- .../src/linting/orthographic_consistency.rs | 15 ++++++++++++++- harper-core/src/linting/spell_check.rs | 12 +----------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/harper-core/src/linting/orthographic_consistency.rs b/harper-core/src/linting/orthographic_consistency.rs index 72509b19e..4a915ed60 100644 --- a/harper-core/src/linting/orthographic_consistency.rs +++ b/harper-core/src/linting/orthographic_consistency.rs @@ -154,7 +154,10 @@ fn alphabetic_differs(a: &[char], b: &[char]) -> bool { #[cfg(test)] mod tests { - use crate::linting::tests::{assert_lint_count, assert_no_lints, assert_suggestion_result}; + use crate::linting::tests::{ + assert_good_and_bad_suggestions, assert_lint_count, assert_no_lints, + assert_suggestion_result, + }; use super::OrthographicConsistency; @@ -431,4 +434,14 @@ mod tests { OrthographicConsistency::default(), ); } + + #[test] + fn no_improper_suggestion_for_macos() { + assert_good_and_bad_suggestions( + "MacOS", + OrthographicConsistency::default(), + &["macOS"], + &["MacOS"], + ); + } } diff --git a/harper-core/src/linting/spell_check.rs b/harper-core/src/linting/spell_check.rs index 48fbcaa60..e8614969a 100644 --- a/harper-core/src/linting/spell_check.rs +++ b/harper-core/src/linting/spell_check.rs @@ -141,7 +141,7 @@ mod tests { use super::SpellCheck; use crate::dict_word_metadata::DialectFlags; use crate::linting::Linter; - use crate::linting::tests::{assert_good_and_bad_suggestions, assert_no_lints}; + use crate::linting::tests::assert_no_lints; use crate::spell::{Dictionary, FstDictionary, MergedDictionary, MutableDictionary}; use crate::{ Dialect, @@ -501,14 +501,4 @@ mod tests { 
assert_no_lints("MB", SpellCheck::new(&dict, Dialect::American)); assert_no_lints("OS", SpellCheck::new(&dict, Dialect::American)); // Issue #2585 } - - #[test] - fn no_improper_suggestion_for_macos() { - assert_good_and_bad_suggestions( - "MacOS", - SpellCheck::new(FstDictionary::curated(), Dialect::American), - &["macOS"], - &["MacOS"], - ); - } } From cf9a90a097d456ef61ebdfd800eb4fdd472050fc Mon Sep 17 00:00:00 2001 From: 86xsk <200443667+86xsk@users.noreply.github.com> Date: Thu, 29 Jan 2026 18:10:44 -0600 Subject: [PATCH 11/18] fix(core): fix logic in `OrthographicConsistency` Expands the criteria under which `OrthographicConsistency` will lint for incorrect capitalization. Makes the `no_improper_suggestion_for_macos` test pass. --- .../src/linting/orthographic_consistency.rs | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/harper-core/src/linting/orthographic_consistency.rs b/harper-core/src/linting/orthographic_consistency.rs index 4a915ed60..b61fcf675 100644 --- a/harper-core/src/linting/orthographic_consistency.rs +++ b/harper-core/src/linting/orthographic_consistency.rs @@ -88,18 +88,14 @@ impl ExprLinter for OrthographicConsistency { } let canonical_flags = metadata.orth_info; - let flags_to_check = [ - OrthFlags::LOWER_CAMEL, - OrthFlags::UPPER_CAMEL, - OrthFlags::APOSTROPHE, - OrthFlags::HYPHENATED, - ]; - - if flags_to_check - .into_iter() - .filter(|flag| canonical_flags.contains(*flag) != cur_flags.contains(*flag)) - .count() - == 1 + let flags_to_check = OrthFlags::LOWER_CAMEL + | OrthFlags::UPPER_CAMEL + | OrthFlags::APOSTROPHE + | OrthFlags::HYPHENATED; + + // If any of the flags specified by flags_to_check differ between cur_flags and + // canonical_flags. 
+ if !((canonical_flags ^ cur_flags) & flags_to_check).is_empty() && let Ok(canonical) = self .dict .get_correct_capitalization_of(chars) From 007df6e7fd2d5a4c3091e537e875333559118b05 Mon Sep 17 00:00:00 2001 From: 86xsk <200443667+86xsk@users.noreply.github.com> Date: Thu, 29 Jan 2026 19:19:06 -0600 Subject: [PATCH 12/18] test(core): add failing test --- harper-core/src/linting/orthographic_consistency.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/harper-core/src/linting/orthographic_consistency.rs b/harper-core/src/linting/orthographic_consistency.rs index b61fcf675..cabfe5945 100644 --- a/harper-core/src/linting/orthographic_consistency.rs +++ b/harper-core/src/linting/orthographic_consistency.rs @@ -440,4 +440,12 @@ mod tests { &["MacOS"], ); } + + #[test] + fn accept_case_variants() { + // At the time of writing this test, "Pr" (despite being a word in the curated dictionary) + // would be linted for the supposed reason of the canonical spelling being "PR". + // Since both words are in the curated dictionary, neither should be linted. + assert_no_lints("Pr PR", OrthographicConsistency::default()); + } } From 7518350f733d1dfb69c8d7094cf4056832b01755 Mon Sep 17 00:00:00 2001 From: 86xsk <200443667+86xsk@users.noreply.github.com> Date: Thu, 29 Jan 2026 19:19:35 -0600 Subject: [PATCH 13/18] fix(core): allow all case-variants in `OrthographicConsistency` Allow any word casing/orthography that is defined in the dictionary. If the dictionary contains the exact word, `OrthographicConsistency` will skip it. 
--- harper-core/src/linting/orthographic_consistency.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/harper-core/src/linting/orthographic_consistency.rs b/harper-core/src/linting/orthographic_consistency.rs index cabfe5945..1c497320a 100644 --- a/harper-core/src/linting/orthographic_consistency.rs +++ b/harper-core/src/linting/orthographic_consistency.rs @@ -69,6 +69,11 @@ impl ExprLinter for OrthographicConsistency { let chars = word.span.get_content(source); + if self.dict.contains_exact_word(chars) { + // Exit if the dictionary contains the exact word. + return None; + } + let cur_flags = OrthFlags::from_letters(chars); if metadata.is_allcaps() From 3c9d54e54c70ea7e71d933b79e3aac5a7074e912 Mon Sep 17 00:00:00 2001 From: 86xsk <200443667+86xsk@users.noreply.github.com> Date: Thu, 29 Jan 2026 19:24:13 -0600 Subject: [PATCH 14/18] test(core): remove Lego -> LEGO test in `OrthographicConsistency` Both variants are defined in the dictionary, and appear to be valid in this case. --- harper-core/src/linting/orthographic_consistency.rs | 9 --------- 1 file changed, 9 deletions(-) diff --git a/harper-core/src/linting/orthographic_consistency.rs b/harper-core/src/linting/orthographic_consistency.rs index 1c497320a..39dc1ed78 100644 --- a/harper-core/src/linting/orthographic_consistency.rs +++ b/harper-core/src/linting/orthographic_consistency.rs @@ -198,15 +198,6 @@ mod tests { ); } - #[test] - fn lego_should_be_all_caps() { - assert_suggestion_result( - "Lego bricks encourage creativity.", - OrthographicConsistency::default(), - "LEGO bricks encourage creativity.", - ); - } - #[test] fn nato_should_be_all_caps() { assert_suggestion_result( From 8b426d9251795d6b5a23f097e8f256de0654c178 Mon Sep 17 00:00:00 2001 From: 86xsk <200443667+86xsk@users.noreply.github.com> Date: Thu, 29 Jan 2026 19:29:50 -0600 Subject: [PATCH 15/18] chore: update snapshots --- .../linters/Alice's Adventures in Wonderland.snap.yml | 9 --------- 1 file changed, 9 deletions(-) diff --git 
a/harper-core/tests/text/linters/Alice's Adventures in Wonderland.snap.yml b/harper-core/tests/text/linters/Alice's Adventures in Wonderland.snap.yml index ec2f89faa..6b89047b0 100644 --- a/harper-core/tests/text/linters/Alice's Adventures in Wonderland.snap.yml +++ b/harper-core/tests/text/linters/Alice's Adventures in Wonderland.snap.yml @@ -672,15 +672,6 @@ Suggest: -Lint: Capitalization (127 priority) -Message: | - 320 | she began again: “Où est ma chatte?” which was the first sentence in her French - | ^~~ This word's canonical spelling is all-caps. -Suggest: - - Replace with: “EST” - - - Lint: Spelling (63 priority) Message: | 320 | she began again: “Où est ma chatte?” which was the first sentence in her French From 3381fba458faa53a29ca10b9129e8a347f85b067 Mon Sep 17 00:00:00 2001 From: 86xsk <200443667+86xsk@users.noreply.github.com> Date: Thu, 29 Jan 2026 19:44:21 -0600 Subject: [PATCH 16/18] test(core): add test --- harper-core/src/linting/orthographic_consistency.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/harper-core/src/linting/orthographic_consistency.rs b/harper-core/src/linting/orthographic_consistency.rs index 39dc1ed78..942a096a6 100644 --- a/harper-core/src/linting/orthographic_consistency.rs +++ b/harper-core/src/linting/orthographic_consistency.rs @@ -444,4 +444,10 @@ mod tests { // Since both words are in the curated dictionary, neither should be linted. assert_no_lints("Pr PR", OrthographicConsistency::default()); } + + #[test] + fn dont_accept_undefined_case_variants() { + // "pr" isn't defined in the dictionary, so it should be linted. + assert_lint_count("pr", OrthographicConsistency::default(), 1); + } } From e11a2d648443ad18acebefbbfc2c18ef105ba816 Mon Sep 17 00:00:00 2001 From: 86xsk <200443667+86xsk@users.noreply.github.com> Date: Thu, 29 Jan 2026 19:49:59 -0600 Subject: [PATCH 17/18] test(core): fix incorrect test expectation The test would expect 'Al' to be linted by `OrthographicConsistency` for not being all-caps. 
--- harper-core/tests/run_tests.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harper-core/tests/run_tests.rs b/harper-core/tests/run_tests.rs index 4c9fbe455..85e7866c0 100644 --- a/harper-core/tests/run_tests.rs +++ b/harper-core/tests/run_tests.rs @@ -79,7 +79,7 @@ create_test!(issue_159.md, 1, Dialect::American); create_test!(issue_358.md, 0, Dialect::American); create_test!(issue_195.md, 0, Dialect::American); create_test!(issue_118.md, 0, Dialect::American); -create_test!(lots_of_latin.md, 1, Dialect::American); +create_test!(lots_of_latin.md, 0, Dialect::American); create_test!(pr_504.md, 1, Dialect::American); create_test!(pr_452.md, 2, Dialect::American); create_test!(hex_basic_clean.md, 0, Dialect::American); From b23f652866a359278b36ea38f7d95cf9c65b25be Mon Sep 17 00:00:00 2001 From: 86xsk <200443667+86xsk@users.noreply.github.com> Date: Thu, 29 Jan 2026 21:06:16 -0600 Subject: [PATCH 18/18] refactor(core): appease Clippy Remove needless borrow. --- harper-core/src/thesaurus_helper.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harper-core/src/thesaurus_helper.rs b/harper-core/src/thesaurus_helper.rs index f435b3694..c5def939a 100644 --- a/harper-core/src/thesaurus_helper.rs +++ b/harper-core/src/thesaurus_helper.rs @@ -35,7 +35,7 @@ pub fn get_synonyms_sorted(_word: &str, _token: &TokenKind) -> Option