From a8016cde08ec3e6987456049556393a82a1be47a Mon Sep 17 00:00:00 2001 From: hippietrail Date: Sat, 20 Dec 2025 15:41:24 +0800 Subject: [PATCH 1/5] feat: work in progress on #954 --- harper-core/src/linting/damages.rs | 70 +++++++++++++++++++++++++++ harper-core/src/linting/lint_group.rs | 2 + harper-core/src/linting/mod.rs | 39 +++++++++++++++ 3 files changed, 111 insertions(+) create mode 100644 harper-core/src/linting/damages.rs diff --git a/harper-core/src/linting/damages.rs b/harper-core/src/linting/damages.rs new file mode 100644 index 000000000..ee6480b29 --- /dev/null +++ b/harper-core/src/linting/damages.rs @@ -0,0 +1,70 @@ +use crate::{ + expr::{Expr, SequenceExpr}, + linting::{ExprLinter, expr_linter::Chunk, debug::format_lint_match}, + Token, Lint +}; + +pub struct Damages { + expr: Box, +} + +impl Default for Damages { + fn default() -> Self { + Self { + expr: Box::new(SequenceExpr::word_set(&["damages", "damage"])), + } + } +} + +impl ExprLinter for Damages { + type Unit = Chunk; + + fn expr(&self) -> &dyn Expr { + self.expr.as_ref() + } + + fn match_to_lint_with_context(&self, toks: &[Token], src: &[char], ctx: Option<(&[Token], &[Token])>) -> Option { + eprintln!("🩵 {}", format_lint_match(toks, ctx, src)); + None + } + + fn description(&self) -> &str { + "Checks for plural `damages` not in the context of a court case." + } +} + +#[cfg(test)] +mod tests { + + + + // Examples of the error from GitHub: + + // Flow networks robust against damages are simple model networks described in a series of publications by Kaluza et al. + // POC to select vehicle damages on a car and mark the severity - sudheeshcm/vehicle-damage-selector. + // This is a web application that detects damages on mangoes using a TensorFlow model with Django as the frontend framework + // Detecting different types of damages of roads like cracks and potholes for the given image/video of the road. 
+ + // Examples from GitHub where it seems to be used correctly in regard to financial compensation: + + // Code used for calculating damages in lost chance cases. + // Where the dispute involves a claim for damages in respect of a motor accident for cost of rental of a replacement vehicle + // Under this section, the Commercial Contributor would have to + // defend claims against the other Contributors related to those + // performance claims and warranties, and if a court requires any other + // Contributor to pay any damages as a result, the Commercial Contributor + // must pay those damages. + + // Examples from GitHub where it's not an error but a verb: + + // Profiles pb's and damages them when their runtime goes over a set value - sirhamsteralot/HaE-PBLimiter. + // Opening Wayland-native terminal damages Firefox + // Open File Requester damages underlaying windows when moved + + // Examples from GitHub that are too hard to call - maybe they are talking about financial compensation? + + // The goal is to estimate the damages of each link in the Graph object using the Damages result (estimating the damages for each segment of a Network). + // This repository contains code to conduct statistical inference in cartel damages estimation. It will be updated to include a Stata .do file which approximates the standard error of total damages from a fixed effects panel data model, using the delta method. + // Financial damages caused by received errors $$$$ + // It would be useful to be able to see asset-level damages after running FDA 2.0. 
+} \ No newline at end of file diff --git a/harper-core/src/linting/lint_group.rs b/harper-core/src/linting/lint_group.rs index 5be6be636..1ee7c516b 100644 --- a/harper-core/src/linting/lint_group.rs +++ b/harper-core/src/linting/lint_group.rs @@ -48,6 +48,7 @@ use super::correct_number_suffix::CorrectNumberSuffix; use super::criteria_phenomena::CriteriaPhenomena; use super::cure_for::CureFor; use super::currency_placement::CurrencyPlacement; +use super::damages::Damages; use super::despite_of::DespiteOf; use super::didnt::Didnt; use super::discourse_markers::DiscourseMarkers; @@ -523,6 +524,7 @@ impl LintGroup { insert_expr_rule!(CriteriaPhenomena, true); insert_expr_rule!(CureFor, true); insert_struct_rule!(CurrencyPlacement, true); + insert_expr_rule!(Damages, true); insert_expr_rule!(Dashes, true); insert_expr_rule!(DespiteOf, true); insert_expr_rule!(Didnt, true); diff --git a/harper-core/src/linting/mod.rs b/harper-core/src/linting/mod.rs index 1173f38cd..494a5f593 100644 --- a/harper-core/src/linting/mod.rs +++ b/harper-core/src/linting/mod.rs @@ -41,6 +41,7 @@ mod correct_number_suffix; mod criteria_phenomena; mod cure_for; mod currency_placement; +mod damages; mod dashes; mod despite_of; mod determiner_without_noun; @@ -256,6 +257,44 @@ where } } +pub mod debug { + use crate::{Token, token_string_ext::TokenStringExt}; + + /// Formats a lint match with surrounding context for debug output. + /// + /// The function takes the same `matched_tokens` and `source`, and `context` parameters + /// passed to `[match_to_lint_with_context]`. + /// + /// # Arguments + /// * `log` - `matched_tokens` + /// * `ctx` - `context`, or `None` if calling from `[match_to_lint]` + /// * `src` - `source` from `[match_to_lint]` / `[match_to_lint_with_context]` + /// + /// # Returns + /// A string with ANSI escape codes where: + /// - Context tokens are dimmed before and after the matched tokens in normal weight. 
+ /// - Markup and formatting text hidden in whitespace tokens is filtered out. + pub fn format_lint_match( + log: &[Token], + ctx: Option<(&[Token], &[Token])>, + src: &[char], + ) -> String { + if let Some((pro, epi)) = ctx { + let [pro, log, epi] = [pro, log, epi].map(|tt| { + tt.iter() + .filter(|t| !t.kind.is_whitespace() && !t.kind.is_newline()) + .map(|t| t.span.get_content_string(src)) + .collect::>() + .join(" ") + }); + format!("\x1b[2m{}\x1b[0m {} \x1b[2m{}\x1b[0m", pro, log, epi,) + } else { + log.span() + .map_or_else(String::new, |span| span.get_content_string(src)) + } + } +} + #[cfg(test)] pub mod tests { use crate::parsers::Markdown; From e86b2b0accca0b00576de10908839988aa917ece Mon Sep 17 00:00:00 2001 From: hippietrail Date: Sat, 20 Dec 2025 20:38:41 +0800 Subject: [PATCH 2/5] feat: flag plural "damages": partly working --- harper-core/dictionary.dict | 4 +- harper-core/src/linting/damages.rs | 139 +++++++++++++++++++++++--- harper-core/src/linting/lint_group.rs | 6 +- 3 files changed, 133 insertions(+), 16 deletions(-) diff --git a/harper-core/dictionary.dict b/harper-core/dictionary.dict index 84855c6ba..5291f693b 100644 --- a/harper-core/dictionary.dict +++ b/harper-core/dictionary.dict @@ -29646,7 +29646,7 @@ indignance/Ng indignant/JY indignation/~Ng indigo/~NgJ -indirect/~JYNV +indirect/~JYNV # noun senses: 1) type of cost in finance; 2) type of radiator indiscipline/N indiscreet/JY indiscretion/NS @@ -33513,7 +33513,7 @@ menstruation/Ng mensurable/J mensuration/Ng menswear/Nmg -mental/~JYN +mental/~JY # removed slang/Indian/rare noun senses mentalist/JNSg mentality/~NSg menthol/Ng diff --git a/harper-core/src/linting/damages.rs b/harper-core/src/linting/damages.rs index ee6480b29..486d344a2 100644 --- a/harper-core/src/linting/damages.rs +++ b/harper-core/src/linting/damages.rs @@ -1,7 +1,7 @@ use crate::{ + CharStringExt, Lint, Token, expr::{Expr, SequenceExpr}, - linting::{ExprLinter, expr_linter::Chunk, debug::format_lint_match}, - 
Token, Lint + linting::{ExprLinter, LintKind, Suggestion, expr_linter::Sentence}, }; pub struct Damages { @@ -17,15 +17,121 @@ impl Default for Damages { } impl ExprLinter for Damages { - type Unit = Chunk; + type Unit = Sentence; fn expr(&self) -> &dyn Expr { self.expr.as_ref() } - fn match_to_lint_with_context(&self, toks: &[Token], src: &[char], ctx: Option<(&[Token], &[Token])>) -> Option { - eprintln!("🩵 {}", format_lint_match(toks, ctx, src)); - None + fn match_to_lint_with_context( + &self, + toks: &[Token], + src: &[char], + ctx: Option<(&[Token], &[Token])>, + ) -> Option { + let damage_idx = 0; + let damage_tok = &toks[damage_idx]; + let damage_span = damage_tok.span; + let damage_chars = damage_span.get_content(src); + + // Singular noun/verb lemma is not an error but during development we'll print uses of it + // to observe its context. + if damage_chars.eq_ignore_ascii_case_chars(&['d', 'a', 'm', 'a', 'g', 'e']) { + return None; + } + + let maybe_prev_word = ctx.and_then(|(pre, _)| { + let last_word = pre.last(); + match (last_word, pre.get(pre.len() - 2)) { + (Some(sp), Some(w)) if sp.kind.is_whitespace() && w.kind.is_word() => Some(w), + _ => None, + } + }); + + #[derive(PartialEq)] + enum CanPrecede { + Unknown, + NeitherNounNorVerb, + Noun, + Verb, + EitherNounOrVerb, + } + + let can_precede = maybe_prev_word.map_or(CanPrecede::Unknown, |prev_word| { + let mut can: CanPrecede = CanPrecede::Unknown; + if prev_word.kind.is_preposition() + && !prev_word + .span + .get_content(src) + .eq_ignore_ascii_case_chars(&['t', 'o']) + { + can = CanPrecede::Noun; + } + + if prev_word.kind.is_adjective() { + if prev_word.kind.is_noun() { + } else { + can = CanPrecede::Noun; + } + } + if prev_word.kind.is_determiner() { + can = CanPrecede::Noun; + } + + if prev_word.kind.is_auxiliary_verb() { + can = if can == CanPrecede::Noun { + CanPrecede::EitherNounOrVerb + } else { + CanPrecede::Verb + }; + } + + can + }); + + if can_precede == CanPrecede::Verb { + return 
+ None; + } + + // Check all the tokens for words that are used in the legal compesation context + // TODO: this fails when "damages" is misuses in a diclaimer: + // 1. "If you encounter any issues, errors, or damages resulting from the use of these templates, + // the repository author assumes no responsibility or liability." + // 2. "The author will not be liable for any losses and/or damages in connection with the use of our website" + if ctx.is_some_and(|(pre, aft)| { + let keywords = &[ + "claim", + "claims", + "judgments", + "liabilities", + "liability", + "liable", + "settlements", + "warranty", + ][..]; + pre.iter().any(|t| { + t.span + .get_content(src) + .eq_any_ignore_ascii_case_str(keywords) + }) || aft.iter().any(|t| { + t.span + .get_content(src) + .eq_any_ignore_ascii_case_str(keywords) + }) + }) { + return None; + } + + Some(Lint { + span: damage_span, + lint_kind: LintKind::Usage, + suggestions: vec![Suggestion::replace_with_match_case( + damage_chars[..6].to_vec(), + damage_chars, + )], + message: "Singular `damage` is correct when not referring to a court case.".to_string(), + ..Default::default() + }) } fn description(&self) -> &str { @@ -35,16 +141,23 @@ impl ExprLinter for Damages { #[cfg(test)] mod tests { + // Examples of the error from GitHub: + use crate::linting::tests::assert_lint_count; + #[test] + fn fix_robust_against_damages() { + assert_lint_count( + "Flow networks robust against damages are simple model networks described in a series of publications by Kaluza et al.", + super::Damages::default(), + 1, + ); + } - // Examples of the error from GitHub: - - // Flow networks robust against damages are simple model networks described in a series of publications by Kaluza et al. // POC to select vehicle damages on a car and mark the severity - sudheeshcm/vehicle-damage-selector. 
// This is a web application that detects damages on mangoes using a TensorFlow model with Django as the frontend framework // Detecting different types of damages of roads like cracks and potholes for the given image/video of the road. - + // Examples from GitHub where it seems to be used correctly in regard to financial compensation: // Code used for calculating damages in lost chance cases. @@ -54,17 +167,17 @@ mod tests { // performance claims and warranties, and if a court requires any other // Contributor to pay any damages as a result, the Commercial Contributor // must pay those damages. - + // Examples from GitHub where it's not an error but a verb: // Profiles pb's and damages them when their runtime goes over a set value - sirhamsteralot/HaE-PBLimiter. // Opening Wayland-native terminal damages Firefox // Open File Requester damages underlaying windows when moved - + // Examples from GitHub that are too hard to call - maybe they are talking about financial compensation? // The goal is to estimate the damages of each link in the Graph object using the Damages result (estimating the damages for each segment of a Network). // This repository contains code to conduct statistical inference in cartel damages estimation. It will be updated to include a Stata .do file which approximates the standard error of total damages from a fixed effects panel data model, using the delta method. // Financial damages caused by received errors $$$$ // It would be useful to be able to see asset-level damages after running FDA 2.0. 
-} \ No newline at end of file +} diff --git a/harper-core/src/linting/lint_group.rs b/harper-core/src/linting/lint_group.rs index 1ee7c516b..96bd51fc7 100644 --- a/harper-core/src/linting/lint_group.rs +++ b/harper-core/src/linting/lint_group.rs @@ -524,7 +524,6 @@ impl LintGroup { insert_expr_rule!(CriteriaPhenomena, true); insert_expr_rule!(CureFor, true); insert_struct_rule!(CurrencyPlacement, true); - insert_expr_rule!(Damages, true); insert_expr_rule!(Dashes, true); insert_expr_rule!(DespiteOf, true); insert_expr_rule!(Didnt, true); @@ -708,6 +707,11 @@ impl LintGroup { ); out.config.set_rule_enabled("DisjointPrefixes", true); + // add_chunk_expr_linter doesn't support the `Sentence` `Unit` and there is not yet any + // `add_sentence_expr_linter` + out.add("Damages", Damages::default()); + out.config.set_rule_enabled("Damages", true); + out } From 94e2a93a666641434c4c4bac1519e635fd58a1f2 Mon Sep 17 00:00:00 2001 From: hippietrail Date: Sun, 21 Dec 2025 15:09:37 +0800 Subject: [PATCH 3/5] refactor: cleaned up, ready for review --- harper-core/dictionary.dict | 3 +- harper-core/src/linting/damages.rs | 253 ++++++++++++++++++++++------- 2 files changed, 193 insertions(+), 63 deletions(-) diff --git a/harper-core/dictionary.dict b/harper-core/dictionary.dict index 5291f693b..1f0253015 100644 --- a/harper-core/dictionary.dict +++ b/harper-core/dictionary.dict @@ -50110,8 +50110,9 @@ underhood/J underinflated/V underinsured/J underinvest/VGdS +underlaid/tT underlain/V -underlay/VSNg +underlay/VSGNg underlie/VS underline/~NgSVGdJ underling/NgS diff --git a/harper-core/src/linting/damages.rs b/harper-core/src/linting/damages.rs index 486d344a2..333dc33ce 100644 --- a/harper-core/src/linting/damages.rs +++ b/harper-core/src/linting/damages.rs @@ -4,6 +4,21 @@ use crate::{ linting::{ExprLinter, LintKind, Suggestion, expr_linter::Sentence}, }; +static KEYWORDS: &[&str] = &[ + "case", + "cases", + "claim", + "claims", + "judgment", + "judgments", + "liabilities", + 
"liability", + "liable", + "settlement", + "settlements", + "warranty", +]; + pub struct Damages { expr: Box, } @@ -29,6 +44,7 @@ impl ExprLinter for Damages { src: &[char], ctx: Option<(&[Token], &[Token])>, ) -> Option { + let (pretoks, postoks) = ctx?; let damage_idx = 0; let damage_tok = &toks[damage_idx]; let damage_span = damage_tok.span; @@ -40,13 +56,21 @@ impl ExprLinter for Damages { return None; } - let maybe_prev_word = ctx.and_then(|(pre, _)| { - let last_word = pre.last(); - match (last_word, pre.get(pre.len() - 2)) { - (Some(sp), Some(w)) if sp.kind.is_whitespace() && w.kind.is_word() => Some(w), - _ => None, - } - }); + // If the word after "damages" is a noun or object pronoun, it's the object and "damages" is a verb. + let next_word_tok = match (postoks.first(), postoks.get(1)) { + (Some(sp), Some(w)) if sp.kind.is_whitespace() && w.kind.is_word() => Some(w), + _ => None, + }; + + if next_word_tok.is_some_and(|nwt| nwt.kind.is_object_pronoun() || nwt.kind.is_noun()) { + return None; + } + + // The word before "damages" may help us narrow down whether it's a noun or verb. + let prev_word_tok = match (pretoks.get(pretoks.len() - 2), pretoks.last()) { + (Some(w), Some(sp)) if sp.kind.is_whitespace() && w.kind.is_word() => Some(w), + _ => None, + }; #[derive(PartialEq)] enum CanPrecede { @@ -57,9 +81,13 @@ impl ExprLinter for Damages { EitherNounOrVerb, } - let can_precede = maybe_prev_word.map_or(CanPrecede::Unknown, |prev_word| { + // Try to disambiguate whether "damages" is a noun or verb. 
+ let can_precede = prev_word_tok.map_or(CanPrecede::Unknown, |prev_word| { let mut can: CanPrecede = CanPrecede::Unknown; - if prev_word.kind.is_preposition() + + if (prev_word.kind.is_adjective() + || prev_word.kind.is_determiner() + || prev_word.kind.is_preposition()) && !prev_word .span .get_content(src) @@ -68,16 +96,6 @@ impl ExprLinter for Damages { can = CanPrecede::Noun; } - if prev_word.kind.is_adjective() { - if prev_word.kind.is_noun() { - } else { - can = CanPrecede::Noun; - } - } - if prev_word.kind.is_determiner() { - can = CanPrecede::Noun; - } - if prev_word.kind.is_auxiliary_verb() { can = if can == CanPrecede::Noun { CanPrecede::EitherNounOrVerb @@ -93,31 +111,45 @@ impl ExprLinter for Damages { return None; } + // We now know "damages" isn't unambiguously a verb, but it could still be an ambiguous verb-noun. + // Or it could be a noun. Or it could still be unknown. + + // Check if it's the object of the verb "to pay" + let pay_det = SequenceExpr::word_set(&["paid", "pay", "paying", "pays"]) + .then_optional(SequenceExpr::default().t_ws().then_determiner()) + .t_ws(); + + for i in (pretoks.len() - 4..pretoks.len() - 2).step_by(2) { + if pay_det.run(i, pretoks, src).is_some() { + return None; + } + } + + // TODO: Is this functional-style code better than the for loop version above? + // if pretoks + // .windows(2) + // .enumerate() + // .rev() + // .take_while(|(i, _)| pay_det.run(*i, pretoks, src).is_none()) + // .count() + // < pretoks.len() / 2 + // { + // return None; + // } + // Check all the tokens for words that are used in the legal compesation context // TODO: this fails when "damages" is misuses in a diclaimer: // 1. "If you encounter any issues, errors, or damages resulting from the use of these templates, // the repository author assumes no responsibility or liability." // 2. 
"The author will not be liable for any losses and/or damages in connection with the use of our website" - if ctx.is_some_and(|(pre, aft)| { - let keywords = &[ - "claim", - "claims", - "judgments", - "liabilities", - "liability", - "liable", - "settlements", - "warranty", - ][..]; - pre.iter().any(|t| { - t.span - .get_content(src) - .eq_any_ignore_ascii_case_str(keywords) - }) || aft.iter().any(|t| { - t.span - .get_content(src) - .eq_any_ignore_ascii_case_str(keywords) - }) + if pretoks.iter().any(|t| { + t.span + .get_content(src) + .eq_any_ignore_ascii_case_str(KEYWORDS) + }) || postoks.iter().any(|t| { + t.span + .get_content(src) + .eq_any_ignore_ascii_case_str(KEYWORDS) }) { return None; } @@ -141,43 +173,140 @@ impl ExprLinter for Damages { #[cfg(test)] mod tests { - // Examples of the error from GitHub: + use super::Damages; + use crate::linting::tests::{assert_no_lints, assert_suggestion_result}; - use crate::linting::tests::assert_lint_count; + // Examples of the error from GitHub: #[test] - fn fix_robust_against_damages() { - assert_lint_count( + fn fix_robust_against_damages_by_prev_preposition() { + assert_suggestion_result( "Flow networks robust against damages are simple model networks described in a series of publications by Kaluza et al.", - super::Damages::default(), - 1, + Damages::default(), + "Flow networks robust against damage are simple model networks described in a series of publications by Kaluza et al.", + ); + } + + #[test] + fn fix_vehicle_damages_on_a_car_by_fall_through() { + assert_suggestion_result( + "POC to select vehicle damages on a car and mark the severity - sudheeshcm/vehicle-damage-selector.", + Damages::default(), + "POC to select vehicle damage on a car and mark the severity - sudheeshcm/vehicle-damage-selector.", ); } - // POC to select vehicle damages on a car and mark the severity - sudheeshcm/vehicle-damage-selector. 
- // This is a web application that detects damages on mangoes using a TensorFlow model with Django as the frontend framework - // Detecting different types of damages of roads like cracks and potholes for the given image/video of the road. + #[test] + fn fix_damages_on_mangoes() { + assert_suggestion_result( + "This is a web application that detects damages on mangoes using a TensorFlow model with Django as the frontend framework", + Damages::default(), + "This is a web application that detects damage on mangoes using a TensorFlow model with Django as the frontend framework", + ); + } + + #[test] + fn fix_types_of_damages_of_roads() { + assert_suggestion_result( + "Detecting different types of damages of roads like cracks and potholes for the given image/video of the road.", + Damages::default(), + "Detecting different types of damage of roads like cracks and potholes for the given image/video of the road.", + ); + } // Examples from GitHub where it seems to be used correctly in regard to financial compensation: - // Code used for calculating damages in lost chance cases. - // Where the dispute involves a claim for damages in respect of a motor accident for cost of rental of a replacement vehicle - // Under this section, the Commercial Contributor would have to - // defend claims against the other Contributors related to those - // performance claims and warranties, and if a court requires any other - // Contributor to pay any damages as a result, the Commercial Contributor - // must pay those damages. + // TODO: would the word "calculate" before "damages" be a good heuristic? 
+ #[test] + fn ignore_damages_in_lost_chance_cases() { + assert_no_lints( + "Code used for calculating damages in lost chance cases.", + Damages::default(), + ); + } + + #[test] + fn ignore_claim_for_damages() { + assert_no_lints( + "Where the dispute involves a claim for damages in respect of a motor accident for cost of rental of a replacement vehicle", + Damages::default(), + ); + } + + #[test] + fn ignore_pay_damages() { + assert_no_lints( + "Under this section, the Commercial Contributor would have to + defend claims against the other Contributors related to those + performance claims and warranties, and if a court requires any other + Contributor to pay any damages as a result, the Commercial Contributor + must pay those damages.", + Damages::default(), + ); + } // Examples from GitHub where it's not an error but a verb: - // Profiles pb's and damages them when their runtime goes over a set value - sirhamsteralot/HaE-PBLimiter. - // Opening Wayland-native terminal damages Firefox - // Open File Requester damages underlaying windows when moved + #[test] + fn ignore_damages_them() { + assert_no_lints( + "Profiles pb's and damages them when their runtime goes over a set value - sirhamsteralot/HaE-PBLimiter.", + Damages::default(), + ); + } + + #[test] + fn ignore_damages_firefox() { + assert_no_lints( + "Opening Wayland-native terminal damages Firefox", + Damages::default(), + ); + } + + #[test] + fn ignore_damages_underlaying_windows() { + assert_no_lints( + "Open File Requester damages underlaying windows when moved", + Damages::default(), + ); + } // Examples from GitHub that are too hard to call - maybe they are talking about financial compensation? - // The goal is to estimate the damages of each link in the Graph object using the Damages result (estimating the damages for each segment of a Network). - // This repository contains code to conduct statistical inference in cartel damages estimation. 
It will be updated to include a Stata .do file which approximates the standard error of total damages from a fixed effects panel data model, using the delta method. - // Financial damages caused by received errors $$$$ - // It would be useful to be able to see asset-level damages after running FDA 2.0. + #[test] + #[ignore = "too close to call for now"] + fn ignore_estimate_the_damages_and_the_damages_result() { + assert_no_lints( + "The goal is to estimate the damages of each link in the Graph object using the Damages result (estimating the damages for each segment of a Network).", + Damages::default(), + ); + } + + // https://github.com › dpasmat › cartel-damages-inference + #[test] + #[ignore = "too close to call for now"] + fn ignore_damages_inference() { + assert_no_lints( + "This repository contains code to conduct statistical inference in cartel damages estimation. It will be updated to include a Stata .do file which approximates the standard error of total damages from a fixed effects panel data model, using the delta method.", + Damages::default(), + ); + } + + #[test] + #[ignore = "too close to call for now"] + fn ignore_received_errors() { + assert_no_lints( + "Financial damages caused by received errors $$$$.", + Damages::default(), + ); + } + + #[test] + #[ignore = "too close to call for now"] + fn ignore_asset_level_damages() { + assert_no_lints( + "It would be useful to be able to see asset-level damages after running FDA 2.0.", + Damages::default(), + ); + } } From 189dd0e452f1c6913a9e779824031876ce2e79bb Mon Sep 17 00:00:00 2001 From: hippietrail Date: Fri, 16 Jan 2026 16:43:41 +0700 Subject: [PATCH 4/5] fix: address concerns in PR review --- harper-core/src/linting/damages.rs | 25 +++++--------- harper-core/src/linting/mod.rs | 53 ++++-------------------------- 2 files changed, 16 insertions(+), 62 deletions(-) diff --git a/harper-core/src/linting/damages.rs b/harper-core/src/linting/damages.rs index 333dc33ce..db0aa9b67 100644 --- 
a/harper-core/src/linting/damages.rs +++ b/harper-core/src/linting/damages.rs @@ -119,24 +119,17 @@ impl ExprLinter for Damages { .then_optional(SequenceExpr::default().t_ws().then_determiner()) .t_ws(); - for i in (pretoks.len() - 4..pretoks.len() - 2).step_by(2) { - if pay_det.run(i, pretoks, src).is_some() { - return None; - } + if pretoks + .windows(2) + .enumerate() + .rev() + .take_while(|(i, _)| pay_det.run(*i, pretoks, src).is_none()) + .count() + < pretoks.len() / 2 + { + return None; } - // TODO: Is this functional-style code better than the for loop version above? - // if pretoks - // .windows(2) - // .enumerate() - // .rev() - // .take_while(|(i, _)| pay_det.run(*i, pretoks, src).is_none()) - // .count() - // < pretoks.len() / 2 - // { - // return None; - // } - // Check all the tokens for words that are used in the legal compesation context // TODO: this fails when "damages" is misuses in a diclaimer: // 1. "If you encounter any issues, errors, or damages resulting from the use of these templates, diff --git a/harper-core/src/linting/mod.rs b/harper-core/src/linting/mod.rs index f19780941..0c5deb5f9 100644 --- a/harper-core/src/linting/mod.rs +++ b/harper-core/src/linting/mod.rs @@ -267,45 +267,6 @@ where } } -pub mod debug { - use crate::{Token, token_string_ext::TokenStringExt}; - - /// Formats a lint match with surrounding context for debug output. - /// - /// The function takes the same `matched_tokens` and `source`, and `context` parameters - /// passed to `[match_to_lint_with_context]`. - /// - /// # Arguments - /// * `log` - `matched_tokens` - /// * `ctx` - `context`, or `None` if calling from `[match_to_lint]` - /// * `src` - `source` from `[match_to_lint]` / `[match_to_lint_with_context]` - /// - /// # Returns - /// A string with ANSI escape codes where: - /// - Context tokens are dimmed before and after the matched tokens in normal weight. - /// - Markup and formatting text hidden in whitespace tokens is filtered out. 
- pub fn format_lint_match( - log: &[Token], - ctx: Option<(&[Token], &[Token])>, - src: &[char], - ) -> String { - if let Some((pro, epi)) = ctx { - let [pro, log, epi] = [pro, log, epi].map(|tt| { - tt.iter() - .filter(|t| !t.kind.is_whitespace() && !t.kind.is_newline()) - .map(|t| t.span.get_content_string(src)) - .collect::>() - .join(" ") - }); - format!("\x1b[2m{}\x1b[0m {} \x1b[2m{}\x1b[0m", pro, log, epi,) - } else { - log.span() - .map_or_else(String::new, |span| span.get_content_string(src)) - } - } -} - -#[cfg(test)] pub mod tests { use crate::parsers::Markdown; use crate::{Document, Span, Token}; @@ -532,13 +493,13 @@ pub mod tests { let lints = linter.lint(&test); // Just check the first lint for now - if let Some(lint) = lints.first() { - if lint.message != expected_message { - panic!( - "Expected lint message \"{expected_message}\", but got \"{}\"", - lint.message - ); - } + if let Some(lint) = lints.first() + && lint.message != expected_message + { + panic!( + "Expected lint message \"{expected_message}\", but got \"{}\"", + lint.message + ); } } From 77831c65bed853bf7fe489c8744797e02f72c4b4 Mon Sep 17 00:00:00 2001 From: hippietrail Date: Fri, 16 Jan 2026 17:20:53 +0700 Subject: [PATCH 5/5] chore: temporarily comment out debug stuff for CI clippy --- harper-core/src/linting/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/harper-core/src/linting/mod.rs b/harper-core/src/linting/mod.rs index 0c5deb5f9..83a2546b5 100644 --- a/harper-core/src/linting/mod.rs +++ b/harper-core/src/linting/mod.rs @@ -302,7 +302,7 @@ pub mod tests { pub fn assert_lint_count(text: &str, mut linter: impl Linter, count: usize) { let test = Document::new_markdown_default_curated(text); let lints = linter.lint(&test); - dbg!(&lints); + // dbg!(&lints); if lints.len() != count { panic!( "Expected \"{text}\" to create {count} lints, but it created {}.", @@ -520,8 +520,8 @@ pub mod tests { if let Some(sug) = lint.suggestions.get(n) { 
sug.apply(lint.span, &mut text_chars); - let transformed_str: String = text_chars.iter().collect(); - dbg!(transformed_str); + // let transformed_str: String = text_chars.iter().collect(); + // dbg!(transformed_str); } else { break; }