|
| 1 | +use biome_analyze::{Rule, RuleDiagnostic, RuleSource, context::RuleContext, declare_lint_rule}; |
| 2 | +use biome_console::markup; |
| 3 | +use biome_js_semantic::SemanticModel; |
| 4 | +use biome_js_syntax::{ |
| 5 | + AnyJsCallArgument, AnyJsExpression, JsCallArguments, JsNewOrCallExpression, |
| 6 | + JsRegexLiteralExpression, global_identifier, |
| 7 | +}; |
| 8 | +use biome_rowan::{AstNode, AstSeparatedList, TextRange, TextSize}; |
| 9 | +use biome_rule_options::use_named_capture_group::UseNamedCaptureGroupOptions; |
| 10 | + |
| 11 | +use crate::services::semantic::Semantic; |
| 12 | + |
| 13 | +declare_lint_rule! { |
| 14 | + /// Enforce using named capture groups in regular expression. |
| 15 | + /// |
| 16 | + /// Numbered capture groups like `(...)` can be difficult to work with, |
| 17 | + /// as they are matched by their position and not by a descriptive name. |
| 18 | + /// Named capture groups (`(?<name>...)`) associate a descriptive name |
| 19 | + /// with each match, making the regular expression more readable and |
| 20 | + /// its intent clearer. |
| 21 | + /// |
| 22 | + /// ## Examples |
| 23 | + /// |
| 24 | + /// ### Invalid |
| 25 | + /// |
| 26 | + /// ```js,expect_diagnostic |
| 27 | + /// /(ba[rz])/; |
| 28 | + /// ``` |
| 29 | + /// |
| 30 | + /// ```js,expect_diagnostic |
| 31 | + /// /([0-9]{4})/; |
| 32 | + /// ``` |
| 33 | + /// |
| 34 | + /// ```js,expect_diagnostic |
| 35 | + /// /(?:ab)(cd)/; |
| 36 | + /// ``` |
| 37 | + /// |
| 38 | + /// ```js,expect_diagnostic |
| 39 | + /// new RegExp("(foo)"); |
| 40 | + /// ``` |
| 41 | + /// |
| 42 | + /// ```js,expect_diagnostic |
| 43 | + /// RegExp("(foo)"); |
| 44 | + /// ``` |
| 45 | + /// |
| 46 | + /// ### Valid |
| 47 | + /// |
| 48 | + /// ```js |
| 49 | + /// /(?<id>ba[rz])/; |
| 50 | + /// /(?:ba[rz])/; |
| 51 | + /// /ba[rz]/; |
| 52 | + /// /(?<year>[0-9]{4})-(?<month>[0-9]{2})/; |
| 53 | + /// new RegExp("(?<id>foo)"); |
| 54 | + /// new RegExp(pattern); |
| 55 | + /// ``` |
| 56 | + /// |
| 57 | + pub UseNamedCaptureGroup { |
| 58 | + version: "next", |
| 59 | + name: "useNamedCaptureGroup", |
| 60 | + language: "js", |
| 61 | + sources: &[RuleSource::Eslint("prefer-named-capture-group").same()], |
| 62 | + recommended: false, |
| 63 | + } |
| 64 | +} |
| 65 | + |
| 66 | +impl Rule for UseNamedCaptureGroup { |
| 67 | + type Query = Semantic<AnyJsExpression>; |
| 68 | + type State = TextRange; |
| 69 | + type Signals = Box<[Self::State]>; |
| 70 | + type Options = UseNamedCaptureGroupOptions; |
| 71 | + |
| 72 | + fn run(ctx: &RuleContext<Self>) -> Self::Signals { |
| 73 | + let node = ctx.query(); |
| 74 | + match node { |
| 75 | + AnyJsExpression::AnyJsLiteralExpression( |
| 76 | + biome_js_syntax::AnyJsLiteralExpression::JsRegexLiteralExpression(regex), |
| 77 | + ) => run_regex_literal(regex), |
| 78 | + AnyJsExpression::JsNewExpression(_) | AnyJsExpression::JsCallExpression(_) => { |
| 79 | + run_regexp_constructor(node, ctx.model()) |
| 80 | + } |
| 81 | + _ => Default::default(), |
| 82 | + } |
| 83 | + } |
| 84 | + |
| 85 | + fn diagnostic(_: &RuleContext<Self>, state: &Self::State) -> Option<RuleDiagnostic> { |
| 86 | + Some( |
| 87 | + RuleDiagnostic::new( |
| 88 | + rule_category!(), |
| 89 | + state, |
| 90 | + markup! { |
| 91 | + "Capture group is not named." |
| 92 | + }, |
| 93 | + ) |
| 94 | + .note(markup! { |
| 95 | + "Named capture groups improve readability by associating a descriptive name with each match. Use "<Emphasis>"(?<name>...)"</Emphasis>" instead of "<Emphasis>"(...)"</Emphasis>"." |
| 96 | + }), |
| 97 | + ) |
| 98 | + } |
| 99 | +} |
| 100 | + |
/// Locate every unnamed capture group in a regex pattern.
///
/// The pattern is scanned byte by byte:
/// - a backslash escapes (skips) the byte that follows it, inside or outside
///   a character class;
/// - everything between `[` and the next unescaped `]` is ignored;
/// - a `(` that is not immediately followed by `?` counts as an unnamed
///   capture group.
///
/// Returns the byte offset (relative to the start of `pattern`) of the
/// opening `(` of each unnamed group, in order of appearance.
fn find_unnamed_capture_groups(pattern: &str) -> Vec<u32> {
    let bytes = pattern.as_bytes();
    let mut offsets = Vec::new();
    let mut in_class = false;
    let mut idx = 0;

    while idx < bytes.len() {
        let current = bytes[idx];

        if current == b'\\' {
            // Skip the backslash and the byte it escapes.
            idx += 2;
            continue;
        }

        if in_class {
            // Only an unescaped `]` terminates the character class.
            in_class = current != b']';
        } else if current == b'[' {
            in_class = true;
        } else if current == b'(' && bytes.get(idx + 1) != Some(&b'?') {
            // `(?:`, `(?=`, `(?!`, `(?<=`, `(?<!`, `(?<name>` all start with
            // `(?` and are therefore not reported.
            offsets.push(idx as u32);
        }

        idx += 1;
    }

    offsets
}
| 136 | + |
| 137 | +fn is_regexp_object(expr: &AnyJsExpression, model: &SemanticModel) -> bool { |
| 138 | + match global_identifier(&expr.clone().omit_parentheses()) { |
| 139 | + Some((reference, name)) => match model.binding(&reference) { |
| 140 | + Some(_) if !reference.is_global_this() && !reference.has_name("window") => false, |
| 141 | + _ => name.text() == "RegExp", |
| 142 | + }, |
| 143 | + None => false, |
| 144 | + } |
| 145 | +} |
| 146 | + |
| 147 | +fn parse_regexp_node( |
| 148 | + node: &JsNewOrCallExpression, |
| 149 | +) -> Option<(AnyJsExpression, JsCallArguments)> { |
| 150 | + match node { |
| 151 | + JsNewOrCallExpression::JsNewExpression(node) => { |
| 152 | + let callee = node.callee().ok()?; |
| 153 | + let args = node.arguments()?; |
| 154 | + Some((callee, args)) |
| 155 | + } |
| 156 | + JsNewOrCallExpression::JsCallExpression(node) => { |
| 157 | + let callee = node.callee().ok()?; |
| 158 | + let args = node.arguments().ok()?; |
| 159 | + Some((callee, args)) |
| 160 | + } |
| 161 | + } |
| 162 | +} |
| 163 | + |
| 164 | +fn get_first_arg_expr(arguments: &JsCallArguments) -> Option<AnyJsExpression> { |
| 165 | + let first_arg = arguments.args().iter().next()?; |
| 166 | + let Ok(AnyJsCallArgument::AnyJsExpression(expr)) = first_arg else { |
| 167 | + return None; |
| 168 | + }; |
| 169 | + Some(expr) |
| 170 | +} |
| 171 | + |
| 172 | +/// Try to compute precise TextRange for each unnamed group in a string literal. |
| 173 | +/// Returns `Some` if the argument is a simple string literal without escape sequences |
| 174 | +/// (so byte offsets map 1:1 to source positions). Returns `None` otherwise. |
| 175 | +fn try_precise_string_ranges(arg_expr: &AnyJsExpression) -> Option<Box<[TextRange]>> { |
| 176 | + let AnyJsExpression::AnyJsLiteralExpression( |
| 177 | + biome_js_syntax::AnyJsLiteralExpression::JsStringLiteralExpression(string_lit), |
| 178 | + ) = arg_expr |
| 179 | + else { |
| 180 | + return None; |
| 181 | + }; |
| 182 | + let token = string_lit.value_token().ok()?; |
| 183 | + let token_text = token.text_trimmed(); |
| 184 | + let raw_inner = &token_text[1..token_text.len() - 1]; |
| 185 | + let inner_text = string_lit.inner_string_text().ok()?; |
| 186 | + // If raw source and interpreted text differ, escapes are present |
| 187 | + if raw_inner != inner_text.text() { |
| 188 | + return None; |
| 189 | + } |
| 190 | + let offsets = find_unnamed_capture_groups(raw_inner); |
| 191 | + if offsets.is_empty() { |
| 192 | + return Some(Default::default()); |
| 193 | + } |
| 194 | + let content_start = token.text_trimmed_range().start() + TextSize::from(1); |
| 195 | + Some( |
| 196 | + offsets |
| 197 | + .into_iter() |
| 198 | + .map(|offset| { |
| 199 | + let start = content_start + TextSize::from(offset); |
| 200 | + TextRange::new(start, start + TextSize::from(1)) |
| 201 | + }) |
| 202 | + .collect(), |
| 203 | + ) |
| 204 | +} |
| 205 | + |
| 206 | +fn run_regex_literal(node: &JsRegexLiteralExpression) -> Box<[TextRange]> { |
| 207 | + let Ok((pattern, _flags)) = node.decompose() else { |
| 208 | + return Default::default(); |
| 209 | + }; |
| 210 | + let pattern_text = pattern.text(); |
| 211 | + let offsets = find_unnamed_capture_groups(pattern_text); |
| 212 | + if offsets.is_empty() { |
| 213 | + return Default::default(); |
| 214 | + } |
| 215 | + let pattern_start = node.range().start() + TextSize::from(1); |
| 216 | + offsets |
| 217 | + .into_iter() |
| 218 | + .map(|offset| { |
| 219 | + let start = pattern_start + TextSize::from(offset); |
| 220 | + TextRange::new(start, start + TextSize::from(1)) |
| 221 | + }) |
| 222 | + .collect() |
| 223 | +} |
| 224 | + |
| 225 | +fn run_regexp_constructor(node: &AnyJsExpression, model: &SemanticModel) -> Box<[TextRange]> { |
| 226 | + let new_or_call = match node { |
| 227 | + AnyJsExpression::JsNewExpression(n) => JsNewOrCallExpression::from(n.clone()), |
| 228 | + AnyJsExpression::JsCallExpression(n) => JsNewOrCallExpression::from(n.clone()), |
| 229 | + _ => return Default::default(), |
| 230 | + }; |
| 231 | + let Some((callee, arguments)) = parse_regexp_node(&new_or_call) else { |
| 232 | + return Default::default(); |
| 233 | + }; |
| 234 | + if !is_regexp_object(&callee, model) { |
| 235 | + return Default::default(); |
| 236 | + } |
| 237 | + let Some(arg_expr) = get_first_arg_expr(&arguments) else { |
| 238 | + return Default::default(); |
| 239 | + }; |
| 240 | + // Try precise per-group diagnostics for simple string literals (no escapes) |
| 241 | + if let Some(ranges) = try_precise_string_ranges(&arg_expr) { |
| 242 | + return ranges; |
| 243 | + } |
| 244 | + // Fallback: use interpreted value, single diagnostic on the whole expression |
| 245 | + let Some(static_val) = arg_expr.omit_parentheses().as_static_value() else { |
| 246 | + return Default::default(); |
| 247 | + }; |
| 248 | + let Some(pattern) = static_val.as_string_constant() else { |
| 249 | + return Default::default(); |
| 250 | + }; |
| 251 | + if find_unnamed_capture_groups(pattern).is_empty() { |
| 252 | + return Default::default(); |
| 253 | + } |
| 254 | + Box::new([node.range()]) |
| 255 | +} |
| 256 | + |
#[cfg(test)]
mod tests {
    use super::*;

    /// Table-driven check of the pattern scanner: each entry pairs a pattern
    /// with the byte offsets of the unnamed groups it is expected to contain.
    #[test]
    fn test_unnamed_capture_groups() {
        let cases: [(&str, &[u32]); 12] = [
            ("(foo)", &[0]),
            ("(?:foo)", &[]),
            ("(?<name>foo)", &[]),
            ("(?=foo)", &[]),
            ("(?!foo)", &[]),
            ("(?<=foo)", &[]),
            ("(?<!foo)", &[]),
            ("(foo)(bar)", &[0, 5]),
            ("\\(foo)", &[]),
            ("\\\\(foo)", &[2]),
            ("[(]foo", &[]),
            ("[\\]]foo", &[]),
        ];

        for (pattern, expected) in cases {
            assert_eq!(
                find_unnamed_capture_groups(pattern),
                expected,
                "pattern: {pattern}"
            );
        }
    }
}
0 commit comments