Skip to content

Commit 9bbdf4d

Browse files
feat(lint): add nursery rule useNamedCaptureGroup (#9048)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
1 parent 9a23352 commit 9bbdf4d

File tree

13 files changed

+685
-0
lines changed

13 files changed

+685
-0
lines changed

.changeset/loose-cooks-report.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
---
2+
"@biomejs/biome": patch
3+
---
4+
5+
Added the nursery rule [`useNamedCaptureGroup`](https://biomejs.dev/linter/rules/use-named-capture-group/).
6+
The rule enforces using named capture groups in regular expressions instead of numbered ones. It supports both regex literals and `RegExp` constructor calls.
7+
8+
```js
9+
// Invalid: unnamed capture group
10+
/(foo)/;
11+
new RegExp("(foo)");
12+
13+
// Valid: named capture group
14+
/(?<id>foo)/;
15+
new RegExp("(?<id>foo)");
16+
```

crates/biome_cli/src/execute/migrate/eslint_any_rule_to_biome.rs

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/biome_configuration/src/analyzer/linter/rules.rs

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/biome_diagnostics_categories/src/categories.rs

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 276 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,276 @@
1+
use biome_analyze::{Rule, RuleDiagnostic, RuleSource, context::RuleContext, declare_lint_rule};
2+
use biome_console::markup;
3+
use biome_js_semantic::SemanticModel;
4+
use biome_js_syntax::{
5+
AnyJsCallArgument, AnyJsExpression, JsCallArguments, JsNewOrCallExpression,
6+
JsRegexLiteralExpression, global_identifier,
7+
};
8+
use biome_rowan::{AstNode, AstSeparatedList, TextRange, TextSize};
9+
use biome_rule_options::use_named_capture_group::UseNamedCaptureGroupOptions;
10+
11+
use crate::services::semantic::Semantic;
12+
13+
declare_lint_rule! {
14+
/// Enforce using named capture groups in regular expressions.
15+
///
16+
/// Numbered capture groups like `(...)` can be difficult to work with,
17+
/// as they are matched by their position and not by a descriptive name.
18+
/// Named capture groups (`(?<name>...)`) associate a descriptive name
19+
/// with each match, making the regular expression more readable and
20+
/// its intent clearer.
21+
///
22+
/// ## Examples
23+
///
24+
/// ### Invalid
25+
///
26+
/// ```js,expect_diagnostic
27+
/// /(ba[rz])/;
28+
/// ```
29+
///
30+
/// ```js,expect_diagnostic
31+
/// /([0-9]{4})/;
32+
/// ```
33+
///
34+
/// ```js,expect_diagnostic
35+
/// /(?:ab)(cd)/;
36+
/// ```
37+
///
38+
/// ```js,expect_diagnostic
39+
/// new RegExp("(foo)");
40+
/// ```
41+
///
42+
/// ```js,expect_diagnostic
43+
/// RegExp("(foo)");
44+
/// ```
45+
///
46+
/// ### Valid
47+
///
48+
/// ```js
49+
/// /(?<id>ba[rz])/;
50+
/// /(?:ba[rz])/;
51+
/// /ba[rz]/;
52+
/// /(?<year>[0-9]{4})-(?<month>[0-9]{2})/;
53+
/// new RegExp("(?<id>foo)");
54+
/// new RegExp(pattern);
55+
/// ```
56+
///
57+
pub UseNamedCaptureGroup {
58+
version: "next",
59+
name: "useNamedCaptureGroup",
60+
language: "js",
61+
sources: &[RuleSource::Eslint("prefer-named-capture-group").same()],
62+
recommended: false,
63+
}
64+
}
65+
66+
impl Rule for UseNamedCaptureGroup {
67+
type Query = Semantic<AnyJsExpression>;
68+
type State = TextRange;
69+
type Signals = Box<[Self::State]>;
70+
type Options = UseNamedCaptureGroupOptions;
71+
72+
fn run(ctx: &RuleContext<Self>) -> Self::Signals {
73+
let node = ctx.query();
74+
match node {
75+
AnyJsExpression::AnyJsLiteralExpression(
76+
biome_js_syntax::AnyJsLiteralExpression::JsRegexLiteralExpression(regex),
77+
) => run_regex_literal(regex),
78+
AnyJsExpression::JsNewExpression(_) | AnyJsExpression::JsCallExpression(_) => {
79+
run_regexp_constructor(node, ctx.model())
80+
}
81+
_ => Default::default(),
82+
}
83+
}
84+
85+
fn diagnostic(_: &RuleContext<Self>, state: &Self::State) -> Option<RuleDiagnostic> {
86+
Some(
87+
RuleDiagnostic::new(
88+
rule_category!(),
89+
state,
90+
markup! {
91+
"Capture group is not named."
92+
},
93+
)
94+
.note(markup! {
95+
"Named capture groups improve readability by associating a descriptive name with each match. Use "<Emphasis>"(?<name>...)"</Emphasis>" instead of "<Emphasis>"(...)"</Emphasis>"."
96+
}),
97+
)
98+
}
99+
}
100+
101+
/// Finds the byte offsets of unnamed capture groups in a regex pattern.
///
/// The pattern is scanned once, byte by byte:
///
/// - a backslash consumes the byte that follows it, so `\(` is not a group;
/// - everything from `[` up to the next unescaped `]` is skipped, so a `(`
///   inside a character class is not a group;
/// - a `(` immediately followed by `?` (`(?:`, `(?=`, `(?!`, `(?<=`, `(?<!`,
///   `(?<name>`) is skipped; every other `(` is reported.
///
/// Returns the offsets, relative to the start of `pattern` and in ascending
/// order, of each reported `(`.
///
/// Offsets are stored as `u32`. A `(` whose offset does not fit ends the scan
/// instead of being recorded with a wrapped (and therefore wrong) position.
fn find_unnamed_capture_groups(pattern: &str) -> Vec<u32> {
    let mut offsets = Vec::new();
    // Scanning bytes is safe for UTF-8 input: every byte matched below is
    // ASCII, and no byte of a multi-byte sequence is in the ASCII range.
    let mut bytes = pattern.bytes().enumerate().peekable();

    while let Some((index, byte)) = bytes.next() {
        match byte {
            // Escape sequence: whatever follows the backslash is literal.
            b'\\' => {
                bytes.next();
            }
            // Character class: skip to the closing `]`, honoring escapes.
            // An unterminated class swallows the rest of the pattern.
            b'[' => {
                while let Some((_, class_byte)) = bytes.next() {
                    match class_byte {
                        b'\\' => {
                            bytes.next();
                        }
                        b']' => break,
                        _ => {}
                    }
                }
            }
            // `(` not followed by `?` opens an unnamed capture group.
            b'(' if !bytes.peek().is_some_and(|&(_, next)| next == b'?') => {
                // Indices only grow, so once one does not fit in `u32` none
                // of the remaining ones will either.
                let Ok(offset) = u32::try_from(index) else {
                    break;
                };
                offsets.push(offset);
            }
            _ => {}
        }
    }

    offsets
}
136+
137+
fn is_regexp_object(expr: &AnyJsExpression, model: &SemanticModel) -> bool {
138+
match global_identifier(&expr.clone().omit_parentheses()) {
139+
Some((reference, name)) => match model.binding(&reference) {
140+
Some(_) if !reference.is_global_this() && !reference.has_name("window") => false,
141+
_ => name.text() == "RegExp",
142+
},
143+
None => false,
144+
}
145+
}
146+
147+
fn parse_regexp_node(
148+
node: &JsNewOrCallExpression,
149+
) -> Option<(AnyJsExpression, JsCallArguments)> {
150+
match node {
151+
JsNewOrCallExpression::JsNewExpression(node) => {
152+
let callee = node.callee().ok()?;
153+
let args = node.arguments()?;
154+
Some((callee, args))
155+
}
156+
JsNewOrCallExpression::JsCallExpression(node) => {
157+
let callee = node.callee().ok()?;
158+
let args = node.arguments().ok()?;
159+
Some((callee, args))
160+
}
161+
}
162+
}
163+
164+
fn get_first_arg_expr(arguments: &JsCallArguments) -> Option<AnyJsExpression> {
165+
let first_arg = arguments.args().iter().next()?;
166+
let Ok(AnyJsCallArgument::AnyJsExpression(expr)) = first_arg else {
167+
return None;
168+
};
169+
Some(expr)
170+
}
171+
172+
/// Try to compute precise TextRange for each unnamed group in a string literal.
173+
/// Returns `Some` if the argument is a simple string literal without escape sequences
174+
/// (so byte offsets map 1:1 to source positions). Returns `None` otherwise.
175+
fn try_precise_string_ranges(arg_expr: &AnyJsExpression) -> Option<Box<[TextRange]>> {
176+
let AnyJsExpression::AnyJsLiteralExpression(
177+
biome_js_syntax::AnyJsLiteralExpression::JsStringLiteralExpression(string_lit),
178+
) = arg_expr
179+
else {
180+
return None;
181+
};
182+
let token = string_lit.value_token().ok()?;
183+
let token_text = token.text_trimmed();
184+
let raw_inner = &token_text[1..token_text.len() - 1];
185+
let inner_text = string_lit.inner_string_text().ok()?;
186+
// If raw source and interpreted text differ, escapes are present
187+
if raw_inner != inner_text.text() {
188+
return None;
189+
}
190+
let offsets = find_unnamed_capture_groups(raw_inner);
191+
if offsets.is_empty() {
192+
return Some(Default::default());
193+
}
194+
let content_start = token.text_trimmed_range().start() + TextSize::from(1);
195+
Some(
196+
offsets
197+
.into_iter()
198+
.map(|offset| {
199+
let start = content_start + TextSize::from(offset);
200+
TextRange::new(start, start + TextSize::from(1))
201+
})
202+
.collect(),
203+
)
204+
}
205+
206+
fn run_regex_literal(node: &JsRegexLiteralExpression) -> Box<[TextRange]> {
207+
let Ok((pattern, _flags)) = node.decompose() else {
208+
return Default::default();
209+
};
210+
let pattern_text = pattern.text();
211+
let offsets = find_unnamed_capture_groups(pattern_text);
212+
if offsets.is_empty() {
213+
return Default::default();
214+
}
215+
let pattern_start = node.range().start() + TextSize::from(1);
216+
offsets
217+
.into_iter()
218+
.map(|offset| {
219+
let start = pattern_start + TextSize::from(offset);
220+
TextRange::new(start, start + TextSize::from(1))
221+
})
222+
.collect()
223+
}
224+
225+
fn run_regexp_constructor(node: &AnyJsExpression, model: &SemanticModel) -> Box<[TextRange]> {
226+
let new_or_call = match node {
227+
AnyJsExpression::JsNewExpression(n) => JsNewOrCallExpression::from(n.clone()),
228+
AnyJsExpression::JsCallExpression(n) => JsNewOrCallExpression::from(n.clone()),
229+
_ => return Default::default(),
230+
};
231+
let Some((callee, arguments)) = parse_regexp_node(&new_or_call) else {
232+
return Default::default();
233+
};
234+
if !is_regexp_object(&callee, model) {
235+
return Default::default();
236+
}
237+
let Some(arg_expr) = get_first_arg_expr(&arguments) else {
238+
return Default::default();
239+
};
240+
// Try precise per-group diagnostics for simple string literals (no escapes)
241+
if let Some(ranges) = try_precise_string_ranges(&arg_expr) {
242+
return ranges;
243+
}
244+
// Fallback: use interpreted value, single diagnostic on the whole expression
245+
let Some(static_val) = arg_expr.omit_parentheses().as_static_value() else {
246+
return Default::default();
247+
};
248+
let Some(pattern) = static_val.as_string_constant() else {
249+
return Default::default();
250+
};
251+
if find_unnamed_capture_groups(pattern).is_empty() {
252+
return Default::default();
253+
}
254+
Box::new([node.range()])
255+
}
256+
257+
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the scanner against plain groups, every `(?...)` form, escaped
    /// parentheses and parentheses inside character classes.
    #[test]
    fn test_unnamed_capture_groups() {
        // (pattern, expected offsets of unnamed groups)
        let cases: &[(&str, &[u32])] = &[
            ("(foo)", &[0]),
            ("(?:foo)", &[]),
            ("(?<name>foo)", &[]),
            ("(?=foo)", &[]),
            ("(?!foo)", &[]),
            ("(?<=foo)", &[]),
            ("(?<!foo)", &[]),
            ("(foo)(bar)", &[0, 5]),
            ("\\(foo)", &[]),
            ("\\\\(foo)", &[2]),
            ("[(]foo", &[]),
            ("[\\]]foo", &[]),
        ];

        for &(pattern, expected) in cases {
            assert_eq!(
                find_unnamed_capture_groups(pattern),
                expected,
                "pattern: {pattern:?}"
            );
        }
    }
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
// Every statement below contains at least one unnamed capture group `(...)`,
// either in a regex literal or in a string passed to `RegExp` (with and
// without `new`).
// NOTE(review): presumably this is the rule's "invalid" fixture, so each
// statement is expected to be reported — confirm against the snapshot.
/(ba[rz])/;
2+
/([0-9]{4})/;
3+
/(foo)(bar)/;
4+
/(?:ab)(cd)/;
5+
/([a-z])\1/;
6+
new RegExp("(foo)");
7+
RegExp("(foo)");
8+
new RegExp("(foo)(bar)");
9+
new RegExp("\\d+(foo)");

0 commit comments

Comments
 (0)