@@ -6,6 +6,7 @@ use clippy_utils::source::SpanRangeExt;
66use clippy_utils:: { def_path_def_ids, path_def_id, paths} ;
77use rustc_ast:: ast:: { LitKind , StrStyle } ;
88use rustc_hir:: def_id:: DefIdMap ;
9+ use rustc_hir:: intravisit:: { self , Visitor } ;
910use rustc_hir:: { BorrowKind , Expr , ExprKind } ;
1011use rustc_lint:: { LateContext , LateLintPass } ;
1112use rustc_session:: impl_lint_pass;
@@ -55,6 +56,42 @@ declare_clippy_lint! {
5556 "trivial regular expressions"
5657}
5758
59+ declare_clippy_lint ! {
60+ /// ### What it does
61+ ///
62+ /// Checks for [regex](https://crates.io/crates/regex) compilation inside a loop with a literal.
63+ ///
64+ /// ### Why is this bad?
65+ ///
66+ /// Compiling a regex is a much more expensive operation than using one, and a compiled regex can be used multiple times.
67+ ///
68+ /// ### Example
69+ /// ```no_run
70+ /// # let haystacks = [""];
71+ /// for haystack in haystacks {
72+ /// let regex = regex::Regex::new(MY_REGEX);
73+ /// if regex.is_match(heystack) {
74+ /// // Perform operation
75+ /// }
76+ /// }
77+ /// ```
78+ /// should be replaced with
79+ /// ```no_run
80+ /// # let haystacks = [""];
81+ /// let regex = regex::Regex::new(MY_REGEX);
82+ /// for haystack in haystacks {
83+ /// if regex.is_match(heystack) {
84+ /// // Perform operation
85+ /// }
86+ /// }
87+ /// ```
88+ ///
89+ #[ clippy:: version = "1.83.0" ]
90+ pub REGEX_COMPILE_IN_LOOP ,
91+ perf,
92+ "regular expression compilation performed in a loop"
93+ }
94+
5895#[ derive( Copy , Clone ) ]
5996enum RegexKind {
6097 Unicode ,
@@ -68,7 +105,7 @@ pub struct Regex {
68105 definitions : DefIdMap < RegexKind > ,
69106}
70107
71- impl_lint_pass ! ( Regex => [ INVALID_REGEX , TRIVIAL_REGEX ] ) ;
108+ impl_lint_pass ! ( Regex => [ INVALID_REGEX , TRIVIAL_REGEX , REGEX_COMPILE_IN_LOOP ] ) ;
72109
73110impl < ' tcx > LateLintPass < ' tcx > for Regex {
74111 fn check_crate ( & mut self , cx : & LateContext < ' tcx > ) {
@@ -92,17 +129,69 @@ impl<'tcx> LateLintPass<'tcx> for Regex {
92129 }
93130
94131 fn check_expr ( & mut self , cx : & LateContext < ' tcx > , expr : & ' tcx Expr < ' _ > ) {
95- if let ExprKind :: Call ( fun, [ arg] ) = expr. kind
96- && let Some ( def_id) = path_def_id ( cx, fun)
97- && let Some ( regex_kind) = self . definitions . get ( & def_id)
98- {
132+ if let Some ( ( regex_kind, _, arg) ) = extract_regex_call ( & self . definitions , cx, expr) {
99133 match regex_kind {
100134 RegexKind :: Unicode => check_regex ( cx, arg, true ) ,
101135 RegexKind :: UnicodeSet => check_set ( cx, arg, true ) ,
102136 RegexKind :: Bytes => check_regex ( cx, arg, false ) ,
103137 RegexKind :: BytesSet => check_set ( cx, arg, false ) ,
104138 }
105139 }
140+
141+ if let ExprKind :: Loop ( block, _, _, span) = expr. kind {
142+ let mut visitor = RegexCompVisitor {
143+ cx,
144+ loop_span : span,
145+ definitions : & self . definitions ,
146+ } ;
147+
148+ visitor. visit_block ( block) ;
149+ }
150+ }
151+ }
152+
153+ struct RegexCompVisitor < ' pass , ' tcx > {
154+ definitions : & ' pass DefIdMap < RegexKind > ,
155+ cx : & ' pass LateContext < ' tcx > ,
156+ loop_span : Span ,
157+ }
158+
159+ impl < ' pass , ' tcx > Visitor < ' tcx > for RegexCompVisitor < ' pass , ' tcx > {
160+ type NestedFilter = intravisit:: nested_filter:: None ;
161+
162+ fn visit_expr ( & mut self , expr : & ' tcx Expr < ' tcx > ) {
163+ if let Some ( ( _, fun, arg) ) = extract_regex_call ( self . definitions , self . cx , expr)
164+ && ( matches ! ( arg. kind, ExprKind :: Lit ( _) ) || const_str ( self . cx , arg) . is_some ( ) )
165+ {
166+ span_lint_and_help (
167+ self . cx ,
168+ REGEX_COMPILE_IN_LOOP ,
169+ fun. span ,
170+ "compiling a regex in a loop" ,
171+ Some ( self . loop_span ) ,
172+ "move the regex construction outside this loop" ,
173+ ) ;
174+ }
175+
176+ // Avoid recursing into loops, as the LateLintPass::visit_expr will do this already.
177+ if !matches ! ( expr. kind, ExprKind :: Loop ( ..) ) {
178+ intravisit:: walk_expr ( self , expr) ;
179+ }
180+ }
181+ }
182+
183+ fn extract_regex_call < ' tcx > (
184+ definitions : & DefIdMap < RegexKind > ,
185+ cx : & LateContext < ' tcx > ,
186+ expr : & ' tcx Expr < ' tcx > ,
187+ ) -> Option < ( RegexKind , & ' tcx Expr < ' tcx > , & ' tcx Expr < ' tcx > ) > {
188+ if let ExprKind :: Call ( fun, [ arg] ) = expr. kind
189+ && let Some ( def_id) = path_def_id ( cx, fun)
190+ && let Some ( regex_kind) = definitions. get ( & def_id)
191+ {
192+ Some ( ( * regex_kind, fun, arg) )
193+ } else {
194+ None
106195 }
107196}
108197
0 commit comments