@@ -6,6 +6,7 @@ use clippy_utils::source::SpanRangeExt;
66use  clippy_utils:: { def_path_def_ids,  path_def_id,  paths} ; 
77use  rustc_ast:: ast:: { LitKind ,  StrStyle } ; 
88use  rustc_hir:: def_id:: DefIdMap ; 
9+ use  rustc_hir:: intravisit:: { self ,  Visitor } ; 
910use  rustc_hir:: { BorrowKind ,  Expr ,  ExprKind } ; 
1011use  rustc_lint:: { LateContext ,  LateLintPass } ; 
1112use  rustc_session:: impl_lint_pass; 
@@ -55,6 +56,44 @@ declare_clippy_lint! {
5556    "trivial regular expressions" 
5657} 
5758
59+ declare_clippy_lint !  { 
60+     /// ### What it does 
61+      /// 
62+      /// Checks for [regex](https://crates.io/crates/regex) compilation inside a loop with a literal. 
63+      /// 
64+      /// ### Why is this bad? 
65+      /// 
66+      /// Compiling a regex is a much more expensive operation than using one, and a compiled regex can be used multiple times. 
67+      /// 
68+      /// ### Example 
69+      /// ```no_run 
70+      /// # let haystacks = [""]; 
71+      /// # const MY_REGEX: &str = "a.b"; 
72+      /// for haystack in haystacks { 
73+      ///     let regex = regex::Regex::new(MY_REGEX).unwrap(); 
74+      ///     if regex.is_match(haystack) { 
75+      ///         // Perform operation 
76+      ///     } 
77+      /// } 
78+      /// ``` 
79+      /// should be replaced with 
80+      /// ```no_run 
81+      /// # let haystacks = [""]; 
82+      /// # const MY_REGEX: &str = "a.b"; 
83+      /// let regex = regex::Regex::new(MY_REGEX).unwrap(); 
84+      /// for haystack in haystacks { 
85+      ///     if regex.is_match(haystack) { 
86+      ///         // Perform operation 
87+      ///     } 
88+      /// } 
89+      /// ``` 
90+      /// 
91+      #[ clippy:: version = "1.83.0" ] 
92+     pub  REGEX_COMPILE_IN_LOOP , 
93+     perf, 
94+     "regular expression compilation performed in a loop" 
95+ } 
96+ 
5897#[ derive( Copy ,  Clone ) ]  
5998enum  RegexKind  { 
6099    Unicode , 
@@ -68,7 +107,7 @@ pub struct Regex {
68107    definitions :  DefIdMap < RegexKind > , 
69108} 
70109
71- impl_lint_pass ! ( Regex  => [ INVALID_REGEX ,  TRIVIAL_REGEX ] ) ; 
110+ impl_lint_pass ! ( Regex  => [ INVALID_REGEX ,  TRIVIAL_REGEX ,   REGEX_COMPILE_IN_LOOP ] ) ; 
72111
73112impl < ' tcx >  LateLintPass < ' tcx >  for  Regex  { 
74113    fn  check_crate ( & mut  self ,  cx :  & LateContext < ' tcx > )  { 
@@ -92,17 +131,69 @@ impl<'tcx> LateLintPass<'tcx> for Regex {
92131    } 
93132
94133    fn  check_expr ( & mut  self ,  cx :  & LateContext < ' tcx > ,  expr :  & ' tcx  Expr < ' _ > )  { 
95-         if  let  ExprKind :: Call ( fun,  [ arg] )  = expr. kind 
96-             && let  Some ( def_id)  = path_def_id ( cx,  fun) 
97-             && let  Some ( regex_kind)  = self . definitions . get ( & def_id) 
98-         { 
134+         if  let  Some ( ( regex_kind,  _,  arg) )  = extract_regex_call ( & self . definitions ,  cx,  expr)  { 
99135            match  regex_kind { 
100136                RegexKind :: Unicode  => check_regex ( cx,  arg,  true ) , 
101137                RegexKind :: UnicodeSet  => check_set ( cx,  arg,  true ) , 
102138                RegexKind :: Bytes  => check_regex ( cx,  arg,  false ) , 
103139                RegexKind :: BytesSet  => check_set ( cx,  arg,  false ) , 
104140            } 
105141        } 
142+ 
143+         if  let  ExprKind :: Loop ( block,  _,  _,  span)  = expr. kind  { 
144+             let  mut  visitor = RegexCompVisitor  { 
145+                 cx, 
146+                 loop_span :  span, 
147+                 definitions :  & self . definitions , 
148+             } ; 
149+ 
150+             visitor. visit_block ( block) ; 
151+         } 
152+     } 
153+ } 
154+ 
155+ struct  RegexCompVisitor < ' pass ,  ' tcx >  { 
156+     definitions :  & ' pass  DefIdMap < RegexKind > , 
157+     cx :  & ' pass  LateContext < ' tcx > , 
158+     loop_span :  Span , 
159+ } 
160+ 
161+ impl < ' pass ,  ' tcx >  Visitor < ' tcx >  for  RegexCompVisitor < ' pass ,  ' tcx >  { 
162+     type  NestedFilter  = intravisit:: nested_filter:: None ; 
163+ 
164+     fn  visit_expr ( & mut  self ,  expr :  & ' tcx  Expr < ' tcx > )  { 
165+         if  let  Some ( ( _,  fun,  arg) )  = extract_regex_call ( self . definitions ,  self . cx ,  expr) 
166+             && ( matches ! ( arg. kind,  ExprKind :: Lit ( _) )  || const_str ( self . cx ,  arg) . is_some ( ) ) 
167+         { 
168+             span_lint_and_help ( 
169+                 self . cx , 
170+                 REGEX_COMPILE_IN_LOOP , 
171+                 fun. span , 
172+                 "compiling a regex in a loop" , 
173+                 Some ( self . loop_span ) , 
174+                 "move the regex construction outside this loop" , 
175+             ) ; 
176+         } 
177+ 
178+         // Avoid recursing into loops, as the LateLintPass::visit_expr will do this already. 
179+         if  !matches ! ( expr. kind,  ExprKind :: Loop ( ..) )  { 
180+             intravisit:: walk_expr ( self ,  expr) ; 
181+         } 
182+     } 
183+ } 
184+ 
185+ fn  extract_regex_call < ' tcx > ( 
186+     definitions :  & DefIdMap < RegexKind > , 
187+     cx :  & LateContext < ' tcx > , 
188+     expr :  & ' tcx  Expr < ' tcx > , 
189+ )  -> Option < ( RegexKind ,  & ' tcx  Expr < ' tcx > ,  & ' tcx  Expr < ' tcx > ) >  { 
190+     if  let  ExprKind :: Call ( fun,  [ arg] )  = expr. kind 
191+         && let  Some ( def_id)  = path_def_id ( cx,  fun) 
192+         && let  Some ( regex_kind)  = definitions. get ( & def_id) 
193+     { 
194+         Some ( ( * regex_kind,  fun,  arg) ) 
195+     }  else  { 
196+         None 
106197    } 
107198} 
108199
0 commit comments