@@ -22,6 +22,19 @@ use regex_syntax::hir::{Capture, Hir, HirKind, Literal, Look};
/// Upper bound on the number of regex alternatives (`foo|bar|...`) that are
/// expanded into separate `LIKE` expressions.
const MAX_REGEX_ALTERNATIONS_EXPANSION: usize = 4;
2424
25+ /// Tries to convert a regexp expression to a `LIKE` or `Eq`/`NotEq` expression.
26+ ///
27+ /// This function also validates the regex pattern and returns an error if the
28+ /// pattern is invalid.
29+ ///
30+ /// Typical cases this function can simplify:
31+ /// - empty regex pattern to `LIKE '%'`
32+ /// - literal regex patterns to `LIKE '%foo%'`
33+ /// - full anchored regex patterns (e.g. `^foo$`) to `= 'foo'`
34+ /// - partial anchored regex patterns (e.g. `^foo`) to `LIKE 'foo%'`
35+ /// - combinations (alternatives) of the above, which are combined with `OR` or `AND`
36+ ///
37+ /// Dev note: unit tests of this function are in `expr_simplifier.rs`, case `test_simplify_regex`.
2538pub fn simplify_regex_expr (
2639 left : Box < Expr > ,
2740 op : Operator ,
@@ -53,13 +66,15 @@ pub fn simplify_regex_expr(
5366 }
5467 }
5568
56- // leave untouched if optimization didn't work
69+ // Leave untouched if optimization didn't work
5770 Ok ( Expr :: BinaryExpr ( BinaryExpr { left, op, right } ) )
5871}
5972
/// Flags describing how a regex match operator is lowered: whether the match
/// is negated and whether it ignores case.
#[derive(Debug)]
struct OperatorMode {
    /// `true` for a negative match; the generated `LIKE` is negated and the
    /// literal comparison uses `!=` instead of `=`.
    not: bool,
    /// `true` when the match ignores case (case-insensitive).
    i: bool,
}
6580
@@ -80,6 +95,7 @@ impl OperatorMode {
8095 Self { not, i }
8196 }
8297
98+ /// Creates a [`LIKE`](Expr::Like) expression from the given `LIKE` pattern.
8399 fn expr ( & self , expr : Box < Expr > , pattern : String ) -> Expr {
84100 let like = Like {
85101 negated : self . not ,
@@ -92,6 +108,7 @@ impl OperatorMode {
92108 Expr :: Like ( like)
93109 }
94110
111+ /// Creates an [`Expr::BinaryExpr`] of "`left` = `right`" or "`left` != `right`".
95112 fn expr_matches_literal ( & self , left : Box < Expr > , right : Box < Expr > ) -> Expr {
96113 let op = if self . not {
97114 Operator :: NotEq
@@ -118,7 +135,7 @@ fn collect_concat_to_like_string(parts: &[Hir]) -> Option<String> {
118135 Some ( s)
119136}
120137
121- /// returns a str represented by `Literal` if it contains a valid utf8
138+ /// Returns a str represented by `Literal` if it contains a valid utf8
122139/// sequence and is safe for like (has no '%' and '_')
123140fn like_str_from_literal ( l : & Literal ) -> Option < & str > {
124141 // if not utf8, no good
@@ -131,7 +148,7 @@ fn like_str_from_literal(l: &Literal) -> Option<&str> {
131148 }
132149}
133150
134- /// returns a str represented by `Literal` if it contains a valid utf8
151+ /// Returns a str represented by `Literal` if it contains a valid utf8 sequence
135152fn str_from_literal ( l : & Literal ) -> Option < & str > {
136153 // if not utf8, no good
137154 let s = std:: str:: from_utf8 ( & l. 0 ) . ok ( ) ?;
@@ -143,7 +160,7 @@ fn is_safe_for_like(c: char) -> bool {
143160 ( c != '%' ) && ( c != '_' )
144161}
145162
146- /// returns true if the elements in a `Concat` pattern are:
163+ /// Returns true if the elements in a `Concat` pattern are:
147164/// - `[Look::Start, Look::End]`
148165/// - `[Look::Start, Literal(_), Look::End]`
149166fn is_anchored_literal ( v : & [ Hir ] ) -> bool {
@@ -157,10 +174,9 @@ fn is_anchored_literal(v: &[Hir]) -> bool {
157174 v. last ( ) . expect ( "length checked" ) ,
158175 ) ;
159176 if !matches ! ( first_last,
160- ( s, e) if s. kind( ) == & HirKind :: Look ( Look :: Start )
177+ ( s, e) if s. kind( ) == & HirKind :: Look ( Look :: Start )
161178 && e. kind( ) == & HirKind :: Look ( Look :: End )
162- )
163- {
179+ ) {
164180 return false ;
165181 }
166182
@@ -170,7 +186,7 @@ fn is_anchored_literal(v: &[Hir]) -> bool {
170186 . all ( |h| matches ! ( h. kind( ) , HirKind :: Literal ( _) ) )
171187}
172188
173- /// returns true if the elements in a `Concat` pattern are:
189+ /// Returns true if the elements in a `Concat` pattern are:
174190/// - `[Look::Start, Capture(Alternation(Literals...)), Look::End]`
175191fn is_anchored_capture ( v : & [ Hir ] ) -> bool {
176192 if v. len ( ) != 3
@@ -197,7 +213,33 @@ fn is_anchored_capture(v: &[Hir]) -> bool {
197213 true
198214}
199215
200- /// extracts a string literal expression assuming that [`is_anchored_literal`]
216+ /// Returns the `LIKE` pattern if the `Concat` pattern is partial anchored:
217+ /// - `[Look::Start, Literal(_)]`
218+ /// - `[Literal(_), Look::End]`
219+ /// Full anchored patterns are handled by [`anchored_literal_to_expr`].
220+ fn partial_anchored_literal_to_like ( v : & [ Hir ] ) -> Option < String > {
221+ if v. len ( ) != 2 {
222+ return None ;
223+ }
224+
225+ let ( lit, match_begin) = match ( & v[ 0 ] . kind ( ) , & v[ 1 ] . kind ( ) ) {
226+ ( HirKind :: Look ( Look :: Start ) , HirKind :: Literal ( l) ) => {
227+ ( like_str_from_literal ( l) ?, true )
228+ }
229+ ( HirKind :: Literal ( l) , HirKind :: Look ( Look :: End ) ) => {
230+ ( like_str_from_literal ( l) ?, false )
231+ }
232+ _ => return None ,
233+ } ;
234+
235+ if match_begin {
236+ Some ( format ! ( "{}%" , lit) )
237+ } else {
238+ Some ( format ! ( "%{}" , lit) )
239+ }
240+ }
241+
242+ /// Extracts a string literal expression assuming that [`is_anchored_literal`]
201243/// returned true.
202244fn anchored_literal_to_expr ( v : & [ Hir ] ) -> Option < Expr > {
203245 match v. len ( ) {
@@ -246,6 +288,7 @@ fn anchored_alternation_to_exprs(v: &[Hir]) -> Option<Vec<Expr>> {
246288 None
247289}
248290
291+ /// Tries to lower (transform) a simple regex pattern to a LIKE expression.
249292fn lower_simple ( mode : & OperatorMode , left : & Expr , hir : & Hir ) -> Option < Expr > {
250293 match hir. kind ( ) {
251294 HirKind :: Empty => {
@@ -265,7 +308,9 @@ fn lower_simple(mode: &OperatorMode, left: &Expr, hir: &Hir) -> Option<Expr> {
265308 . map ( |right| left. clone ( ) . in_list ( right, mode. not ) ) ;
266309 }
267310 HirKind :: Concat ( inner) => {
268- if let Some ( pattern) = collect_concat_to_like_string ( inner) {
311+ if let Some ( pattern) = partial_anchored_literal_to_like ( inner)
312+ . or ( collect_concat_to_like_string ( inner) )
313+ {
269314 return Some ( mode. expr ( Box :: new ( left. clone ( ) ) , pattern) ) ;
270315 }
271316 }
@@ -274,6 +319,9 @@ fn lower_simple(mode: &OperatorMode, left: &Expr, hir: &Hir) -> Option<Expr> {
274319 None
275320}
276321
322+ /// Calls [`lower_simple`] for each alternative and combines the results with `or` or `and`
323+ /// based on [`OperatorMode`]. Any failed attempt to lower an alternative makes this
324+ /// function return `None`.
277325fn lower_alt ( mode : & OperatorMode , left : & Expr , alts : & [ Hir ] ) -> Option < Expr > {
278326 let mut accu: Option < Expr > = None ;
279327
0 commit comments