@@ -237,6 +237,47 @@ pub fn ilike_utf8<OffsetSize: StringOffsetSizeTrait>(
237
237
like_utf8_impl ( left, right, false , false )
238
238
}
239
239
240
+ fn like_to_regex ( pat : & str ) -> Result < String > {
241
+ let mut is_escaped = false ;
242
+ let mut re_pattern = String :: new ( ) ;
243
+ let regex_chars = "-[]{}()*+?.,^$|#" ;
244
+ for c in pat. chars ( ) {
245
+ if is_escaped {
246
+ is_escaped = false ;
247
+ if c == '%' {
248
+ re_pattern. push ( '%' ) ;
249
+ continue ;
250
+ } else if c == '_' {
251
+ re_pattern. push ( '_' ) ;
252
+ continue ;
253
+ } else if c == '\\' {
254
+ re_pattern. push_str ( "\\ \\ " ) ;
255
+ continue ;
256
+ }
257
+ }
258
+
259
+ if regex_chars. find ( c) . is_some ( ) {
260
+ re_pattern. push ( '\\' ) ;
261
+ re_pattern. push ( c) ;
262
+ } else if c == '%' {
263
+ re_pattern. push_str ( ".*" ) ;
264
+ } else if c == '_' {
265
+ re_pattern. push ( '.' ) ;
266
+ } else if c == '\\' {
267
+ is_escaped = true ;
268
+ } else {
269
+ re_pattern. push ( c) ;
270
+ }
271
+ }
272
+ if is_escaped {
273
+ return Err ( ArrowError :: InvalidArgumentError ( format ! (
274
+ "LIKE pattern must not end with escape character. Pattern {}" ,
275
+ pat
276
+ ) ) ) ;
277
+ }
278
+ Ok ( re_pattern)
279
+ }
280
+
240
281
fn like_utf8_impl < OffsetSize : StringOffsetSizeTrait > (
241
282
left : & GenericStringArray < OffsetSize > ,
242
283
right : & GenericStringArray < OffsetSize > ,
@@ -261,43 +302,7 @@ fn like_utf8_impl<OffsetSize: StringOffsetSizeTrait>(
261
302
let re = if let Some ( ref regex) = map. get ( pat) {
262
303
regex
263
304
} else {
264
- let mut is_escaped = false ;
265
- let mut re_pattern = String :: new ( ) ;
266
- let regex_chars = "-[]{}()*+?.,^$|#" ;
267
- for c in pat. chars ( ) {
268
- if is_escaped {
269
- is_escaped = false ;
270
- if c == '%' {
271
- re_pattern. push ( '%' ) ;
272
- continue ;
273
- } else if c == '_' {
274
- re_pattern. push ( '_' ) ;
275
- continue ;
276
- } else if c == '\\' {
277
- re_pattern. push_str ( "\\ \\ " ) ;
278
- continue ;
279
- }
280
- }
281
-
282
- if regex_chars. find ( c) . is_some ( ) {
283
- re_pattern. push ( '\\' ) ;
284
- re_pattern. push ( c) ;
285
- } else if c == '%' {
286
- re_pattern. push_str ( ".*" ) ;
287
- } else if c == '_' {
288
- re_pattern. push ( '.' ) ;
289
- } else if c == '\\' {
290
- is_escaped = true ;
291
- } else {
292
- re_pattern. push ( c) ;
293
- }
294
- }
295
- if is_escaped {
296
- return Err ( ArrowError :: InvalidArgumentError ( format ! (
297
- "LIKE pattern must not end with escape character. Pattern {}" ,
298
- pat
299
- ) ) ) ;
300
- }
305
+ let re_pattern = like_to_regex ( pat) ?;
301
306
let re = RegexBuilder :: new ( & format ! ( "^{}$" , re_pattern) )
302
307
. case_insensitive ( !case_sensitive)
303
308
. build ( )
@@ -406,29 +411,7 @@ fn like_utf8_scalar_impl<OffsetSize: StringOffsetSizeTrait>(
406
411
}
407
412
}
408
413
} else {
409
- let mut prev_char = None ;
410
- let mut re_pattern = right
411
- . replace (
412
- |c| {
413
- let res = c == '%' && prev_char != Some ( '\\' ) ;
414
- prev_char = Some ( c) ;
415
- res
416
- } ,
417
- ".*" ,
418
- )
419
- . replace ( "\\ %" , "%" ) ;
420
-
421
- let mut prev_char = None ;
422
- re_pattern = re_pattern
423
- . replace (
424
- |c| {
425
- let res = c == '_' && prev_char != Some ( '\\' ) ;
426
- prev_char = Some ( c) ;
427
- res
428
- } ,
429
- "." ,
430
- )
431
- . replace ( "\\ _" , "_" ) ;
414
+ let re_pattern = like_to_regex ( right) ?;
432
415
let re = RegexBuilder :: new ( & format ! ( "^{}$" , re_pattern) )
433
416
. case_insensitive ( !case_sensitive)
434
417
. build ( )
0 commit comments