22
22
//! `RUSTFLAGS="-C target-feature=+avx2"` for example. See the documentation
23
23
//! [here](https://doc.rust-lang.org/stable/core/arch/) for more information.
24
24
25
- use regex:: Regex ;
25
+ use regex:: RegexBuilder ;
26
26
use std:: collections:: HashMap ;
27
27
28
28
use crate :: array:: * ;
@@ -225,6 +225,23 @@ where
225
225
pub fn like_utf8 < OffsetSize : StringOffsetSizeTrait > (
226
226
left : & GenericStringArray < OffsetSize > ,
227
227
right : & GenericStringArray < OffsetSize > ,
228
+ ) -> Result < BooleanArray > {
229
+ like_utf8_impl ( left, right, true , false )
230
+ }
231
+
232
+ /// Case-insensitive version of [like_utf8]
233
+ pub fn ilike_utf8 < OffsetSize : StringOffsetSizeTrait > (
234
+ left : & GenericStringArray < OffsetSize > ,
235
+ right : & GenericStringArray < OffsetSize > ,
236
+ ) -> Result < BooleanArray > {
237
+ like_utf8_impl ( left, right, false , false )
238
+ }
239
+
240
+ fn like_utf8_impl < OffsetSize : StringOffsetSizeTrait > (
241
+ left : & GenericStringArray < OffsetSize > ,
242
+ right : & GenericStringArray < OffsetSize > ,
243
+ case_sensitive : bool ,
244
+ reverse_results : bool ,
228
245
) -> Result < BooleanArray > {
229
246
let mut map = HashMap :: new ( ) ;
230
247
if left. len ( ) != right. len ( ) {
@@ -245,17 +262,24 @@ pub fn like_utf8<OffsetSize: StringOffsetSizeTrait>(
245
262
regex
246
263
} else {
247
264
let re_pattern = pat. replace ( "%" , ".*" ) . replace ( "_" , "." ) ;
248
- let re = Regex :: new ( & format ! ( "^{}$" , re_pattern) ) . map_err ( |e| {
249
- ArrowError :: ComputeError ( format ! (
250
- "Unable to build regex from LIKE pattern: {}" ,
251
- e
252
- ) )
253
- } ) ?;
265
+ let re = RegexBuilder :: new ( & format ! ( "^{}$" , re_pattern) )
266
+ . case_insensitive ( !case_sensitive)
267
+ . build ( )
268
+ . map_err ( |e| {
269
+ ArrowError :: ComputeError ( format ! (
270
+ "Unable to build regex from LIKE pattern: {}" ,
271
+ e
272
+ ) )
273
+ } ) ?;
254
274
map. insert ( pat, re) ;
255
275
map. get ( pat) . unwrap ( )
256
276
} ;
257
277
258
- result. append ( re. is_match ( haystack) ) ;
278
+ let mut r = re. is_match ( haystack) ;
279
+ if reverse_results {
280
+ r = !r;
281
+ }
282
+ result. append ( r) ;
259
283
}
260
284
261
285
let data = ArrayData :: new (
@@ -281,48 +305,89 @@ fn is_like_pattern(c: char) -> bool {
281
305
pub fn like_utf8_scalar < OffsetSize : StringOffsetSizeTrait > (
282
306
left : & GenericStringArray < OffsetSize > ,
283
307
right : & str ,
308
+ ) -> Result < BooleanArray > {
309
+ like_utf8_scalar_impl ( left, right, true , false )
310
+ }
311
+
312
+ /// Case-insensitive version of [like_utf8_scalar]
313
+ pub fn ilike_utf8_scalar < OffsetSize : StringOffsetSizeTrait > (
314
+ left : & GenericStringArray < OffsetSize > ,
315
+ right : & str ,
316
+ ) -> Result < BooleanArray > {
317
+ like_utf8_scalar_impl ( left, right, false , false )
318
+ }
319
+
320
+ fn like_utf8_scalar_impl < OffsetSize : StringOffsetSizeTrait > (
321
+ left : & GenericStringArray < OffsetSize > ,
322
+ right : & str ,
323
+ case_sensitive : bool ,
324
+ reverse_results : bool ,
284
325
) -> Result < BooleanArray > {
285
326
let null_bit_buffer = left. data ( ) . null_buffer ( ) . cloned ( ) ;
286
327
let bytes = bit_util:: ceil ( left. len ( ) , 8 ) ;
287
328
let mut bool_buf = MutableBuffer :: from_len_zeroed ( bytes) ;
288
329
let bool_slice = bool_buf. as_slice_mut ( ) ;
289
330
290
- if !right. contains ( is_like_pattern) {
331
+ if case_sensitive && !right. contains ( is_like_pattern) {
291
332
// fast path, can use equals
292
333
for i in 0 ..left. len ( ) {
293
- if left. value ( i) == right {
334
+ let mut r = left. value ( i) == right;
335
+ if reverse_results {
336
+ r = !r;
337
+ }
338
+ if r {
294
339
bit_util:: set_bit ( bool_slice, i) ;
295
340
}
296
341
}
297
- } else if right. ends_with ( '%' ) && !right[ ..right. len ( ) - 1 ] . contains ( is_like_pattern)
342
+ } else if case_sensitive
343
+ && right. ends_with ( '%' )
344
+ && !right[ ..right. len ( ) - 1 ] . contains ( is_like_pattern)
298
345
{
299
346
// fast path, can use starts_with
300
347
let starts_with = & right[ ..right. len ( ) - 1 ] ;
301
348
for i in 0 ..left. len ( ) {
302
- if left. value ( i) . starts_with ( starts_with) {
349
+ let mut r = left. value ( i) . starts_with ( starts_with) ;
350
+ if reverse_results {
351
+ r = !r;
352
+ }
353
+ if r {
303
354
bit_util:: set_bit ( bool_slice, i) ;
304
355
}
305
356
}
306
- } else if right. starts_with ( '%' ) && !right[ 1 ..] . contains ( is_like_pattern) {
357
+ } else if case_sensitive
358
+ && right. starts_with ( '%' )
359
+ && !right[ 1 ..] . contains ( is_like_pattern)
360
+ {
307
361
// fast path, can use ends_with
308
362
let ends_with = & right[ 1 ..] ;
309
363
for i in 0 ..left. len ( ) {
310
- if left. value ( i) . ends_with ( ends_with) {
364
+ let mut r = left. value ( i) . ends_with ( ends_with) ;
365
+ if reverse_results {
366
+ r = !r;
367
+ }
368
+ if r {
311
369
bit_util:: set_bit ( bool_slice, i) ;
312
370
}
313
371
}
314
372
} else {
315
373
let re_pattern = right. replace ( "%" , ".*" ) . replace ( "_" , "." ) ;
316
- let re = Regex :: new ( & format ! ( "^{}$" , re_pattern) ) . map_err ( |e| {
317
- ArrowError :: ComputeError ( format ! (
318
- "Unable to build regex from LIKE pattern: {}" ,
319
- e
320
- ) )
321
- } ) ?;
374
+ let re = RegexBuilder :: new ( & format ! ( "^{}$" , re_pattern) )
375
+ . case_insensitive ( !case_sensitive)
376
+ . build ( )
377
+ . map_err ( |e| {
378
+ ArrowError :: ComputeError ( format ! (
379
+ "Unable to build regex from LIKE pattern: {}" ,
380
+ e
381
+ ) )
382
+ } ) ?;
322
383
323
384
for i in 0 ..left. len ( ) {
324
385
let haystack = left. value ( i) ;
325
- if re. is_match ( haystack) {
386
+ let mut r = re. is_match ( haystack) ;
387
+ if reverse_results {
388
+ r = !r;
389
+ }
390
+ if r {
326
391
bit_util:: set_bit ( bool_slice, i) ;
327
392
}
328
393
}
@@ -348,48 +413,15 @@ pub fn nlike_utf8<OffsetSize: StringOffsetSizeTrait>(
348
413
left : & GenericStringArray < OffsetSize > ,
349
414
right : & GenericStringArray < OffsetSize > ,
350
415
) -> Result < BooleanArray > {
351
- let mut map = HashMap :: new ( ) ;
352
- if left. len ( ) != right. len ( ) {
353
- return Err ( ArrowError :: ComputeError (
354
- "Cannot perform comparison operation on arrays of different length"
355
- . to_string ( ) ,
356
- ) ) ;
357
- }
358
-
359
- let null_bit_buffer =
360
- combine_option_bitmap ( left. data_ref ( ) , right. data_ref ( ) , left. len ( ) ) ?;
361
-
362
- let mut result = BooleanBufferBuilder :: new ( left. len ( ) ) ;
363
- for i in 0 ..left. len ( ) {
364
- let haystack = left. value ( i) ;
365
- let pat = right. value ( i) ;
366
- let re = if let Some ( ref regex) = map. get ( pat) {
367
- regex
368
- } else {
369
- let re_pattern = pat. replace ( "%" , ".*" ) . replace ( "_" , "." ) ;
370
- let re = Regex :: new ( & format ! ( "^{}$" , re_pattern) ) . map_err ( |e| {
371
- ArrowError :: ComputeError ( format ! (
372
- "Unable to build regex from LIKE pattern: {}" ,
373
- e
374
- ) )
375
- } ) ?;
376
- map. insert ( pat, re) ;
377
- map. get ( pat) . unwrap ( )
378
- } ;
379
-
380
- result. append ( !re. is_match ( haystack) ) ;
381
- }
416
+ like_utf8_impl ( left, right, true , true )
417
+ }
382
418
383
- let data = ArrayData :: new (
384
- DataType :: Boolean ,
385
- left. len ( ) ,
386
- None ,
387
- null_bit_buffer,
388
- 0 ,
389
- vec ! [ result. finish( ) ] ,
390
- vec ! [ ] ,
391
- ) ;
392
- Ok ( BooleanArray :: from ( data) )
419
+ /// Case-insensitive `NOT ILIKE` operator.
420
+ pub fn nilike_utf8 < OffsetSize : StringOffsetSizeTrait > (
421
+ left : & GenericStringArray < OffsetSize > ,
422
+ right : & GenericStringArray < OffsetSize > ,
423
+ ) -> Result < BooleanArray > {
424
+ like_utf8_impl ( left, right, false , true )
393
425
}
394
426
395
427
/// Perform SQL `left NOT LIKE right` operation on [`StringArray`] /
@@ -400,49 +432,15 @@ pub fn nlike_utf8_scalar<OffsetSize: StringOffsetSizeTrait>(
400
432
left : & GenericStringArray < OffsetSize > ,
401
433
right : & str ,
402
434
) -> Result < BooleanArray > {
403
- let null_bit_buffer = left. data ( ) . null_buffer ( ) . cloned ( ) ;
404
- let mut result = BooleanBufferBuilder :: new ( left. len ( ) ) ;
405
-
406
- if !right. contains ( is_like_pattern) {
407
- // fast path, can use equals
408
- for i in 0 ..left. len ( ) {
409
- result. append ( left. value ( i) != right) ;
410
- }
411
- } else if right. ends_with ( '%' ) && !right[ ..right. len ( ) - 1 ] . contains ( is_like_pattern)
412
- {
413
- // fast path, can use ends_with
414
- for i in 0 ..left. len ( ) {
415
- result. append ( !left. value ( i) . starts_with ( & right[ ..right. len ( ) - 1 ] ) ) ;
416
- }
417
- } else if right. starts_with ( '%' ) && !right[ 1 ..] . contains ( is_like_pattern) {
418
- // fast path, can use starts_with
419
- for i in 0 ..left. len ( ) {
420
- result. append ( !left. value ( i) . ends_with ( & right[ 1 ..] ) ) ;
421
- }
422
- } else {
423
- let re_pattern = right. replace ( "%" , ".*" ) . replace ( "_" , "." ) ;
424
- let re = Regex :: new ( & format ! ( "^{}$" , re_pattern) ) . map_err ( |e| {
425
- ArrowError :: ComputeError ( format ! (
426
- "Unable to build regex from LIKE pattern: {}" ,
427
- e
428
- ) )
429
- } ) ?;
430
- for i in 0 ..left. len ( ) {
431
- let haystack = left. value ( i) ;
432
- result. append ( !re. is_match ( haystack) ) ;
433
- }
434
- }
435
+ like_utf8_scalar_impl ( left, right, true , true )
436
+ }
435
437
436
- let data = ArrayData :: new (
437
- DataType :: Boolean ,
438
- left. len ( ) ,
439
- None ,
440
- null_bit_buffer,
441
- 0 ,
442
- vec ! [ result. finish( ) ] ,
443
- vec ! [ ] ,
444
- ) ;
445
- Ok ( BooleanArray :: from ( data) )
438
+ /// Case-insensitive `NOT ILIKE` operator.
439
+ pub fn nilike_utf8_scalar < OffsetSize : StringOffsetSizeTrait > (
440
+ left : & GenericStringArray < OffsetSize > ,
441
+ right : & str ,
442
+ ) -> Result < BooleanArray > {
443
+ like_utf8_scalar_impl ( left, right, false , true )
446
444
}
447
445
448
446
pub fn eq_bool ( left : & BooleanArray , right : & BooleanArray ) -> Result < BooleanArray > {
0 commit comments