@@ -30,7 +30,6 @@ use datafusion_expr::{
3030} ;
3131use itertools:: izip;
3232use regex:: Regex ;
33- use std:: collections:: hash_map:: Entry ;
3433use std:: collections:: HashMap ;
3534use std:: sync:: { Arc , OnceLock } ;
3635
@@ -312,12 +311,12 @@ where
312311
313312 let pattern = compile_regex ( regex, flags_scalar) ?;
314313
315- Ok ( Arc :: new ( Int64Array :: from_iter_values (
314+ Ok ( Arc :: new (
316315 values
317316 . iter ( )
318317 . map ( |value| count_matches ( value, & pattern, start_scalar) )
319- . collect :: < Result < Vec < i64 > , ArrowError > > ( ) ?,
320- ) ) )
318+ . collect :: < Result < Int64Array , ArrowError > > ( ) ?,
319+ ) )
321320 }
322321 ( true , true , false ) => {
323322 let regex = match regex_scalar {
@@ -336,17 +335,17 @@ where
336335 ) ) ) ;
337336 }
338337
339- Ok ( Arc :: new ( Int64Array :: from_iter_values (
338+ Ok ( Arc :: new (
340339 values
341340 . iter ( )
342341 . zip ( flags_array. iter ( ) )
343342 . map ( |( value, flags) | {
344343 let pattern =
345344 compile_and_cache_regex ( regex, flags, & mut regex_cache) ?;
346- count_matches ( value, & pattern, start_scalar)
345+ count_matches ( value, pattern, start_scalar)
347346 } )
348- . collect :: < Result < Vec < i64 > , ArrowError > > ( ) ?,
349- ) ) )
347+ . collect :: < Result < Int64Array , ArrowError > > ( ) ?,
348+ ) )
350349 }
351350 ( true , false , true ) => {
352351 let regex = match regex_scalar {
@@ -360,13 +359,13 @@ where
360359
361360 let start_array = start_array. unwrap ( ) ;
362361
363- Ok ( Arc :: new ( Int64Array :: from_iter_values (
362+ Ok ( Arc :: new (
364363 values
365364 . iter ( )
366365 . zip ( start_array. iter ( ) )
367366 . map ( |( value, start) | count_matches ( value, & pattern, start) )
368- . collect :: < Result < Vec < i64 > , ArrowError > > ( ) ?,
369- ) ) )
367+ . collect :: < Result < Int64Array , ArrowError > > ( ) ?,
368+ ) )
370369 }
371370 ( true , false , false ) => {
372371 let regex = match regex_scalar {
@@ -385,7 +384,7 @@ where
385384 ) ) ) ;
386385 }
387386
388- Ok ( Arc :: new ( Int64Array :: from_iter_values (
387+ Ok ( Arc :: new (
389388 izip ! (
390389 values. iter( ) ,
391390 start_array. unwrap( ) . iter( ) ,
@@ -395,10 +394,10 @@ where
395394 let pattern =
396395 compile_and_cache_regex ( regex, flags, & mut regex_cache) ?;
397396
398- count_matches ( value, & pattern, start)
397+ count_matches ( value, pattern, start)
399398 } )
400- . collect :: < Result < Vec < i64 > , ArrowError > > ( ) ?,
401- ) ) )
399+ . collect :: < Result < Int64Array , ArrowError > > ( ) ?,
400+ ) )
402401 }
403402 ( false , true , true ) => {
404403 if values. len ( ) != regex_array. len ( ) {
@@ -409,7 +408,7 @@ where
409408 ) ) ) ;
410409 }
411410
412- Ok ( Arc :: new ( Int64Array :: from_iter_values (
411+ Ok ( Arc :: new (
413412 values
414413 . iter ( )
415414 . zip ( regex_array. iter ( ) )
@@ -424,10 +423,10 @@ where
424423 flags_scalar,
425424 & mut regex_cache,
426425 ) ?;
427- count_matches ( value, & pattern, start_scalar)
426+ count_matches ( value, pattern, start_scalar)
428427 } )
429- . collect :: < Result < Vec < i64 > , ArrowError > > ( ) ?,
430- ) ) )
428+ . collect :: < Result < Int64Array , ArrowError > > ( ) ?,
429+ ) )
431430 }
432431 ( false , true , false ) => {
433432 if values. len ( ) != regex_array. len ( ) {
@@ -447,7 +446,7 @@ where
447446 ) ) ) ;
448447 }
449448
450- Ok ( Arc :: new ( Int64Array :: from_iter_values (
449+ Ok ( Arc :: new (
451450 izip ! ( values. iter( ) , regex_array. iter( ) , flags_array. iter( ) )
452451 . map ( |( value, regex, flags) | {
453452 let regex = match regex {
@@ -458,10 +457,10 @@ where
458457 let pattern =
459458 compile_and_cache_regex ( regex, flags, & mut regex_cache) ?;
460459
461- count_matches ( value, & pattern, start_scalar)
460+ count_matches ( value, pattern, start_scalar)
462461 } )
463- . collect :: < Result < Vec < i64 > , ArrowError > > ( ) ?,
464- ) ) )
462+ . collect :: < Result < Int64Array , ArrowError > > ( ) ?,
463+ ) )
465464 }
466465 ( false , false , true ) => {
467466 if values. len ( ) != regex_array. len ( ) {
@@ -481,7 +480,7 @@ where
481480 ) ) ) ;
482481 }
483482
484- Ok ( Arc :: new ( Int64Array :: from_iter_values (
483+ Ok ( Arc :: new (
485484 izip ! ( values. iter( ) , regex_array. iter( ) , start_array. iter( ) )
486485 . map ( |( value, regex, start) | {
487486 let regex = match regex {
@@ -494,10 +493,10 @@ where
494493 flags_scalar,
495494 & mut regex_cache,
496495 ) ?;
497- count_matches ( value, & pattern, start)
496+ count_matches ( value, pattern, start)
498497 } )
499- . collect :: < Result < Vec < i64 > , ArrowError > > ( ) ?,
500- ) ) )
498+ . collect :: < Result < Int64Array , ArrowError > > ( ) ?,
499+ ) )
501500 }
502501 ( false , false , false ) => {
503502 if values. len ( ) != regex_array. len ( ) {
@@ -526,7 +525,7 @@ where
526525 ) ) ) ;
527526 }
528527
529- Ok ( Arc :: new ( Int64Array :: from_iter_values (
528+ Ok ( Arc :: new (
530529 izip ! (
531530 values. iter( ) ,
532531 regex_array. iter( ) ,
@@ -541,27 +540,24 @@ where
541540
542541 let pattern =
543542 compile_and_cache_regex ( regex, flags, & mut regex_cache) ?;
544- count_matches ( value, & pattern, start)
543+ count_matches ( value, pattern, start)
545544 } )
546- . collect :: < Result < Vec < i64 > , ArrowError > > ( ) ?,
547- ) ) )
545+ . collect :: < Result < Int64Array , ArrowError > > ( ) ?,
546+ ) )
548547 }
549548 }
550549}
551550
552- fn compile_and_cache_regex (
553- regex : & str ,
554- flags : Option < & str > ,
555- regex_cache : & mut HashMap < String , Regex > ,
556- ) -> Result < Regex , ArrowError > {
557- match regex_cache. entry ( regex. to_string ( ) ) {
558- Entry :: Vacant ( entry) => {
559- let compiled = compile_regex ( regex, flags) ?;
560- entry. insert ( compiled. clone ( ) ) ;
561- Ok ( compiled)
562- }
563- Entry :: Occupied ( entry) => Ok ( entry. get ( ) . to_owned ( ) ) ,
551+ fn compile_and_cache_regex < ' a > (
552+ regex : & ' a str ,
553+ flags : Option < & ' a str > ,
554+ regex_cache : & ' a mut HashMap < String , Regex > ,
555+ ) -> Result < & ' a Regex , ArrowError > {
556+ if !regex_cache. contains_key ( regex) {
557+ let compiled = compile_regex ( regex, flags) ?;
558+ regex_cache. insert ( regex. to_string ( ) , compiled) ;
564559 }
560+ Ok ( regex_cache. get ( regex) . unwrap ( ) )
565561}
566562
567563fn compile_regex ( regex : & str , flags : Option < & str > ) -> Result < Regex , ArrowError > {
0 commit comments