@@ -162,12 +162,12 @@ fn spark_substring(args: &[ArrayRef]) -> Result<ArrayRef> {
162162/// Returns the converted 1-based start position for use with `get_true_start_end`.
163163#[ inline]
164164fn spark_start_to_datafusion_start ( start : i64 , len : usize ) -> i64 {
165- let start = if start >= 0 {
165+ if start >= 0 {
166166 start. max ( 1 )
167167 } else {
168- start + len as i64 + 1
169- } ;
170- start . max ( 1 )
168+ let start = start + len as i64 + 1 ;
169+ start . max ( 1 )
170+ }
171171}
172172
173173fn spark_substring_utf8 < ' a , O , V > (
@@ -282,244 +282,3 @@ fn spark_substring_view(
282282
283283 Ok ( Arc :: new ( builder. finish ( ) ) as ArrayRef )
284284}
285-
286- #[ cfg( test) ]
287- mod tests {
288- use arrow:: array:: { Array , LargeStringArray , StringArray , StringViewArray } ;
289- use arrow:: datatypes:: DataType :: { self } ;
290-
291- use datafusion_common:: { Result , ScalarValue } ;
292- use datafusion_expr:: { ColumnarValue , ScalarUDFImpl } ;
293-
294- use crate :: function:: string:: substring:: SparkSubstring ;
295- use crate :: function:: utils:: test:: test_scalar_function;
296-
297- /// Test macro for substring(str, pos) - 2 argument version
298- macro_rules! test_substring_2 {
299- ( $INPUT: expr, $POS: expr, $EXPECTED: expr) => {
300- test_scalar_function!(
301- SparkSubstring :: new( ) ,
302- vec![
303- ColumnarValue :: Scalar ( ScalarValue :: Utf8 ( $INPUT) ) ,
304- ColumnarValue :: Scalar ( ScalarValue :: Int64 ( $POS) )
305- ] ,
306- $EXPECTED,
307- & str ,
308- DataType :: Utf8 ,
309- StringArray
310- ) ;
311-
312- test_scalar_function!(
313- SparkSubstring :: new( ) ,
314- vec![
315- ColumnarValue :: Scalar ( ScalarValue :: LargeUtf8 ( $INPUT) ) ,
316- ColumnarValue :: Scalar ( ScalarValue :: Int64 ( $POS) )
317- ] ,
318- $EXPECTED,
319- & str ,
320- DataType :: LargeUtf8 ,
321- LargeStringArray
322- ) ;
323-
324- test_scalar_function!(
325- SparkSubstring :: new( ) ,
326- vec![
327- ColumnarValue :: Scalar ( ScalarValue :: Utf8View ( $INPUT) ) ,
328- ColumnarValue :: Scalar ( ScalarValue :: Int64 ( $POS) )
329- ] ,
330- $EXPECTED,
331- & str ,
332- DataType :: Utf8View ,
333- StringViewArray
334- ) ;
335- } ;
336- }
337-
338- /// Test macro for substring(str, pos, len) - 3 argument version
339- macro_rules! test_substring_3 {
340- ( $INPUT: expr, $POS: expr, $LEN: expr, $EXPECTED: expr) => {
341- test_scalar_function!(
342- SparkSubstring :: new( ) ,
343- vec![
344- ColumnarValue :: Scalar ( ScalarValue :: Utf8 ( $INPUT) ) ,
345- ColumnarValue :: Scalar ( ScalarValue :: Int64 ( $POS) ) ,
346- ColumnarValue :: Scalar ( ScalarValue :: Int64 ( $LEN) )
347- ] ,
348- $EXPECTED,
349- & str ,
350- DataType :: Utf8 ,
351- StringArray
352- ) ;
353-
354- test_scalar_function!(
355- SparkSubstring :: new( ) ,
356- vec![
357- ColumnarValue :: Scalar ( ScalarValue :: LargeUtf8 ( $INPUT) ) ,
358- ColumnarValue :: Scalar ( ScalarValue :: Int64 ( $POS) ) ,
359- ColumnarValue :: Scalar ( ScalarValue :: Int64 ( $LEN) )
360- ] ,
361- $EXPECTED,
362- & str ,
363- DataType :: LargeUtf8 ,
364- LargeStringArray
365- ) ;
366-
367- test_scalar_function!(
368- SparkSubstring :: new( ) ,
369- vec![
370- ColumnarValue :: Scalar ( ScalarValue :: Utf8View ( $INPUT) ) ,
371- ColumnarValue :: Scalar ( ScalarValue :: Int64 ( $POS) ) ,
372- ColumnarValue :: Scalar ( ScalarValue :: Int64 ( $LEN) )
373- ] ,
374- $EXPECTED,
375- & str ,
376- DataType :: Utf8View ,
377- StringViewArray
378- ) ;
379- } ;
380- }
381-
382- #[ test]
383- fn test_length_is_respected ( ) -> Result < ( ) > {
384- // substring("hello, world!", 2, 7) -> "ello, w"
385- test_substring_3 ! (
386- Some ( String :: from( "hello, world!" ) ) ,
387- Some ( 2 ) ,
388- Some ( 7 ) ,
389- Ok ( Some ( "ello, w" ) )
390- ) ;
391- // substring("short", 2, 7) -> "hort" (length exceeds remaining)
392- test_substring_3 ! (
393- Some ( String :: from( "short" ) ) ,
394- Some ( 2 ) ,
395- Some ( 7 ) ,
396- Ok ( Some ( "hort" ) )
397- ) ;
398- Ok ( ( ) )
399- }
400-
401- #[ test]
402- fn test_negative_start_with_length ( ) -> Result < ( ) > {
403- // substring("hello, world!", -5, 3) -> "orl"
404- // -5 + 13 = 8, so start at index 8 (0-indexed), take 3 chars
405- test_substring_3 ! (
406- Some ( String :: from( "hello, world!" ) ) ,
407- Some ( -5 ) ,
408- Some ( 3 ) ,
409- Ok ( Some ( "orl" ) )
410- ) ;
411- // substring("short", -5, 3) -> "sho"
412- // -5 + 5 = 0, so start at index 0, take 3 chars
413- test_substring_3 ! (
414- Some ( String :: from( "short" ) ) ,
415- Some ( -5 ) ,
416- Some ( 3 ) ,
417- Ok ( Some ( "sho" ) )
418- ) ;
419- Ok ( ( ) )
420- }
421-
422- #[ test]
423- fn test_negative_start_without_length ( ) -> Result < ( ) > {
424- // substring("hello, world!", -2) -> "d!"
425- // -2 + 13 = 11, so start at index 11 (0-indexed)
426- test_substring_2 ! (
427- Some ( String :: from( "hello, world!" ) ) ,
428- Some ( -2 ) ,
429- Ok ( Some ( "d!" ) )
430- ) ;
431- // substring("short", -2) -> "rt"
432- // -2 + 5 = 3, so start at index 3
433- test_substring_2 ! ( Some ( String :: from( "short" ) ) , Some ( -2 ) , Ok ( Some ( "rt" ) ) ) ;
434- Ok ( ( ) )
435- }
436-
437- #[ test]
438- fn test_null_start_returns_null ( ) -> Result < ( ) > {
439- test_substring_2 ! ( Some ( String :: from( "hello, world!" ) ) , None , Ok ( None :: <& str >) ) ;
440- Ok ( ( ) )
441- }
442-
443- #[ test]
444- fn test_null_length_returns_null ( ) -> Result < ( ) > {
445- test_substring_3 ! (
446- Some ( String :: from( "hello, world!" ) ) ,
447- Some ( 1 ) ,
448- None ,
449- Ok ( None :: <& str >)
450- ) ;
451- Ok ( ( ) )
452- }
453-
454- #[ test]
455- fn test_null_expression_returns_null ( ) -> Result < ( ) > {
456- test_substring_3 ! ( None :: <String >, Some ( 2 ) , Some ( 7 ) , Ok ( None :: <& str >) ) ;
457- Ok ( ( ) )
458- }
459-
460- #[ test]
461- fn test_negative_length_returns_empty_string ( ) -> Result < ( ) > {
462- // Spark returns empty string for negative length
463- test_substring_3 ! (
464- Some ( String :: from( "hello, world!" ) ) ,
465- Some ( 2 ) ,
466- Some ( -2 ) ,
467- Ok ( Some ( "" ) )
468- ) ;
469- Ok ( ( ) )
470- }
471-
472- #[ test]
473- fn test_positive_positions ( ) -> Result < ( ) > {
474- // 1-indexed positions
475- test_substring_2 ! (
476- Some ( String :: from( "alphabet" ) ) ,
477- Some ( 1 ) ,
478- Ok ( Some ( "alphabet" ) )
479- ) ;
480- test_substring_2 ! ( Some ( String :: from( "alphabet" ) ) , Some ( 2 ) , Ok ( Some ( "lphabet" ) ) ) ;
481- test_substring_2 ! ( Some ( String :: from( "alphabet" ) ) , Some ( 3 ) , Ok ( Some ( "phabet" ) ) ) ;
482- test_substring_3 ! (
483- Some ( String :: from( "alphabet" ) ) ,
484- Some ( 3 ) ,
485- Some ( 2 ) ,
486- Ok ( Some ( "ph" ) )
487- ) ;
488- Ok ( ( ) )
489- }
490-
491- #[ test]
492- fn test_position_zero ( ) -> Result < ( ) > {
493- // Position 0 is treated as position 1 in Spark
494- test_substring_2 ! (
495- Some ( String :: from( "alphabet" ) ) ,
496- Some ( 0 ) ,
497- Ok ( Some ( "alphabet" ) )
498- ) ;
499- test_substring_3 ! (
500- Some ( String :: from( "alphabet" ) ) ,
501- Some ( 0 ) ,
502- Some ( 5 ) ,
503- Ok ( Some ( "alpha" ) )
504- ) ;
505- Ok ( ( ) )
506- }
507-
508- #[ test]
509- fn test_position_beyond_length ( ) -> Result < ( ) > {
510- test_substring_2 ! ( Some ( String :: from( "alphabet" ) ) , Some ( 30 ) , Ok ( Some ( "" ) ) ) ;
511- Ok ( ( ) )
512- }
513-
514- #[ test]
515- fn test_unicode_strings ( ) -> Result < ( ) > {
516- test_substring_2 ! ( Some ( String :: from( "joséésoj" ) ) , Some ( 5 ) , Ok ( Some ( "ésoj" ) ) ) ;
517- test_substring_3 ! (
518- Some ( String :: from( "joséésoj" ) ) ,
519- Some ( 5 ) ,
520- Some ( 2 ) ,
521- Ok ( Some ( "és" ) )
522- ) ;
523- Ok ( ( ) )
524- }
525- }
0 commit comments