@@ -551,14 +551,7 @@ public static String[] suffixesWithSuffix(String input, int maxlength, @Nullable
551551 */
552552 @ ScalarFunction
553553 public static String splitPart (String input , String delimiter , int index ) {
554- String [] splitString = StringUtils .splitByWholeSeparator (input , delimiter );
555- if (index >= 0 && index < splitString .length ) {
556- return splitString [index ];
557- } else if (index < 0 && index >= -splitString .length ) {
558- return splitString [splitString .length + index ];
559- } else {
560- return "null" ;
561- }
554+ return splitPart (input , delimiter , 0 , index );
562555 }
563556
564557 /**
@@ -570,14 +563,83 @@ public static String splitPart(String input, String delimiter, int index) {
570563 */
571564 @ ScalarFunction
572565 public static String splitPart (String input , String delimiter , int limit , int index ) {
573- String [] splitString = StringUtils .splitByWholeSeparator (input , delimiter , limit );
574- if (index >= 0 && index < splitString .length ) {
575- return splitString [index ];
576- } else if (index < 0 && index >= -splitString .length ) {
577- return splitString [splitString .length + index ];
578- } else {
566+ if (delimiter == null || delimiter .isEmpty ()) {
579567 return "null" ;
580568 }
569+
570+ // Normalize limit: non-positive means no limit
571+ int maxParts = (limit <= 0 ) ? Integer .MAX_VALUE : limit ;
572+
573+ int targetIndex = index ;
574+
575+ // Handle Negative Index: We must count total tokens first
576+ if (index < 0 ) {
577+ // Pass -1 as targetIndex to run in "Count Mode"
578+ int totalTokens = scanAndGet (input , delimiter , maxParts , -1 , null );
579+
580+ targetIndex = totalTokens + index ;
581+ if (targetIndex < 0 ) {
582+ return "null" ;
583+ }
584+ }
585+
586+ // Handle Positive Index: Retrieve the token
587+ // We use a 1-element array as a mutable container to avoid allocating a wrapper object
588+ String [] result = new String [1 ];
589+ scanAndGet (input , delimiter , maxParts , targetIndex , result );
590+
591+ return result [0 ] != null ? result [0 ] : "null" ;
592+ }
593+
594+ /**
595+ * Unified logic to scan tokens.
596+ * If container is null -> Returns token count (Count Mode).
597+ * If container is set -> Extracts the token at targetIndex (Fetch Mode).
598+ */
599+ private static int scanAndGet (String input , String delimiter , int maxParts , int targetIndex , String [] container ) {
600+ int count = 0 ;
601+ int start = 0 ;
602+ int len = input .length ();
603+ int dLen = delimiter .length ();
604+
605+ while (start < len ) {
606+ int nextDelim = input .indexOf (delimiter , start );
607+
608+ // Check if this is the last token (End of string OR Hit limit)
609+ if (nextDelim == -1 || (count + 1 == maxParts )) {
610+ if (targetIndex == count && container != null ) {
611+ container [0 ] = input .substring (start );
612+ }
613+ return count + 1 ; // Return total count (current + 1)
614+ }
615+
616+ // Skip empty tokens (consecutive delimiters)
617+ if (nextDelim == start ) {
618+ start += dLen ;
619+ continue ;
620+ }
621+
622+ // Found a standard token
623+ if (targetIndex == count ) {
624+ if (container != null ) {
625+ container [0 ] = input .substring (start , nextDelim );
626+ }
627+ return count ; // Found target, return doesn't matter much here but strictly it's 'count'
628+ }
629+
630+ count ++;
631+ start = nextDelim + dLen ;
632+ }
633+
634+ // Edge Case: Input purely delimiters (e.g. "+++++") or empty
635+ if (count == 0 && len > 0 ) {
636+ if (targetIndex == 0 && container != null ) {
637+ container [0 ] = "" ;
638+ }
639+ return 1 ;
640+ }
641+
642+ return count ;
581643 }
582644
583645 /**
@@ -672,7 +734,7 @@ public static int levenshteinDistance(String input1, String input2) {
672734 int cost = (input1 .charAt (i - 1 ) == input2 .charAt (j - 1 )) ? 0 : 1 ;
673735 dp [i ][j ] = Math .min (
674736 Math .min (dp [i - 1 ][j ] + 1 , // deletion
675- dp [i ][j - 1 ] + 1 ), // insertion
737+ dp [i ][j - 1 ] + 1 ), // insertion
676738 dp [i - 1 ][j - 1 ] + cost // substitution
677739 );
678740 }
0 commit comments