1717package org .apache .commons .lang3 ;
1818
1919import java .io .UnsupportedEncodingException ;
20+ import java .nio .CharBuffer ;
2021import java .nio .charset .Charset ;
2122import java .text .Normalizer ;
2223import java .util .ArrayList ;
@@ -1038,15 +1039,7 @@ public static boolean containsAny(final CharSequence cs, final char... searchCha
10381039 final char ch = cs .charAt (i );
10391040 for (int j = 0 ; j < searchLength ; j ++) {
10401041 if (searchChars [j ] == ch ) {
1041- if (!Character .isHighSurrogate (ch )) {
1042- // ch is in the Basic Multilingual Plane
1043- return true ;
1044- }
1045- if (j == searchLast ) {
1046- // missing low surrogate, fine, like String.indexOf(String)
1047- return true ;
1048- }
1049- if (i < csLast && searchChars [j + 1 ] == cs .charAt (i + 1 )) {
1042+ if (!Character .isHighSurrogate (ch ) || (j == searchLast ) || (i < csLast && searchChars [j + 1 ] == cs .charAt (i + 1 ))) {
10501043 return true ;
10511044 }
10521045 }
@@ -1218,15 +1211,7 @@ public static boolean containsNone(final CharSequence cs, final char... searchCh
12181211 final char ch = cs .charAt (i );
12191212 for (int j = 0 ; j < searchLen ; j ++) {
12201213 if (searchChars [j ] == ch ) {
1221- if (!Character .isHighSurrogate (ch )) {
1222- // ch is in the Basic Multilingual Plane
1223- return false ;
1224- }
1225- if (j == searchLast ) {
1226- // missing low surrogate, fine, like String.indexOf(String)
1227- return false ;
1228- }
1229- if (i < csLast && searchChars [j + 1 ] == cs .charAt (i + 1 )) {
1214+ if (!Character .isHighSurrogate (ch ) || (j == searchLast ) || (i < csLast && searchChars [j + 1 ] == cs .charAt (i + 1 ))) {
12301215 return false ;
12311216 }
12321217 }
@@ -2715,11 +2700,8 @@ public static int indexOfAny(final CharSequence cs, final char... searchChars) {
27152700 final char ch = cs .charAt (i );
27162701 for (int j = 0 ; j < searchLen ; j ++) {
27172702 if (searchChars [j ] == ch ) {
2718- if (i >= csLast || j >= searchLast || !Character .isHighSurrogate (ch )) {
2719- return i ;
2720- }
27212703 // ch is a supplementary character
2722- if (searchChars [j + 1 ] == cs .charAt (i + 1 )) {
2704+ if (i >= csLast || j >= searchLast || ! Character . isHighSurrogate ( ch ) || ( searchChars [j + 1 ] == cs .charAt (i + 1 ) )) {
27232705 return i ;
27242706 }
27252707 }
@@ -2813,7 +2795,8 @@ public static int indexOfAny(final CharSequence cs, final String searchChars) {
28132795
28142796 /**
28152797 * Searches a CharSequence to find the first index of any
2816- * character not in the given set of characters.
2798+ * character not in the given set of characters, i.e.,
2799+ * the index {@code i} of the first char in {@code cs} whose code point, {@code Character.codePointAt(cs, i)}, is not one of the code points of {@code searchChars}.
28172800 *
28182801 * <p>A {@code null} CharSequence will return {@code -1}.
28192802 * A {@code null} or zero length search array will return {@code -1}.</p>
@@ -2839,31 +2822,13 @@ public static int indexOfAnyBut(final CharSequence cs, final char... searchChars
28392822 if (isEmpty (cs ) || ArrayUtils .isEmpty (searchChars )) {
28402823 return INDEX_NOT_FOUND ;
28412824 }
2842- final int csLen = cs .length ();
2843- final int csLast = csLen - 1 ;
2844- final int searchLen = searchChars .length ;
2845- final int searchLast = searchLen - 1 ;
2846- outer :
2847- for (int i = 0 ; i < csLen ; i ++) {
2848- final char ch = cs .charAt (i );
2849- for (int j = 0 ; j < searchLen ; j ++) {
2850- if (searchChars [j ] == ch ) {
2851- if (i >= csLast || j >= searchLast || !Character .isHighSurrogate (ch )) {
2852- continue outer ;
2853- }
2854- if (searchChars [j + 1 ] == cs .charAt (i + 1 )) {
2855- continue outer ;
2856- }
2857- }
2858- }
2859- return i ;
2860- }
2861- return INDEX_NOT_FOUND ;
2825+ return indexOfAnyBut (cs , CharBuffer .wrap (searchChars ));
28622826 }
28632827
28642828 /**
28652829 * Search a CharSequence to find the first index of any
2866- * character not in the given set of characters.
2830+ * character not in the given set of characters, i.e.,
2831+ * the index {@code i} of the first char in {@code seq} whose code point, {@code Character.codePointAt(seq, i)}, is not one of the code points of {@code searchChars}.
28672832 *
28682833 * <p>A {@code null} CharSequence will return {@code -1}.
28692834 * A {@code null} or empty search string will return {@code -1}.</p>
@@ -2888,18 +2853,14 @@ public static int indexOfAnyBut(final CharSequence seq, final CharSequence searc
28882853 if (isEmpty (seq ) || isEmpty (searchChars )) {
28892854 return INDEX_NOT_FOUND ;
28902855 }
2891- final int strLen = seq .length ();
2892- for (int i = 0 ; i < strLen ; i ++) {
2893- final char ch = seq .charAt (i );
2894- final boolean chFound = CharSequenceUtils .indexOf (searchChars , ch , 0 ) >= 0 ;
2895- if (i + 1 < strLen && Character .isHighSurrogate (ch )) {
2896- final char ch2 = seq .charAt (i + 1 );
2897- if (chFound && CharSequenceUtils .indexOf (searchChars , ch2 , 0 ) < 0 ) {
2898- return i ;
2899- }
2900- } else if (!chFound ) {
2901- return i ;
2856+ final int [] codePoints = searchChars .codePoints ().sorted ().toArray ();
2857+ // advance character index from one interpreted codepoint to the next
2858+ for (int curSeqCharIdx = 0 ; curSeqCharIdx < seq .length ();) {
2859+ final int curSeqCodePoint = Character .codePointAt (seq , curSeqCharIdx );
2860+ if (Arrays .binarySearch (codePoints , curSeqCodePoint ) < 0 ) {
2861+ return curSeqCharIdx ;
29022862 }
2863+ curSeqCharIdx += Character .charCount (curSeqCodePoint ); // skip indices to paired low-surrogates
29032864 }
29042865 return INDEX_NOT_FOUND ;
29052866 }
0 commit comments