@@ -340,29 +340,34 @@ protected boolean isAcceptableBreak(CharSequence s, int fromIndex, int candidate
340340 return true ;
341341
342342 final String text = s .toString ();
343+ final boolean caseSensitive = abbDict .isCaseSensitive ();
344+ final String searchText = caseSensitive ? text : StringUtil .toLowerCase (text );
343345 for (StringList abb : abbDict ) {
344- final String abbToken = abb .getToken (0 );
345- final int tokenStartPos = text .indexOf (abbToken , fromIndex );
346- if (tokenStartPos == -1 ) {
347- continue ; // skip fast when abb is not present in text
348- }
349- if (tokenStartPos == 0 && text .substring (tokenStartPos , candidateIndex + 1 ).equals (abbToken )) {
350- return false ; // full abbreviation match at sentence start -> no acceptable break
351- } else {
352- final int tokenLength = abbToken .length ();
346+ final String abbToken = caseSensitive ? abb .getToken (0 )
347+ : StringUtil .toLowerCase (abb .getToken (0 ));
348+ final int tokenLength = abbToken .length ();
349+ int tokenStartPos = searchText .indexOf (abbToken , fromIndex );
350+ while (tokenStartPos != -1 ) {
351+ if (tokenStartPos > candidateIndex ) {
352+ break ; // past candidate position, no point searching further
353+ }
354+ if (tokenStartPos == 0
355+ && searchText .substring (tokenStartPos , candidateIndex + 1 ).equals (abbToken )) {
356+ return false ; // full abbreviation match at sentence start -> no acceptable break
357+ }
353358 final char prevChar = s .charAt (tokenStartPos == 0 ? tokenStartPos : tokenStartPos - 1 );
354- if (tokenStartPos + tokenLength < candidateIndex || tokenStartPos > candidateIndex ||
359+ if (tokenStartPos + tokenLength >= candidateIndex
355360 /*
356361 * Note:
357362 * Skip abbreviation candidate if regular characters exist directly before it,
358363 * That is, any letter or digit except: a whitespace, an apostrophe, or an opening round bracket.
359364 * This prevents mismatches from overlaps close to an actual sentence end.
360365 */
361- !(Character .isWhitespace (prevChar ) || isApostrophe (prevChar ) || prevChar == '(' )) {
362-
363- continue ;
366+ && (Character .isWhitespace (prevChar ) || isApostrophe (prevChar ) || prevChar == '(' )) {
367+ return false ; // in case of a valid abbreviation: the (sentence) break is not accepted
364368 }
365- return false ; // in case of a valid abbreviation: the (sentence) break is not accepted
369+ // Try next occurrence of this abbreviation in the text
370+ tokenStartPos = searchText .indexOf (abbToken , tokenStartPos + 1 );
366371 }
367372 }
368373 return true ; // no abbreviation(s) at given positions: valid sentence boundary
0 commit comments