Skip to content

Commit 454ce01

Browse files
committed
ICU-22987 Return DONE for BreakIterator.preceding on negative index
See #3507
1 parent b30c63d commit 454ce01

File tree

6 files changed

+68
-4
lines changed

6 files changed

+68
-4
lines changed

icu4c/source/test/intltest/rbbitst.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ void RBBITest::runIndexedTest( int32_t index, UBool exec, const char* &name, cha
107107
TESTCASE_AUTO(TestGetDisplayName);
108108
#if !UCONFIG_NO_FILE_IO
109109
TESTCASE_AUTO(TestEndBehaviour);
110+
TESTCASE_AUTO(TestPreceding_NegativeIndex);
110111
TESTCASE_AUTO(TestWordBreaks);
111112
TESTCASE_AUTO(TestWordBoundary);
112113
TESTCASE_AUTO(TestLineBreaks);
@@ -740,6 +741,34 @@ void RBBITest::executeTest(TestParams *t, UErrorCode &status) {
740741
}
741742
}
742743

744+
void RBBITest::TestPreceding_NegativeIndex() {
745+
UErrorCode status = U_ZERO_ERROR;
746+
Locale loc = Locale::getRoot();
747+
LocalPointer<BreakIterator> bi(BreakIterator::createWordInstance(loc, status));
748+
UnicodeString source(u"The quick brown fox jumped over the lazy dog.");
749+
bi->setText(source);
750+
751+
assertEquals("length of source string", 45, source.length());
752+
753+
struct TestCase {
754+
const UnicodeString description;
755+
const int32_t startIdx;
756+
const int32_t expected;
757+
} cases[] = {
758+
{"negative index", -2, BreakIterator::DONE},
759+
{"zero", 0, BreakIterator::DONE},
760+
{"one", 1, 0},
761+
{"middle", 41, 40},
762+
{"end", 45, 44},
763+
{"after the end", source.length() + 2, source.length()}
764+
};
765+
766+
for (const auto& cas : cases) {
767+
int32_t actual = bi->preceding(cas.startIdx);
768+
assertEquals(cas.description, cas.expected, actual);
769+
}
770+
}
771+
743772
void RBBITest::TestExtended() {
744773
// The expectations in this test heavily depends on the Thai dictionary.
745774
// Therefore, we skip this test under the LSTM configuration.

icu4c/source/test/intltest/rbbitst.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ class RBBITest: public IntlTest {
5858

5959
void TestExtended();
6060
void executeTest(TestParams *, UErrorCode &status);
61+
void TestPreceding_NegativeIndex();
6162

6263
void TestWordBreaks();
6364
void TestWordBoundary();

icu4j/main/core/src/main/java/com/ibm/icu/text/BreakIterator.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,10 @@ public int preceding(int offset) {
347347
// NOTE: This implementation is here solely because we can't add new
348348
// abstract methods to an existing class. There is almost ALWAYS a
349349
// better, faster way to do this.
350+
351+
if (offset < 0) {
352+
return DONE;
353+
}
350354
int pos = following(offset);
351355
while (pos >= offset && pos != DONE)
352356
pos = previous();

icu4j/main/core/src/main/java/com/ibm/icu/text/RuleBasedBreakIterator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -511,7 +511,7 @@ public int preceding(int offset) {
511511
if (fText == null || offset > fText.getEndIndex()) {
512512
return last();
513513
} else if (offset < fText.getBeginIndex()) {
514-
return first();
514+
return DONE;
515515
}
516516

517517
// Move requested offset to a code point start. It might be between a lead and trail surrogate.

icu4j/main/core/src/test/java/com/ibm/icu/dev/test/rbbi/BreakIteratorTest.java

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
*/
99
package com.ibm.icu.dev.test.rbbi;
1010

11+
import static org.hamcrest.CoreMatchers.is;
12+
import static org.junit.Assert.assertThat;
13+
1114
import java.text.StringCharacterIterator;
1215
import java.util.Locale;
1316

@@ -107,6 +110,33 @@ public void debugLogln(String s) {
107110
// tests
108111
//=========================================================================
109112

113+
@Test
114+
public void TestPreceding_NegativeIndex() {
115+
String source = "The quick brown fox jumped over the lazy dog.";
116+
BreakIterator iter = BreakIterator.getWordInstance();
117+
iter.setText(source);
118+
119+
assertEquals("length of source string", 45, source.length());
120+
121+
Object[][] casesData = {
122+
{"negative index", -2, BreakIterator.DONE},
123+
{"zero", 0, BreakIterator.DONE},
124+
{"one", 1, 0},
125+
{"middle", 41, 40},
126+
{"end", 45, 44},
127+
{"after the end", source.length() + 2, source.length()}
128+
};
129+
130+
for (Object[] caseDatum : casesData) {
131+
String desc = (String) caseDatum[0];
132+
int startIdx = (int) caseDatum[1];
133+
int expected = (int) caseDatum[2];
134+
135+
int actual = iter.preceding(startIdx);
136+
137+
assertThat(desc, actual, is(expected));
138+
}
139+
}
110140

111141
/*
112142
* @bug 4153072

icu4j/main/core/src/test/java/com/ibm/icu/dev/test/rbbi/RBBITest.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -324,14 +324,14 @@ public void TestPreceding() {
324324
rbbi.setText((CharacterIterator)null);
325325
if (rbbi.preceding(-1) != BreakIterator.DONE) {
326326
errln("RuleBasedBreakIterator.preceding(-1) was suppose to return "
327-
+ "0 when the object has a fText of null.");
327+
+ "DONE when the object has a fText of null.");
328328
}
329329

330330
// Tests when "else if (offset < fText.getBeginIndex())" is true
331331
rbbi.setText("dummy");
332-
if (rbbi.preceding(-1) != 0) {
332+
if (rbbi.preceding(-1) != BreakIterator.DONE) {
333333
errln("RuleBasedBreakIterator.preceding(-1) was suppose to return "
334-
+ "0 when the object has a fText of dummy.");
334+
+ "DONE when the object has a fText of dummy.");
335335
}
336336
}
337337

0 commit comments

Comments
 (0)