Skip to content

Commit 59929c3

Browse files
committed
[CODEC-331] org.apache.commons.codec.language.bm.Rule.parsePhonemeExpr(String) adds duplicate empty phoneme when input ends with |
1 parent 25225b0 commit 59929c3

File tree

3 files changed

+17
-4
lines changed

3 files changed

+17
-4
lines changed

src/changes/changes.xml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -56,6 +56,7 @@ The <action> type attribute can be add,update,fix,remove.
5656
<action type="fix" dev="ggregory" due-to="Gary Gregory">Remove redundant checks for whitespace in DaitchMokotoffSoundex.soundex(String, boolean).</action>
5757
<action type="fix" dev="ggregory" due-to="Sebastian Baunsgaard">Javadoc typo in Base16.java #380.</action>
5858
<action type="fix" dev="ggregory" due-to="Gary Gregory">Deprecate unused constant org.apache.commons.codec.language.bm.Rule.ALL.</action>
59+
<action type="fix" dev="ggregory" issue="CODEC-331" due-to="IlikeCode, Gary Gregory">org.apache.commons.codec.language.bm.Rule.parsePhonemeExpr(String) adds duplicate empty phoneme when input ends with |.</action>
5960
<!-- ADD -->
6061
<action type="add" dev="ggregory" due-to="Gary Gregory">Add HmacUtils.hmac(Path).</action>
6162
<action type="add" dev="ggregory" due-to="Gary Gregory">Add HmacUtils.hmacHex(Path).</action>

src/main/java/org/apache/commons/codec/language/bm/Rule.java

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -80,6 +80,8 @@
8080
*/
8181
public class Rule {
8282

83+
private static final String PIPE = "|";
84+
8385
/**
8486
* A phoneme.
8587
*/
@@ -426,18 +428,19 @@ private static Phoneme parsePhoneme(final String ph) {
426428
return new Phoneme(ph, Languages.ANY_LANGUAGE);
427429
}
428430

429-
private static PhonemeExpr parsePhonemeExpr(final String ph) {
431+
static PhonemeExpr parsePhonemeExpr(final String ph) {
430432
if (ph.startsWith("(")) {
431433
// we have a bracketed list of options
432434
if (!ph.endsWith(")")) {
433-
throw new IllegalArgumentException("Phoneme starts with '(' so must end with ')'");
435+
throw new IllegalArgumentException("Phoneme starting with '(' must end with ')'");
434436
}
435437
final List<Phoneme> phs = new ArrayList<>();
436438
final String body = ph.substring(1, ph.length() - 1);
437-
for (final String part : AROUND_PIPE.split(body)) {
439+
final String[] split = AROUND_PIPE.split(body);
440+
for (final String part : split) {
438441
phs.add(parsePhoneme(part));
439442
}
440-
if (body.startsWith("|") || body.endsWith("|")) {
443+
if (split.length > 1 && split[0].length() != 0 && body.startsWith(PIPE) || split[split.length - 1].length() != 0 && body.endsWith(PIPE)) {
441444
phs.add(new Phoneme("", Languages.ANY_LANGUAGE));
442445
}
443446
return new PhonemeList(phs);

src/test/java/org/apache/commons/codec/language/bm/RuleTest.java

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,15 @@ void testPhonemeComparedToSelfIsZero() {
6363
}
6464
}
6565

66+
@Test
67+
void testParsePhonemeExprLang311() {
68+
assertEquals(1, Rule.parsePhonemeExpr("()").size());
69+
assertEquals(1, Rule.parsePhonemeExpr("(())").size());
70+
assertEquals(2, Rule.parsePhonemeExpr("(()|)").size());
71+
assertEquals(2, Rule.parsePhonemeExpr("(|())").size());
72+
assertEquals(3, Rule.parsePhonemeExpr("(|()|)").size());
73+
}
74+
6675
@Test
6776
void testSubSequenceWorks() {
6877
// AppendableCharSequence is private to Rule. We can only make it through a Phoneme.

0 commit comments

Comments
 (0)