Skip to content

Commit fd27c48

Browse files
stevenschlansker authored and echeran committed
ICU-23061 Transliterator improve scalability by avoiding monitor contention
The current implementation of lower, upper, title, etc. synchronizes the whole operation, so it is always single-threaded. Removing the shared state lets us utilize multiple CPU cores.
1 parent 3aa85e9 commit fd27c48

14 files changed

+253
-32
lines changed

icu4j/main/translit/src/main/java/com/ibm/icu/text/BreakTransliterator.java

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,6 @@
2222
final class BreakTransliterator extends Transliterator {
2323
private BreakIterator bi;
2424
private String insertion;
25-
private int[] boundaries = new int[50];
26-
private int boundaryCount = 0;
2725

2826
public BreakTransliterator(String ID, UnicodeFilter filter, BreakIterator bi, String insertion) {
2927
super(ID, filter);
@@ -52,8 +50,9 @@ public void setInsertion(String insertion) {
5250
public BreakIterator getBreakIterator() {
5351
// Defer initialization of BreakIterator because it is slow,
5452
// typically over 2000 ms.
55-
if (bi == null) bi = BreakIterator.getWordInstance(new ULocale("th_TH"));
56-
return bi;
53+
// Using a holder class for safe init without a volatile-read.
54+
if (bi == null) bi = WordBreakIteratorHolder.BI;
55+
return (BreakIterator) bi.clone();
5756
}
5857

5958
///CLOVER:OFF
@@ -74,10 +73,11 @@ public void setBreakIterator(BreakIterator bi) {
7473
| (1<<Character.ENCLOSING_MARK)
7574
;
7675
@Override
77-
protected synchronized void handleTransliterate(Replaceable text, Position pos, boolean incremental) {
78-
boundaryCount = 0;
76+
protected void handleTransliterate(Replaceable text, Position pos, boolean incremental) {
77+
int[] boundaries = new int[50];
78+
int boundaryCount = 0;
7979
int boundary = 0;
80-
getBreakIterator(); // Lazy-create it if necessary
80+
BreakIterator bi = getBreakIterator(); // Lazy-create it if necessary
8181
bi.setText(new ReplaceableCharacterIterator(text, pos.start, pos.limit, pos.start));
8282
// TODO: fix clumsy workaround used below.
8383
/*
@@ -416,4 +416,7 @@ public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, Uni
416416
}
417417
}
418418

419+
private static class WordBreakIteratorHolder {
420+
static final BreakIterator BI = BreakIterator.getWordInstance(new ULocale("th_TH"));
421+
}
419422
}

icu4j/main/translit/src/main/java/com/ibm/icu/text/CaseFoldTransliterator.java

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,6 @@ public Transliterator getInstance(String ID) {
3939
}
4040

4141
private final UCaseProps csp;
42-
private ReplaceableContextIterator iter;
43-
private StringBuilder result;
4442

4543
/**
4644
* Constructs a transliterator.
@@ -49,15 +47,13 @@ public Transliterator getInstance(String ID) {
4947
public CaseFoldTransliterator() {
5048
super(_ID, null);
5149
csp=UCaseProps.INSTANCE;
52-
iter=new ReplaceableContextIterator();
53-
result = new StringBuilder();
5450
}
5551

5652
/**
5753
* Implements {@link Transliterator#handleTransliterate}.
5854
*/
5955
@Override
60-
protected synchronized void handleTransliterate(Replaceable text,
56+
protected void handleTransliterate(Replaceable text,
6157
Position offsets, boolean isIncremental) {
6258
if(csp==null) {
6359
return;
@@ -67,8 +63,10 @@ protected synchronized void handleTransliterate(Replaceable text,
6763
return;
6864
}
6965

66+
ReplaceableContextIterator iter = new ReplaceableContextIterator();
67+
StringBuilder result = new StringBuilder();
68+
7069
iter.setText(text);
71-
result.setLength(0);
7270
int c, delta;
7371

7472
// Walk through original string

icu4j/main/translit/src/main/java/com/ibm/icu/text/LowercaseTransliterator.java

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,6 @@ public Transliterator getInstance(String ID) {
4242
private final ULocale locale;
4343

4444
private final UCaseProps csp;
45-
private ReplaceableContextIterator iter;
46-
private StringBuilder result;
4745
private int caseLocale;
4846

4947
/**
@@ -54,16 +52,14 @@ public LowercaseTransliterator(ULocale loc) {
5452
super(_ID, null);
5553
locale = loc;
5654
csp=UCaseProps.INSTANCE;
57-
iter=new ReplaceableContextIterator();
58-
result = new StringBuilder();
5955
caseLocale = UCaseProps.getCaseLocale(locale);
6056
}
6157

6258
/**
6359
* Implements {@link Transliterator#handleTransliterate}.
6460
*/
6561
@Override
66-
protected synchronized void handleTransliterate(Replaceable text,
62+
protected void handleTransliterate(Replaceable text,
6763
Position offsets, boolean isIncremental) {
6864
if(csp==null) {
6965
return;
@@ -73,8 +69,10 @@ protected synchronized void handleTransliterate(Replaceable text,
7369
return;
7470
}
7571

72+
ReplaceableContextIterator iter = new ReplaceableContextIterator();
73+
StringBuilder result = new StringBuilder();
74+
7675
iter.setText(text);
77-
result.setLength(0);
7876
int c, delta;
7977

8078
// Walk through original string

icu4j/main/translit/src/main/java/com/ibm/icu/text/TitlecaseTransliterator.java

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,6 @@ public Transliterator getInstance(String ID) {
4040
private final ULocale locale;
4141

4242
private final UCaseProps csp;
43-
private ReplaceableContextIterator iter;
44-
private StringBuilder result;
4543
private int caseLocale;
4644

4745
/**
@@ -53,16 +51,14 @@ public TitlecaseTransliterator(ULocale loc) {
5351
// Need to look back 2 characters in the case of "can't"
5452
setMaximumContextLength(2);
5553
csp=UCaseProps.INSTANCE;
56-
iter=new ReplaceableContextIterator();
57-
result = new StringBuilder();
5854
caseLocale = UCaseProps.getCaseLocale(locale);
5955
}
6056

6157
/**
6258
* Implements {@link Transliterator#handleTransliterate}.
6359
*/
6460
@Override
65-
protected synchronized void handleTransliterate(Replaceable text,
61+
protected void handleTransliterate(Replaceable text,
6662
Position offsets, boolean isIncremental) {
6763
// TODO reimplement, see ustrcase.c
6864
// using a real word break iterator
@@ -75,6 +71,9 @@ protected synchronized void handleTransliterate(Replaceable text,
7571
return;
7672
}
7773

74+
ReplaceableContextIterator iter = new ReplaceableContextIterator();
75+
StringBuilder result = new StringBuilder();
76+
7877
// case type: >0 cased (UCaseProps.LOWER etc.) ==0 uncased <0 case-ignorable
7978
int type;
8079

@@ -108,8 +107,6 @@ protected synchronized void handleTransliterate(Replaceable text,
108107
iter.setLimit(offsets.limit);
109108
iter.setContextLimits(offsets.contextStart, offsets.contextLimit);
110109

111-
result.setLength(0);
112-
113110
// Walk through original string
114111
// If there is a case change, modify corresponding position in replaceable
115112
int delta;

icu4j/main/translit/src/main/java/com/ibm/icu/text/UppercaseTransliterator.java

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,6 @@ public Transliterator getInstance(String ID) {
3939
private final ULocale locale;
4040

4141
private final UCaseProps csp;
42-
private ReplaceableContextIterator iter;
43-
private StringBuilder result;
4442
private int caseLocale;
4543

4644
/**
@@ -50,16 +48,14 @@ public UppercaseTransliterator(ULocale loc) {
5048
super(_ID, null);
5149
locale = loc;
5250
csp=UCaseProps.INSTANCE;
53-
iter=new ReplaceableContextIterator();
54-
result = new StringBuilder();
5551
caseLocale = UCaseProps.getCaseLocale(locale);
5652
}
5753

5854
/**
5955
* Implements {@link Transliterator#handleTransliterate}.
6056
*/
6157
@Override
62-
protected synchronized void handleTransliterate(Replaceable text,
58+
protected void handleTransliterate(Replaceable text,
6359
Position offsets, boolean isIncremental) {
6460
if(csp==null) {
6561
return;
@@ -69,8 +65,10 @@ protected synchronized void handleTransliterate(Replaceable text,
6965
return;
7066
}
7167

68+
ReplaceableContextIterator iter = new ReplaceableContextIterator();
69+
StringBuilder result = new StringBuilder();
70+
7271
iter.setText(text);
73-
result.setLength(0);
7472
int c, delta;
7573

7674
// Walk through original string

icu4j/perf-tests/README.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ COLLATION TESTS
4343
The collation tests run only on the command line with tabular output:
4444
perl collationperf.pl |& tee collation_output.txt
4545

46+
JMH
47+
Some performance tests run using OpenJDK JMH. Example invocation:
48+
mvn clean package exec:java -pl perf-tests -Pjmh_benchmark
4649

4750
OTHER COMMAND LINE TESTS
4851
Additional tests are run from the command line, each producing an HTML

icu4j/perf-tests/pom.xml

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,73 @@
4343
<artifactId>commons-cli</artifactId>
4444
<version>${commons-cli.version}</version>
4545
</dependency>
46+
<dependency>
47+
<groupId>org.openjdk.jmh</groupId>
48+
<artifactId>jmh-core</artifactId>
49+
<version>${jmh.version}</version>
50+
</dependency>
51+
<dependency>
52+
<groupId>org.openjdk.jmh</groupId>
53+
<artifactId>jmh-generator-annprocess</artifactId>
54+
<version>${jmh.version}</version>
55+
<scope>provided</scope>
56+
</dependency>
4657
</dependencies>
4758

59+
<profiles>
60+
<profile>
61+
<id>jmh_benchmark</id>
62+
<build>
63+
<plugins>
64+
<plugin>
65+
<groupId>org.apache.maven.plugins</groupId>
66+
<artifactId>maven-compiler-plugin</artifactId>
67+
<configuration>
68+
<annotationProcessors>
69+
<annotationProcessor>org.openjdk.jmh.generators.BenchmarkProcessor</annotationProcessor>
70+
</annotationProcessors>
71+
</configuration>
72+
</plugin>
73+
<plugin>
74+
<groupId>org.apache.maven.plugins</groupId>
75+
<artifactId>maven-dependency-plugin</artifactId>
76+
<executions>
77+
<execution>
78+
<id>build-jmh-classpath</id>
79+
<goals>
80+
<goal>build-classpath</goal>
81+
</goals>
82+
<configuration>
83+
<includeScope>runtime</includeScope>
84+
<outputProperty>jmhClasspath</outputProperty>
85+
</configuration>
86+
</execution>
87+
</executions>
88+
</plugin>
89+
<plugin>
90+
<groupId>org.codehaus.mojo</groupId>
91+
<artifactId>exec-maven-plugin</artifactId>
92+
<configuration>
93+
<mainClass>org.openjdk.jmh.Main</mainClass>
94+
<arguments>
95+
<argument>-f</argument>
96+
<argument>1</argument>
97+
<argument>-wi</argument>
98+
<argument>5</argument>
99+
<argument>-i</argument>
100+
<argument>10</argument>
101+
</arguments>
102+
<systemProperties>
103+
<property>
104+
<key>java.class.path</key>
105+
<value>${project.build.outputDirectory}${path.separator}${jmhClasspath}</value>
106+
</property>
107+
</systemProperties>
108+
</configuration>
109+
</plugin>
110+
</plugins>
111+
</build>
112+
</profile>
113+
</profiles>
114+
48115
</project>
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// © 2025 and later: Unicode, Inc. and others.
2+
// License & terms of use: http://www.unicode.org/copyright.html
3+
package com.ibm.icu.dev.test.perf;
4+
5+
import java.util.concurrent.TimeUnit;
6+
7+
import com.ibm.icu.text.BreakTransliteratorAccess;
8+
import com.ibm.icu.text.Transliterator;
9+
import org.openjdk.jmh.annotations.Benchmark;
10+
import org.openjdk.jmh.annotations.BenchmarkMode;
11+
import org.openjdk.jmh.annotations.Mode;
12+
import org.openjdk.jmh.annotations.OutputTimeUnit;
13+
14+
@BenchmarkMode(Mode.Throughput)
15+
@OutputTimeUnit(TimeUnit.MILLISECONDS)
16+
public class BreakTransliteratorPerfTest {
17+
18+
static final Transliterator TITLE = BreakTransliteratorAccess.newInstance();
19+
20+
@Benchmark
21+
public String testShort() {
22+
return TITLE.transliterate("Cat");
23+
}
24+
25+
@Benchmark
26+
public String testSentence() {
27+
return TITLE.transliterate("The Quick Brown Fox jumped over the Lazy Dog");
28+
}
29+
30+
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// © 2025 and later: Unicode, Inc. and others.
2+
// License & terms of use: http://www.unicode.org/copyright.html
3+
package com.ibm.icu.dev.test.perf;
4+
5+
import java.util.concurrent.TimeUnit;
6+
7+
import com.ibm.icu.text.Transliterator;
8+
import org.openjdk.jmh.annotations.Benchmark;
9+
import org.openjdk.jmh.annotations.BenchmarkMode;
10+
import org.openjdk.jmh.annotations.Mode;
11+
import org.openjdk.jmh.annotations.OutputTimeUnit;
12+
13+
@BenchmarkMode(Mode.Throughput)
14+
@OutputTimeUnit(TimeUnit.MILLISECONDS)
15+
public class CaseFoldTransliteratorPerfTest {
16+
17+
static final Transliterator CASE = Transliterator.getInstance("CaseFold");
18+
19+
@Benchmark
20+
public String testShort() {
21+
return CASE.transliterate("Cat");
22+
}
23+
24+
@Benchmark
25+
public String testSentence() {
26+
return CASE.transliterate("The Quick Brown Fox Jumped Over The Lazy Dog");
27+
}
28+
29+
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// © 2025 and later: Unicode, Inc. and others.
2+
// License & terms of use: http://www.unicode.org/copyright.html
3+
package com.ibm.icu.dev.test.perf;
4+
5+
import java.util.concurrent.TimeUnit;
6+
7+
import com.ibm.icu.text.Transliterator;
8+
import org.openjdk.jmh.annotations.Benchmark;
9+
import org.openjdk.jmh.annotations.BenchmarkMode;
10+
import org.openjdk.jmh.annotations.Mode;
11+
import org.openjdk.jmh.annotations.OutputTimeUnit;
12+
13+
@BenchmarkMode(Mode.Throughput)
14+
@OutputTimeUnit(TimeUnit.MILLISECONDS)
15+
public class LowercaseTransliteratorPerfTest {
16+
17+
static final Transliterator LOWER = Transliterator.getInstance("Lower");
18+
19+
@Benchmark
20+
public String testShort() {
21+
return LOWER.transliterate("Cat");
22+
}
23+
24+
@Benchmark
25+
public String testSentence() {
26+
return LOWER.transliterate("The Quick Brown Fox Jumped Over The Lazy Dog");
27+
}
28+
29+
}

0 commit comments

Comments
 (0)