Skip to content

Commit d240297

Browse files
committed
ICU-23354 Remove m_utilIntBuffer_ and m_utilStringBuffer_ from UCharacterName.AlgorithmName
The `m_utilIntBuffer_` array in `AlgorithmName` is a 1 KB `int[256]` that was eagerly initialized, leading to unnecessary memory consumption for instances that never use it (e.g., `TYPE_0_` instances or unqueried algorithms). Replace both `m_utilIntBuffer_` and `m_utilStringBuffer_` with local variables created at the point of use, which also removes the need to synchronize on the shared buffers.
1 parent f83f165 commit d240297

File tree

2 files changed

+108
-160
lines changed

2 files changed

+108
-160
lines changed

icu4j/main/core/src/main/java/com/ibm/icu/impl/UCharacterName.java

Lines changed: 105 additions & 157 deletions
Original file line numberDiff line numberDiff line change
@@ -237,49 +237,45 @@ public String getGroupName(int index, int length, int choice) {
237237
}
238238
}
239239

240-
synchronized (m_utilStringBuffer_) {
241-
m_utilStringBuffer_.setLength(0);
242-
byte b;
243-
char token;
244-
for (int i = 0; i < length; ) {
245-
b = m_groupstring_[index + i];
246-
i++;
240+
StringBuilder sb = new StringBuilder();
241+
byte b;
242+
char token;
243+
for (int i = 0; i < length; ) {
244+
b = m_groupstring_[index + i];
245+
i++;
247246

248-
if (b >= m_tokentable_.length) {
247+
if (b >= m_tokentable_.length) {
248+
if (b == ';') {
249+
break;
250+
}
251+
sb.append(b); // implicit letter
252+
} else {
253+
token = m_tokentable_[b & 0x00ff];
254+
if (token == 0xFFFE) {
255+
// this is a lead byte for a double-byte token
256+
token = m_tokentable_[b << 8 | (m_groupstring_[index + i] & 0x00ff)];
257+
i++;
258+
}
259+
if (token == 0xFFFF) {
249260
if (b == ';') {
250-
break;
251-
}
252-
m_utilStringBuffer_.append(b); // implicit letter
253-
} else {
254-
token = m_tokentable_[b & 0x00ff];
255-
if (token == 0xFFFE) {
256-
// this is a lead byte for a double-byte token
257-
token = m_tokentable_[b << 8 | (m_groupstring_[index + i] & 0x00ff)];
258-
i++;
259-
}
260-
if (token == 0xFFFF) {
261-
if (b == ';') {
262-
// skip the semicolon if we are seeking extended
263-
// names and there was no 2.0 name but there
264-
// is a 1.0 name.
265-
if (m_utilStringBuffer_.length() == 0
266-
&& choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
267-
continue;
268-
}
269-
break;
261+
// skip the semicolon if we are seeking extended
262+
// names and there was no 2.0 name but there
263+
// is a 1.0 name.
264+
if (sb.length() == 0 && choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
265+
continue;
270266
}
271-
// explicit letter
272-
m_utilStringBuffer_.append((char) (b & 0x00ff));
273-
} else { // write token word
274-
UCharacterUtility.getNullTermByteSubString(
275-
m_utilStringBuffer_, m_tokenstring_, token);
267+
break;
276268
}
269+
// explicit letter
270+
sb.append((char) (b & 0x00ff));
271+
} else { // write token word
272+
UCharacterUtility.getNullTermByteSubString(sb, m_tokenstring_, token);
277273
}
278274
}
275+
}
279276

280-
if (m_utilStringBuffer_.length() > 0) {
281-
return m_utilStringBuffer_.toString();
282-
}
277+
if (sb.length() > 0) {
278+
return sb.toString();
283279
}
284280
return null;
285281
}
@@ -336,21 +332,19 @@ public String getExtendedOr10Name(int ch) {
336332
} else {
337333
result = TYPE_NAMES_[type];
338334
}
339-
synchronized (m_utilStringBuffer_) {
340-
m_utilStringBuffer_.setLength(0);
341-
m_utilStringBuffer_.append('<');
342-
m_utilStringBuffer_.append(result);
343-
m_utilStringBuffer_.append('-');
344-
String chStr = Integer.toHexString(ch).toUpperCase(Locale.ENGLISH);
345-
int zeros = 4 - chStr.length();
346-
while (zeros > 0) {
347-
m_utilStringBuffer_.append('0');
348-
zeros--;
349-
}
350-
m_utilStringBuffer_.append(chStr);
351-
m_utilStringBuffer_.append('>');
352-
result = m_utilStringBuffer_.toString();
335+
StringBuilder sb = new StringBuilder();
336+
sb.append('<');
337+
sb.append(result);
338+
sb.append('-');
339+
String chStr = Integer.toHexString(ch).toUpperCase(Locale.ENGLISH);
340+
int zeros = 4 - chStr.length();
341+
while (zeros > 0) {
342+
sb.append('0');
343+
zeros--;
353344
}
345+
sb.append(chStr);
346+
sb.append('>');
347+
result = sb.toString();
354348
}
355349
return result;
356350
}
@@ -458,12 +452,9 @@ public int getAlgorithmEnd(int index) {
458452
* @return algorithmic name of codepoint
459453
*/
460454
public String getAlgorithmName(int index, int codepoint) {
461-
String result = null;
462-
synchronized (m_utilStringBuffer_) {
463-
m_utilStringBuffer_.setLength(0);
464-
m_algorithm_[index].appendName(codepoint, m_utilStringBuffer_);
465-
result = m_utilStringBuffer_.toString();
466-
}
455+
StringBuilder sb = new StringBuilder();
456+
m_algorithm_[index].appendName(codepoint, sb);
457+
String result = sb.toString();
467458
return result;
468459
}
469460

@@ -635,14 +626,14 @@ boolean contains(int ch) {
635626
}
636627

637628
/**
638-
* Appends algorithm name of code point into StringBuffer. Note this method does not check
629+
* Appends algorithm name of code point into StringBuilder. Note this method does not check
639630
* for validity of code point in Algorithm, result is undefined if code point does not
640631
* belong in Algorithm.
641632
*
642633
* @param ch code point
643-
* @param str StringBuffer to append to
634+
* @param str StringBuilder to append to
644635
*/
645-
void appendName(int ch, StringBuffer str) {
636+
void appendName(int ch, StringBuilder str) {
646637
str.append(m_prefix_);
647638
switch (m_type_) {
648639
case TYPE_0_:
@@ -652,27 +643,24 @@ void appendName(int ch, StringBuffer str) {
652643
case TYPE_1_:
653644
// prefix followed by factorized-elements
654645
int offset = ch - m_rangestart_;
655-
int indexes[] = m_utilIntBuffer_;
656-
int factor;
657646

658647
// write elements according to the factors
659648
// the factorized elements are determined by modulo
660649
// arithmetic
661-
synchronized (m_utilIntBuffer_) {
662-
for (int i = m_variant_ - 1; i > 0; i--) {
663-
factor = m_factor_[i] & 0x00FF;
664-
indexes[i] = offset % factor;
665-
offset /= factor;
666-
}
650+
int[] indexes = new int[m_variant_];
651+
for (int i = m_variant_ - 1; i > 0; i--) {
652+
int factor = m_factor_[i] & 0x00FF;
653+
indexes[i] = offset % factor;
654+
offset /= factor;
655+
}
667656

668-
// we don't need to calculate the last modulus because
669-
// start <= code <= end guarantees here that
670-
// code <= factors[0]
671-
indexes[0] = offset;
657+
// we don't need to calculate the last modulus because
658+
// start <= code <= end guarantees here that
659+
// code <= factors[0]
660+
indexes[0] = offset;
672661

673-
// joining up the factorized strings
674-
str.append(getFactorString(indexes, m_variant_));
675-
}
662+
// joining up the factorized strings
663+
str.append(getFactorString(indexes, m_variant_));
676664
break;
677665
}
678666
}
@@ -705,28 +693,25 @@ int getChar(String name) {
705693
// offset is the character code - start
706694
for (int ch = m_rangestart_; ch <= m_rangeend_; ch++) {
707695
int offset = ch - m_rangestart_;
708-
int indexes[] = m_utilIntBuffer_;
709-
int factor;
710696

711697
// write elements according to the factors
712698
// the factorized elements are determined by modulo
713699
// arithmetic
714-
synchronized (m_utilIntBuffer_) {
715-
for (int i = m_variant_ - 1; i > 0; i--) {
716-
factor = m_factor_[i] & 0x00FF;
717-
indexes[i] = offset % factor;
718-
offset /= factor;
719-
}
700+
int[] indexes = new int[m_variant_];
701+
for (int i = m_variant_ - 1; i > 0; i--) {
702+
int factor = m_factor_[i] & 0x00FF;
703+
indexes[i] = offset % factor;
704+
offset /= factor;
705+
}
720706

721-
// we don't need to calculate the last modulus
722-
// because start <= code <= end guarantees here that
723-
// code <= factors[0]
724-
indexes[0] = offset;
707+
// we don't need to calculate the last modulus
708+
// because start <= code <= end guarantees here that
709+
// code <= factors[0]
710+
indexes[0] = offset;
725711

726-
// joining up the factorized strings
727-
if (compareFactorString(indexes, m_variant_, name, prefixlen)) {
728-
return ch;
729-
}
712+
// joining up the factorized strings
713+
if (compareFactorString(indexes, m_variant_, name, prefixlen)) {
714+
return ch;
730715
}
731716
}
732717
}
@@ -762,19 +747,18 @@ int add(int set[], int maxlength) {
762747
// name = prefix factorized-elements
763748
// get the set and maximum factor suffix length for each
764749
// factor
750+
StringBuilder sb = new StringBuilder();
765751
for (int i = m_variant_ - 1; i > 0; i--) {
766752
int maxfactorlength = 0;
767753
int count = 0;
768754
for (int factor = m_factor_[i]; factor > 0; --factor) {
769-
synchronized (m_utilStringBuffer_) {
770-
m_utilStringBuffer_.setLength(0);
771-
count =
772-
UCharacterUtility.getNullTermByteSubString(
773-
m_utilStringBuffer_, m_factorstring_, count);
774-
UCharacterName.add(set, m_utilStringBuffer_);
775-
if (m_utilStringBuffer_.length() > maxfactorlength) {
776-
maxfactorlength = m_utilStringBuffer_.length();
777-
}
755+
sb.setLength(0);
756+
count =
757+
UCharacterUtility.getNullTermByteSubString(
758+
sb, m_factorstring_, count);
759+
UCharacterName.add(set, sb);
760+
if (sb.length() > maxfactorlength) {
761+
maxfactorlength = sb.length();
778762
}
779763
}
780764
length += maxfactorlength;
@@ -799,12 +783,6 @@ int add(int set[], int maxlength) {
799783
private String m_prefix_;
800784
private byte m_factorstring_[];
801785

802-
/** Utility StringBuffer */
803-
private StringBuffer m_utilStringBuffer_ = new StringBuffer();
804-
805-
/** Utility int buffer */
806-
private int m_utilIntBuffer_[] = new int[256];
807-
808786
// private methods -----------------------------------------------
809787

810788
/**
@@ -820,27 +798,22 @@ private String getFactorString(int index[], int length) {
820798
return null;
821799
}
822800

823-
synchronized (m_utilStringBuffer_) {
824-
m_utilStringBuffer_.setLength(0);
825-
int count = 0;
826-
int factor;
827-
size--;
828-
for (int i = 0; i <= size; i++) {
829-
factor = m_factor_[i];
801+
StringBuilder sb = new StringBuilder();
802+
int count = 0;
803+
size--;
804+
for (int i = 0; i <= size; i++) {
805+
int factor = m_factor_[i];
806+
count =
807+
UCharacterUtility.skipNullTermByteSubString(
808+
m_factorstring_, count, index[i]);
809+
count = UCharacterUtility.getNullTermByteSubString(sb, m_factorstring_, count);
810+
if (i != size) {
830811
count =
831812
UCharacterUtility.skipNullTermByteSubString(
832-
m_factorstring_, count, index[i]);
833-
count =
834-
UCharacterUtility.getNullTermByteSubString(
835-
m_utilStringBuffer_, m_factorstring_, count);
836-
if (i != size) {
837-
count =
838-
UCharacterUtility.skipNullTermByteSubString(
839-
m_factorstring_, count, factor - index[i] - 1);
840-
}
813+
m_factorstring_, count, factor - index[i] - 1);
841814
}
842-
return m_utilStringBuffer_.toString();
843815
}
816+
return sb.toString();
844817
}
845818

846819
/**
@@ -1011,9 +984,6 @@ boolean setGroup(char group[], byte groupstring[]) {
1011984
*/
1012985
private int m_ISOCommentSet_[] = new int[8];
1013986

1014-
/** Utility StringBuffer */
1015-
private StringBuffer m_utilStringBuffer_ = new StringBuffer();
1016-
1017987
/** Utility int buffer */
1018988
private int m_utilIntBuffer_[] = new int[2];
1019989

@@ -1102,14 +1072,11 @@ private String getAlgName(int ch, int choice) {
11021072
if (choice == UCharacterNameChoice.UNICODE_CHAR_NAME
11031073
|| choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
11041074
// index in terms integer index
1105-
synchronized (m_utilStringBuffer_) {
1106-
m_utilStringBuffer_.setLength(0);
1107-
1108-
for (int index = m_algorithm_.length - 1; index >= 0; index--) {
1109-
if (m_algorithm_[index].contains(ch)) {
1110-
m_algorithm_[index].appendName(ch, m_utilStringBuffer_);
1111-
return m_utilStringBuffer_.toString();
1112-
}
1075+
StringBuilder sb = new StringBuilder();
1076+
for (int index = m_algorithm_.length - 1; index >= 0; index--) {
1077+
if (m_algorithm_[index].contains(ch)) {
1078+
m_algorithm_[index].appendName(ch, sb);
1079+
return sb.toString();
11131080
}
11141081
}
11151082
}
@@ -1316,25 +1283,9 @@ private static boolean contains(int set[], char ch) {
13161283
* calcStringSetLength.
13171284
*
13181285
* @param set set to add all chars of str to
1319-
* @param str string to add
1320-
*/
1321-
private static int add(int set[], String str) {
1322-
int result = str.length();
1323-
1324-
for (int i = result - 1; i >= 0; i--) {
1325-
add(set, str.charAt(i));
1326-
}
1327-
return result;
1328-
}
1329-
1330-
/**
1331-
* Adds all characters of the argument str and gets the length Equivalent to
1332-
* calcStringSetLength.
1333-
*
1334-
* @param set set to add all chars of str to
1335-
* @param str string to add
1286+
* @param str CharSequence to add
13361287
*/
1337-
private static int add(int set[], StringBuffer str) {
1288+
private static int add(int[] set, CharSequence str) {
13381289
int result = str.length();
13391290

13401291
for (int i = result - 1; i >= 0; i--) {
@@ -1420,12 +1371,9 @@ private int[] addGroupName(int offset, int length, byte tokenlength[], int set[]
14201371
// use cached token length
14211372
byte tlength = tokenlength[b];
14221373
if (tlength == 0) {
1423-
synchronized (m_utilStringBuffer_) {
1424-
m_utilStringBuffer_.setLength(0);
1425-
UCharacterUtility.getNullTermByteSubString(
1426-
m_utilStringBuffer_, m_tokenstring_, token);
1427-
tlength = (byte) add(set, m_utilStringBuffer_);
1428-
}
1374+
StringBuilder sb = new StringBuilder();
1375+
UCharacterUtility.getNullTermByteSubString(sb, m_tokenstring_, token);
1376+
tlength = (byte) add(set, sb);
14291377
tokenlength[b] = tlength;
14301378
}
14311379
resultnlength += tlength;

0 commit comments

Comments
 (0)