From d2402978cc3540e4759a27bf69fdea77ab3f7a65 Mon Sep 17 00:00:00 2001 From: Victor Chang Date: Thu, 26 Mar 2026 11:09:58 +0000 Subject: [PATCH] ICU-23354 Remove m_utilIntBuffer_ and m_utilStringBuffer_ from UCharacterName.AlgorithmName The `m_utilIntBuffer_` array in `AlgorithmName` is a 1KB `int[256]` that was eagerly initialized, leading to unnecessary memory consumption for instances that never use it (e.g., `TYPE_0_` instances or unqueried algorithms). Use a local variable instead for m_utilIntBuffer_ and m_utilStringBuffer_. --- .../java/com/ibm/icu/impl/UCharacterName.java | 262 +++++++----------- .../com/ibm/icu/impl/UCharacterUtility.java | 6 +- 2 files changed, 108 insertions(+), 160 deletions(-) diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/impl/UCharacterName.java b/icu4j/main/core/src/main/java/com/ibm/icu/impl/UCharacterName.java index 845b629bea40..f76a3630236c 100644 --- a/icu4j/main/core/src/main/java/com/ibm/icu/impl/UCharacterName.java +++ b/icu4j/main/core/src/main/java/com/ibm/icu/impl/UCharacterName.java @@ -237,49 +237,45 @@ public String getGroupName(int index, int length, int choice) { } } - synchronized (m_utilStringBuffer_) { - m_utilStringBuffer_.setLength(0); - byte b; - char token; - for (int i = 0; i < length; ) { - b = m_groupstring_[index + i]; - i++; + StringBuilder sb = new StringBuilder(); + byte b; + char token; + for (int i = 0; i < length; ) { + b = m_groupstring_[index + i]; + i++; - if (b >= m_tokentable_.length) { + if (b >= m_tokentable_.length) { + if (b == ';') { + break; + } + sb.append(b); // implicit letter + } else { + token = m_tokentable_[b & 0x00ff]; + if (token == 0xFFFE) { + // this is a lead byte for a double-byte token + token = m_tokentable_[b << 8 | (m_groupstring_[index + i] & 0x00ff)]; + i++; + } + if (token == 0xFFFF) { if (b == ';') { - break; - } - m_utilStringBuffer_.append(b); // implicit letter - } else { - token = m_tokentable_[b & 0x00ff]; - if (token == 0xFFFE) { - // this is a lead byte 
for a double-byte token - token = m_tokentable_[b << 8 | (m_groupstring_[index + i] & 0x00ff)]; - i++; - } - if (token == 0xFFFF) { - if (b == ';') { - // skip the semicolon if we are seeking extended - // names and there was no 2.0 name but there - // is a 1.0 name. - if (m_utilStringBuffer_.length() == 0 - && choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) { - continue; - } - break; + // skip the semicolon if we are seeking extended + // names and there was no 2.0 name but there + // is a 1.0 name. + if (sb.length() == 0 && choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) { + continue; } - // explicit letter - m_utilStringBuffer_.append((char) (b & 0x00ff)); - } else { // write token word - UCharacterUtility.getNullTermByteSubString( - m_utilStringBuffer_, m_tokenstring_, token); + break; } + // explicit letter + sb.append((char) (b & 0x00ff)); + } else { // write token word + UCharacterUtility.getNullTermByteSubString(sb, m_tokenstring_, token); } } + } - if (m_utilStringBuffer_.length() > 0) { - return m_utilStringBuffer_.toString(); - } + if (sb.length() > 0) { + return sb.toString(); } return null; } @@ -336,21 +332,19 @@ public String getExtendedOr10Name(int ch) { } else { result = TYPE_NAMES_[type]; } - synchronized (m_utilStringBuffer_) { - m_utilStringBuffer_.setLength(0); - m_utilStringBuffer_.append('<'); - m_utilStringBuffer_.append(result); - m_utilStringBuffer_.append('-'); - String chStr = Integer.toHexString(ch).toUpperCase(Locale.ENGLISH); - int zeros = 4 - chStr.length(); - while (zeros > 0) { - m_utilStringBuffer_.append('0'); - zeros--; - } - m_utilStringBuffer_.append(chStr); - m_utilStringBuffer_.append('>'); - result = m_utilStringBuffer_.toString(); + StringBuilder sb = new StringBuilder(); + sb.append('<'); + sb.append(result); + sb.append('-'); + String chStr = Integer.toHexString(ch).toUpperCase(Locale.ENGLISH); + int zeros = 4 - chStr.length(); + while (zeros > 0) { + sb.append('0'); + zeros--; } + sb.append(chStr); + 
sb.append('>'); + result = sb.toString(); } return result; } @@ -458,12 +452,9 @@ public int getAlgorithmEnd(int index) { * @return algorithmic name of codepoint */ public String getAlgorithmName(int index, int codepoint) { - String result = null; - synchronized (m_utilStringBuffer_) { - m_utilStringBuffer_.setLength(0); - m_algorithm_[index].appendName(codepoint, m_utilStringBuffer_); - result = m_utilStringBuffer_.toString(); - } + StringBuilder sb = new StringBuilder(); + m_algorithm_[index].appendName(codepoint, sb); + String result = sb.toString(); return result; } @@ -635,14 +626,14 @@ boolean contains(int ch) { } /** - * Appends algorithm name of code point into StringBuffer. Note this method does not check + * Appends algorithm name of code point into StringBuilder. Note this method does not check * for validity of code point in Algorithm, result is undefined if code point does not * belong in Algorithm. * * @param ch code point - * @param str StringBuffer to append to + * @param str StringBuilder to append to */ - void appendName(int ch, StringBuffer str) { + void appendName(int ch, StringBuilder str) { str.append(m_prefix_); switch (m_type_) { case TYPE_0_: @@ -652,27 +643,24 @@ void appendName(int ch, StringBuffer str) { case TYPE_1_: // prefix followed by factorized-elements int offset = ch - m_rangestart_; - int indexes[] = m_utilIntBuffer_; - int factor; // write elements according to the factors // the factorized elements are determined by modulo // arithmetic - synchronized (m_utilIntBuffer_) { - for (int i = m_variant_ - 1; i > 0; i--) { - factor = m_factor_[i] & 0x00FF; - indexes[i] = offset % factor; - offset /= factor; - } + int[] indexes = new int[m_variant_]; + for (int i = m_variant_ - 1; i > 0; i--) { + int factor = m_factor_[i] & 0x00FF; + indexes[i] = offset % factor; + offset /= factor; + } - // we don't need to calculate the last modulus because - // start <= code <= end guarantees here that - // code <= factors[0] - indexes[0] = offset; 
+ // we don't need to calculate the last modulus because + // start <= code <= end guarantees here that + // code <= factors[0] + indexes[0] = offset; - // joining up the factorized strings - str.append(getFactorString(indexes, m_variant_)); - } + // joining up the factorized strings + str.append(getFactorString(indexes, m_variant_)); break; } } @@ -705,28 +693,25 @@ int getChar(String name) { // offset is the character code - start for (int ch = m_rangestart_; ch <= m_rangeend_; ch++) { int offset = ch - m_rangestart_; - int indexes[] = m_utilIntBuffer_; - int factor; // write elements according to the factors // the factorized elements are determined by modulo // arithmetic - synchronized (m_utilIntBuffer_) { - for (int i = m_variant_ - 1; i > 0; i--) { - factor = m_factor_[i] & 0x00FF; - indexes[i] = offset % factor; - offset /= factor; - } + int[] indexes = new int[m_variant_]; + for (int i = m_variant_ - 1; i > 0; i--) { + int factor = m_factor_[i] & 0x00FF; + indexes[i] = offset % factor; + offset /= factor; + } - // we don't need to calculate the last modulus - // because start <= code <= end guarantees here that - // code <= factors[0] - indexes[0] = offset; + // we don't need to calculate the last modulus + // because start <= code <= end guarantees here that + // code <= factors[0] + indexes[0] = offset; - // joining up the factorized strings - if (compareFactorString(indexes, m_variant_, name, prefixlen)) { - return ch; - } + // joining up the factorized strings + if (compareFactorString(indexes, m_variant_, name, prefixlen)) { + return ch; } } } @@ -762,19 +747,18 @@ int add(int set[], int maxlength) { // name = prefix factorized-elements // get the set and maximum factor suffix length for each // factor + StringBuilder sb = new StringBuilder(); for (int i = m_variant_ - 1; i > 0; i--) { int maxfactorlength = 0; int count = 0; for (int factor = m_factor_[i]; factor > 0; --factor) { - synchronized (m_utilStringBuffer_) { - 
m_utilStringBuffer_.setLength(0); - count = - UCharacterUtility.getNullTermByteSubString( - m_utilStringBuffer_, m_factorstring_, count); - UCharacterName.add(set, m_utilStringBuffer_); - if (m_utilStringBuffer_.length() > maxfactorlength) { - maxfactorlength = m_utilStringBuffer_.length(); - } + sb.setLength(0); + count = + UCharacterUtility.getNullTermByteSubString( + sb, m_factorstring_, count); + UCharacterName.add(set, sb); + if (sb.length() > maxfactorlength) { + maxfactorlength = sb.length(); } } length += maxfactorlength; @@ -799,12 +783,6 @@ int add(int set[], int maxlength) { private String m_prefix_; private byte m_factorstring_[]; - /** Utility StringBuffer */ - private StringBuffer m_utilStringBuffer_ = new StringBuffer(); - - /** Utility int buffer */ - private int m_utilIntBuffer_[] = new int[256]; - // private methods ----------------------------------------------- /** @@ -820,27 +798,22 @@ private String getFactorString(int index[], int length) { return null; } - synchronized (m_utilStringBuffer_) { - m_utilStringBuffer_.setLength(0); - int count = 0; - int factor; - size--; - for (int i = 0; i <= size; i++) { - factor = m_factor_[i]; + StringBuilder sb = new StringBuilder(); + int count = 0; + size--; + for (int i = 0; i <= size; i++) { + int factor = m_factor_[i]; + count = + UCharacterUtility.skipNullTermByteSubString( + m_factorstring_, count, index[i]); + count = UCharacterUtility.getNullTermByteSubString(sb, m_factorstring_, count); + if (i != size) { count = UCharacterUtility.skipNullTermByteSubString( - m_factorstring_, count, index[i]); - count = - UCharacterUtility.getNullTermByteSubString( - m_utilStringBuffer_, m_factorstring_, count); - if (i != size) { - count = - UCharacterUtility.skipNullTermByteSubString( - m_factorstring_, count, factor - index[i] - 1); - } + m_factorstring_, count, factor - index[i] - 1); } - return m_utilStringBuffer_.toString(); } + return sb.toString(); } /** @@ -1011,9 +984,6 @@ boolean setGroup(char group[], 
byte groupstring[]) { */ private int m_ISOCommentSet_[] = new int[8]; - /** Utility StringBuffer */ - private StringBuffer m_utilStringBuffer_ = new StringBuffer(); - /** Utility int buffer */ private int m_utilIntBuffer_[] = new int[2]; @@ -1102,14 +1072,11 @@ private String getAlgName(int ch, int choice) { if (choice == UCharacterNameChoice.UNICODE_CHAR_NAME || choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) { // index in terms integer index - synchronized (m_utilStringBuffer_) { - m_utilStringBuffer_.setLength(0); - - for (int index = m_algorithm_.length - 1; index >= 0; index--) { - if (m_algorithm_[index].contains(ch)) { - m_algorithm_[index].appendName(ch, m_utilStringBuffer_); - return m_utilStringBuffer_.toString(); - } + StringBuilder sb = new StringBuilder(); + for (int index = m_algorithm_.length - 1; index >= 0; index--) { + if (m_algorithm_[index].contains(ch)) { + m_algorithm_[index].appendName(ch, sb); + return sb.toString(); } } } @@ -1316,25 +1283,9 @@ private static boolean contains(int set[], char ch) { * calcStringSetLength. * * @param set set to add all chars of str to - * @param str string to add - */ - private static int add(int set[], String str) { - int result = str.length(); - - for (int i = result - 1; i >= 0; i--) { - add(set, str.charAt(i)); - } - return result; - } - - /** - * Adds all characters of the argument str and gets the length Equivalent to - * calcStringSetLength. 
- * - * @param set set to add all chars of str to - * @param str string to add + * @param str CharSequence to add */ - private static int add(int set[], StringBuffer str) { + private static int add(int[] set, CharSequence str) { int result = str.length(); for (int i = result - 1; i >= 0; i--) { @@ -1420,12 +1371,9 @@ private int[] addGroupName(int offset, int length, byte tokenlength[], int set[] // use cached token length byte tlength = tokenlength[b]; if (tlength == 0) { - synchronized (m_utilStringBuffer_) { - m_utilStringBuffer_.setLength(0); - UCharacterUtility.getNullTermByteSubString( - m_utilStringBuffer_, m_tokenstring_, token); - tlength = (byte) add(set, m_utilStringBuffer_); - } + StringBuilder sb = new StringBuilder(); + UCharacterUtility.getNullTermByteSubString(sb, m_tokenstring_, token); + tlength = (byte) add(set, sb); tokenlength[b] = tlength; } resultnlength += tlength; diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/impl/UCharacterUtility.java b/icu4j/main/core/src/main/java/com/ibm/icu/impl/UCharacterUtility.java index 3ee2b64b8fe7..9e60c8b47cae 100644 --- a/icu4j/main/core/src/main/java/com/ibm/icu/impl/UCharacterUtility.java +++ b/icu4j/main/core/src/main/java/com/ibm/icu/impl/UCharacterUtility.java @@ -50,18 +50,18 @@ static int toInt(char msc, char lsc) { * bytes starting from argument start to the next zero byte. If the first byte is a zero, the * next byte will be taken as the first byte. 
* - * @param str stringbuffer to store data in, data will be store with each byte as a char + * @param sb StringBuilder to store data in, data will be stored with each byte as a char * @param array byte array * @param index to start substring in byte count * @return the end position of the substring within the character array */ - static int getNullTermByteSubString(StringBuffer str, byte[] array, int index) { + static int getNullTermByteSubString(StringBuilder sb, byte[] array, int index) { byte b = 1; while (b != 0) { b = array[index]; if (b != 0) { - str.append((char) (b & 0x00FF)); + sb.append((char) (b & 0x00FF)); } index++; }