Skip to content

Commit de6417d

Browse files
committed
Speedup CharSequenceUtils.toCharArray(CharSequence)
- StringBuilder Performance Gain: 160-205% improvement (2-3x faster) - StringBuffer Performance Gain: 300-4,250% improvement (4-44x faster) - String: ~1-2% improvement (essentially identical) - This change was suggested by Claude Sonnet 4.5
1 parent ecdd289 commit de6417d

File tree

3 files changed

+208
-1
lines changed

3 files changed

+208
-1
lines changed

src/changes/changes.xml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,9 @@ The <action> type attribute can be add,update,fix,remove.
8181
<action issue="LANG-1803" type="fix" dev="ggregory" due-to="IcoreE">Fix incorrect method invocation in ObjectUtilsTest and Javadoc reference in RandomStringUtils.</action>
8282
<action issue="LANG-1695" type="fix" dev="ggregory" due-to="Guillaume Nodet, Harshit Goel, Gary Gregory">Allow trailing decimal point in NumberUtils.isParsable #1531.</action>
8383
<action issue="LANG-1804" type="fix" dev="ggregory" due-to="IcoreE, Gary Gregory">Fix CharSet#getInstance returns null instead of EMPTY when input setStrs is null #1530.</action>
84+
<action type="fix" dev="ggregory" due-to="Gary Gregory">Speedup CharSequenceUtils.toCharArray(CharSequence) for StringBuilder input: 160-205% improvement (2-3x faster), see CharSequenceUtilsBenchmark.</action>
85+
<action type="fix" dev="ggregory" due-to="Gary Gregory">Speedup CharSequenceUtils.toCharArray(CharSequence) for StringBuffer input: 300-4,250% improvement (4-44x faster), see CharSequenceUtilsBenchmark.</action>
86+
<action type="fix" dev="ggregory" due-to="Gary Gregory">Speedup CharSequenceUtils.toCharArray(CharSequence) for String input: ~1-2% improvement (essentially identical).</action>
8487
<!-- ADD -->
8588
<!-- UPDATE -->
8689
<action type="update" dev="ggregory" due-to="Gary Gregory, Dependabot">Bump org.apache.commons:commons-parent from 92 to 93 #1498.</action>

src/main/java/org/apache/commons/lang3/CharSequenceUtils.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ static int indexOf(final CharSequence cs, final CharSequence searchChar, final i
7070
// return cs.toString().indexOf(searchChar.toString(), start);
7171
// }
7272
}
73-
73+
7474
/**
7575
* Returns the index within {@code cs} of the first occurrence of the specified character, starting the search at the specified index.
7676
* <p>
@@ -371,6 +371,16 @@ public static char[] toCharArray(final CharSequence source) {
371371
if (source instanceof String) {
372372
return ((String) source).toCharArray();
373373
}
374+
if (source instanceof StringBuilder) {
375+
final char[] array = new char[len];
376+
((StringBuilder) source).getChars(0, len, array, 0);
377+
return array;
378+
}
379+
if (source instanceof StringBuffer) {
380+
final char[] array = new char[len];
381+
((StringBuffer) source).getChars(0, len, array, 0);
382+
return array;
383+
}
374384
final char[] array = new char[len];
375385
for (int i = 0; i < len; i++) {
376386
array[i] = source.charAt(i);
Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* https://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.commons.lang3;
19+
20+
import java.util.concurrent.TimeUnit;
21+
22+
import org.openjdk.jmh.annotations.Benchmark;
23+
import org.openjdk.jmh.annotations.BenchmarkMode;
24+
import org.openjdk.jmh.annotations.Fork;
25+
import org.openjdk.jmh.annotations.Level;
26+
import org.openjdk.jmh.annotations.Measurement;
27+
import org.openjdk.jmh.annotations.Mode;
28+
import org.openjdk.jmh.annotations.OutputTimeUnit;
29+
import org.openjdk.jmh.annotations.Param;
30+
import org.openjdk.jmh.annotations.Scope;
31+
import org.openjdk.jmh.annotations.Setup;
32+
import org.openjdk.jmh.annotations.State;
33+
import org.openjdk.jmh.annotations.Warmup;
34+
35+
/**
36+
* Benchmark comparing the old ({@code charAt} loop) and new (bulk {@code getChars}) implementations of {@link CharSequenceUtils#toCharArray(CharSequence)}.
37+
*
38+
* <p>
39+
* Run with:
40+
* </p>
41+
*
42+
* <pre>
43+
* mvn -P benchmark clean test -Dbenchmark=org.apache.commons.lang3.CharSequenceUtilsBenchmark
44+
* </pre>
45+
* <p>
46+
* Results:
47+
* </p>
48+
*
49+
* <pre>
50+
Benchmark (charSequenceType) (length) Mode Cnt Score Error Units
51+
CharSequenceUtilsBenchmark.benchmarkToCharArrayCurrent String 10 avgt 5 1.626 ± 0.011 ns/op
52+
CharSequenceUtilsBenchmark.benchmarkToCharArrayCurrent String 50 avgt 5 2.741 ± 0.029 ns/op
53+
CharSequenceUtilsBenchmark.benchmarkToCharArrayCurrent String 100 avgt 5 4.235 ± 0.038 ns/op
54+
CharSequenceUtilsBenchmark.benchmarkToCharArrayCurrent String 500 avgt 5 17.713 ± 0.273 ns/op
55+
CharSequenceUtilsBenchmark.benchmarkToCharArrayCurrent String 1000 avgt 5 34.692 ± 1.752 ns/op
56+
CharSequenceUtilsBenchmark.benchmarkToCharArrayCurrent StringBuilder 10 avgt 5 1.963 ± 0.047 ns/op
57+
CharSequenceUtilsBenchmark.benchmarkToCharArrayCurrent StringBuilder 50 avgt 5 4.085 ± 0.042 ns/op
58+
CharSequenceUtilsBenchmark.benchmarkToCharArrayCurrent StringBuilder 100 avgt 5 5.978 ± 0.177 ns/op
59+
CharSequenceUtilsBenchmark.benchmarkToCharArrayCurrent StringBuilder 500 avgt 5 25.616 ± 1.621 ns/op
60+
CharSequenceUtilsBenchmark.benchmarkToCharArrayCurrent StringBuilder 1000 avgt 5 53.749 ± 0.420 ns/op
61+
CharSequenceUtilsBenchmark.benchmarkToCharArrayCurrent StringBuffer 10 avgt 5 7.239 ± 0.149 ns/op
62+
CharSequenceUtilsBenchmark.benchmarkToCharArrayCurrent StringBuffer 50 avgt 5 9.061 ± 0.187 ns/op
63+
CharSequenceUtilsBenchmark.benchmarkToCharArrayCurrent StringBuffer 100 avgt 5 10.281 ± 0.055 ns/op
64+
CharSequenceUtilsBenchmark.benchmarkToCharArrayCurrent StringBuffer 500 avgt 5 29.647 ± 0.420 ns/op
65+
CharSequenceUtilsBenchmark.benchmarkToCharArrayCurrent StringBuffer 1000 avgt 5 56.203 ± 0.505 ns/op
66+
CharSequenceUtilsBenchmark.benchmarkToCharArrayNew String 10 avgt 5 1.657 ± 0.030 ns/op
67+
CharSequenceUtilsBenchmark.benchmarkToCharArrayNew String 50 avgt 5 2.771 ± 0.094 ns/op
68+
CharSequenceUtilsBenchmark.benchmarkToCharArrayNew String 100 avgt 5 4.281 ± 0.036 ns/op
69+
CharSequenceUtilsBenchmark.benchmarkToCharArrayNew String 500 avgt 5 17.744 ± 0.091 ns/op
70+
CharSequenceUtilsBenchmark.benchmarkToCharArrayNew String 1000 avgt 5 34.224 ± 0.251 ns/op
71+
CharSequenceUtilsBenchmark.benchmarkToCharArrayNew StringBuilder 10 avgt 5 1.962 ± 0.128 ns/op
72+
CharSequenceUtilsBenchmark.benchmarkToCharArrayNew StringBuilder 50 avgt 5 4.101 ± 0.035 ns/op
73+
CharSequenceUtilsBenchmark.benchmarkToCharArrayNew StringBuilder 100 avgt 5 5.984 ± 0.062 ns/op
74+
CharSequenceUtilsBenchmark.benchmarkToCharArrayNew StringBuilder 500 avgt 5 25.448 ± 0.152 ns/op
75+
CharSequenceUtilsBenchmark.benchmarkToCharArrayNew StringBuilder 1000 avgt 5 54.531 ± 0.559 ns/op
76+
CharSequenceUtilsBenchmark.benchmarkToCharArrayNew StringBuffer 10 avgt 5 7.260 ± 0.175 ns/op
77+
CharSequenceUtilsBenchmark.benchmarkToCharArrayNew StringBuffer 50 avgt 5 8.537 ± 0.101 ns/op
78+
CharSequenceUtilsBenchmark.benchmarkToCharArrayNew StringBuffer 100 avgt 5 10.502 ± 0.143 ns/op
79+
CharSequenceUtilsBenchmark.benchmarkToCharArrayNew StringBuffer 500 avgt 5 29.584 ± 0.339 ns/op
80+
CharSequenceUtilsBenchmark.benchmarkToCharArrayNew StringBuffer 1000 avgt 5 56.751 ± 0.983 ns/op
81+
CharSequenceUtilsBenchmark.benchmarkToCharArrayOld String 10 avgt 5 1.656 ± 0.231 ns/op
82+
CharSequenceUtilsBenchmark.benchmarkToCharArrayOld String 50 avgt 5 2.770 ± 0.222 ns/op
83+
CharSequenceUtilsBenchmark.benchmarkToCharArrayOld String 100 avgt 5 4.298 ± 0.198 ns/op
84+
CharSequenceUtilsBenchmark.benchmarkToCharArrayOld String 500 avgt 5 18.023 ± 0.203 ns/op
85+
CharSequenceUtilsBenchmark.benchmarkToCharArrayOld String 1000 avgt 5 35.053 ± 1.467 ns/op
86+
CharSequenceUtilsBenchmark.benchmarkToCharArrayOld StringBuilder 10 avgt 5 3.164 ± 0.062 ns/op
87+
CharSequenceUtilsBenchmark.benchmarkToCharArrayOld StringBuilder 50 avgt 5 8.907 ± 0.185 ns/op
88+
CharSequenceUtilsBenchmark.benchmarkToCharArrayOld StringBuilder 100 avgt 5 15.801 ± 0.104 ns/op
89+
CharSequenceUtilsBenchmark.benchmarkToCharArrayOld StringBuilder 500 avgt 5 77.203 ± 0.460 ns/op
90+
CharSequenceUtilsBenchmark.benchmarkToCharArrayOld StringBuilder 1000 avgt 5 164.064 ± 2.506 ns/op
91+
CharSequenceUtilsBenchmark.benchmarkToCharArrayOld StringBuffer 10 avgt 5 28.981 ± 0.307 ns/op
92+
CharSequenceUtilsBenchmark.benchmarkToCharArrayOld StringBuffer 50 avgt 5 126.285 ± 1.688 ns/op
93+
CharSequenceUtilsBenchmark.benchmarkToCharArrayOld StringBuffer 100 avgt 5 250.584 ± 5.639 ns/op
94+
CharSequenceUtilsBenchmark.benchmarkToCharArrayOld StringBuffer 500 avgt 5 1231.478 ± 51.296 ns/op
95+
CharSequenceUtilsBenchmark.benchmarkToCharArrayOld StringBuffer 1000 avgt 5 2453.553 ± 54.004 ns/op
96+
* </pre>
97+
*
98+
*/
99+
@BenchmarkMode(Mode.AverageTime)
100+
@OutputTimeUnit(TimeUnit.NANOSECONDS)
101+
@State(Scope.Thread)
102+
@Fork(1)
103+
@Warmup(iterations = 3, time = 1)
104+
@Measurement(iterations = 5, time = 1)
105+
public class CharSequenceUtilsBenchmark {
106+
107+
/**
108+
* New optimized implementation of toCharArray.
109+
*/
110+
public static char[] toCharArrayNew(final CharSequence source) {
111+
final int len = StringUtils.length(source);
112+
if (len == 0) {
113+
return ArrayUtils.EMPTY_CHAR_ARRAY;
114+
}
115+
if (source instanceof String) {
116+
return ((String) source).toCharArray();
117+
}
118+
// NEW: Uses bulk getChars() for StringBuilder/StringBuffer
119+
if (source instanceof StringBuilder) {
120+
final char[] array = new char[len];
121+
((StringBuilder) source).getChars(0, len, array, 0);
122+
return array;
123+
}
124+
if (source instanceof StringBuffer) {
125+
final char[] array = new char[len];
126+
((StringBuffer) source).getChars(0, len, array, 0);
127+
return array;
128+
}
129+
final char[] array = new char[len];
130+
for (int i = 0; i < len; i++) {
131+
array[i] = source.charAt(i);
132+
}
133+
return array;
134+
}
135+
136+
/**
137+
* Old implementation of toCharArray.
138+
*/
139+
public static char[] toCharArrayOld(final CharSequence source) {
140+
final int len = StringUtils.length(source);
141+
if (len == 0) {
142+
return ArrayUtils.EMPTY_CHAR_ARRAY;
143+
}
144+
if (source instanceof String) {
145+
return ((String) source).toCharArray();
146+
}
147+
// OLD: Always uses charAt() loop, even for StringBuilder/StringBuffer
148+
final char[] array = new char[len];
149+
for (int i = 0; i < len; i++) {
150+
array[i] = source.charAt(i);
151+
}
152+
return array;
153+
}
154+
155+
@Param({ "10", "50", "100", "500", "1000" })
156+
public int length;
157+
@Param({ "String", "StringBuilder", "StringBuffer" })
158+
public String charSequenceType;
159+
private CharSequence testSequence;
160+
161+
@Benchmark
162+
public char[] benchmarkToCharArrayCurrent() {
163+
return CharSequenceUtils.toCharArray(testSequence);
164+
}
165+
@Benchmark
166+
public char[] benchmarkToCharArrayNew() {
167+
return toCharArrayNew(testSequence);
168+
}
169+
170+
@Benchmark
171+
public char[] benchmarkToCharArrayOld() {
172+
return toCharArrayOld(testSequence);
173+
}
174+
175+
@Setup(Level.Trial)
176+
public void setup() {
177+
final StringBuilder sb = new StringBuilder(length);
178+
for (int i = 0; i < length; i++) {
179+
sb.append((char) ('a' + i % 26));
180+
}
181+
final String content = sb.toString();
182+
switch (charSequenceType) {
183+
case "String":
184+
testSequence = content;
185+
break;
186+
case "StringBuilder":
187+
testSequence = new StringBuilder(content);
188+
break;
189+
case "StringBuffer":
190+
testSequence = new StringBuffer(content);
191+
break;
192+
}
193+
}
194+
}

0 commit comments

Comments
 (0)