Skip to content

Commit 65f8e5d

Browse files
committed
Speed up IOUtils.contentEquals(Reader, Reader)
- ~2.6x speedup on StringReader input
- ~10% speedup on a file resource read through an InputStreamReader

Benchmark                                                            Mode  Cnt          Score         Error  Units
IOUtilsContentEqualsReadersBenchmark_2_22_0.testFileCurrent          avgt    5     105274.452 ±    1466.048  ns/op
IOUtilsContentEqualsReadersBenchmark_2_22_0.testFileRelease2_22_0    avgt    5     107500.847 ±    1752.422  ns/op
IOUtilsContentEqualsReadersBenchmark_2_22_0.testFile_2_21_0          avgt    5     115720.416 ±    1209.652  ns/op
IOUtilsContentEqualsReadersBenchmark_2_22_0.testStringCurrent        avgt    5  113330719.330 ± 1187191.151  ns/op
IOUtilsContentEqualsReadersBenchmark_2_22_0.testStringRelease2_22_0  avgt    5  110389392.582 ±  785367.455  ns/op
IOUtilsContentEqualsReadersBenchmark_2_22_0.testString_2_21_0        avgt    5  284939866.619 ± 9969793.485  ns/op

Apache Maven 3.9.12 (848fbb4bf2d427b72bdb2471c22fced7ebd9a7a1)
Maven home: /opt/homebrew/Cellar/maven/3.9.12/libexec
Java version: 21.0.9, vendor: Homebrew, runtime: /opt/homebrew/Cellar/openjdk@21/21.0.9/libexec/openjdk.jdk/Contents/Home
Default locale: en_US, platform encoding: UTF-8
OS name: "mac os x", version: "26.2", arch: "aarch64", family: "mac"
1 parent 1f80529 commit 65f8e5d

File tree

4 files changed

+589
-35
lines changed

4 files changed

+589
-35
lines changed

src/main/java/org/apache/commons/io/IOUtils.java

Lines changed: 17 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1256,10 +1256,7 @@ public static boolean contentEquals(final InputStream input1, final InputStream
12561256
// TODO Consider making public
12571257
private static boolean contentEquals(final Iterator<?> iterator1, final Iterator<?> iterator2) {
12581258
while (iterator1.hasNext()) {
1259-
if (!iterator2.hasNext()) {
1260-
return false;
1261-
}
1262-
if (!Objects.equals(iterator1.next(), iterator2.next())) {
1259+
if (!iterator2.hasNext() || !Objects.equals(iterator1.next(), iterator2.next())) {
12631260
return false;
12641261
}
12651262
}
@@ -1280,45 +1277,32 @@ private static boolean contentEquals(final Iterator<?> iterator1, final Iterator
12801277
* @since 1.1
12811278
*/
12821279
public static boolean contentEquals(final Reader input1, final Reader input2) throws IOException {
1280+
// See IOUtilsContentEqualsReadersBenchmark_2_22_0 for performance testing.
12831281
if (input1 == input2) {
12841282
return true;
12851283
}
12861284
if (input1 == null || input2 == null) {
12871285
return false;
12881286
}
1289-
1290-
// reuse one
12911287
try (ScratchChars scratch = IOUtils.ScratchChars.get()) {
12921288
final char[] array1 = scratch.array();
1293-
// but allocate another
12941289
final char[] array2 = charArray();
1295-
int pos1;
1296-
int pos2;
1297-
int count1;
1298-
int count2;
1290+
int read1;
1291+
int read2;
12991292
while (true) {
1300-
pos1 = 0;
1301-
pos2 = 0;
1302-
for (int index = 0; index < DEFAULT_BUFFER_SIZE; index++) {
1303-
if (pos1 == index) {
1304-
do {
1305-
count1 = input1.read(array1, pos1, DEFAULT_BUFFER_SIZE - pos1);
1306-
} while (count1 == 0);
1307-
if (count1 == EOF) {
1308-
return pos2 == index && input2.read() == EOF;
1309-
}
1310-
pos1 += count1;
1311-
}
1312-
if (pos2 == index) {
1313-
do {
1314-
count2 = input2.read(array2, pos2, DEFAULT_BUFFER_SIZE - pos2);
1315-
} while (count2 == 0);
1316-
if (count2 == EOF) {
1317-
return pos1 == index && input1.read() == EOF;
1318-
}
1319-
pos2 += count2;
1320-
}
1321-
if (array1[index] != array2[index]) {
1293+
read1 = input1.read(array1, 0, DEFAULT_BUFFER_SIZE);
1294+
read2 = input2.read(array2, 0, DEFAULT_BUFFER_SIZE);
1295+
// If both read EOF here, they're equal.
1296+
if (read1 == EOF && read2 == EOF) {
1297+
return true;
1298+
}
1299+
// If only one read EOF or different amounts, they're not equal.
1300+
if (read1 != read2) {
1301+
return false;
1302+
}
1303+
// Compare the buffers - bulk comparison is faster than character-by-character
1304+
for (int i = 0; i < read1; i++) {
1305+
if (array1[i] != array2[i]) {
13221306
return false;
13231307
}
13241308
}
Lines changed: 255 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,255 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* https://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.commons.io.jmh;
19+
20+
import static org.apache.commons.io.IOUtils.DEFAULT_BUFFER_SIZE;
21+
import static org.apache.commons.io.IOUtils.EOF;
22+
23+
import java.io.BufferedReader;
24+
import java.io.IOException;
25+
import java.io.InputStreamReader;
26+
import java.io.Reader;
27+
import java.io.StringReader;
28+
import java.nio.charset.Charset;
29+
import java.util.concurrent.TimeUnit;
30+
31+
import org.apache.commons.io.IOUtils;
32+
import org.apache.commons.lang3.StringUtils;
33+
import org.openjdk.jmh.annotations.Benchmark;
34+
import org.openjdk.jmh.annotations.BenchmarkMode;
35+
import org.openjdk.jmh.annotations.Fork;
36+
import org.openjdk.jmh.annotations.Measurement;
37+
import org.openjdk.jmh.annotations.Mode;
38+
import org.openjdk.jmh.annotations.OutputTimeUnit;
39+
import org.openjdk.jmh.annotations.Scope;
40+
import org.openjdk.jmh.annotations.State;
41+
import org.openjdk.jmh.annotations.Warmup;
42+
import org.openjdk.jmh.infra.Blackhole;
43+
44+
/**
45+
* Test different implementations of {@link IOUtils#contentEquals(Reader, Reader)}.
46+
*
47+
* <pre>
48+
* RESULTS:
49+
* Benchmark Mode Cnt Score Error Units
50+
* IOUtilsContentEqualsReadersBenchmark_2_22_0.testFileCurrent avgt 5 105274.452 ± 1466.048 ns/op
51+
* IOUtilsContentEqualsReadersBenchmark_2_22_0.testFileRelease2_22_0 avgt 5 107500.847 ± 1752.422 ns/op
52+
* IOUtilsContentEqualsReadersBenchmark_2_22_0.testFile_2_21_0 avgt 5 115720.416 ± 1209.652 ns/op
53+
* IOUtilsContentEqualsReadersBenchmark_2_22_0.testStringCurrent avgt 5 113330719.330 ± 1187191.151 ns/op
54+
* IOUtilsContentEqualsReadersBenchmark_2_22_0.testStringRelease2_22_0 avgt 5 110389392.582 ± 785367.455 ns/op
55+
* IOUtilsContentEqualsReadersBenchmark_2_22_0.testString_2_21_0 avgt 5 284939866.619 ± 9969793.485 ns/op
56+
*
57+
* Run: mvn clean test -P benchmark -Dbenchmark=IOUtilsContentEqualsReadersBenchmark_2_22_0
58+
* </pre>
59+
*/
60+
@BenchmarkMode(Mode.AverageTime)
61+
@OutputTimeUnit(TimeUnit.NANOSECONDS)
62+
@State(Scope.Thread)
63+
@Warmup(iterations = 5, time = 10, timeUnit = TimeUnit.SECONDS)
64+
@Measurement(iterations = 5, time = 10, timeUnit = TimeUnit.SECONDS)
65+
@Fork(value = 1, jvmArgs = {"-server"})
66+
public class IOUtilsContentEqualsReadersBenchmark_2_22_0 {
67+
68+
/** Repetition count used when generating the in-memory test strings (2^24). */
private static final int STRING_LEN = 1 << 24;

/** Classpath locations of the fixture files read by the testFile* benchmarks. */
private static final String TEST_PATH_A = "/org/apache/commons/io/testfileBOM.xml";
private static final String TEST_PATH_16K_A = "/org/apache/commons/io/abitmorethan16k.txt";
private static final String TEST_PATH_16K_A_COPY = "/org/apache/commons/io/abitmorethan16kcopy.txt";
private static final String TEST_PATH_B = "/org/apache/commons/io/testfileNoBOM.xml";

/** Platform default charset, used when decoding the fixture files. */
private static final Charset DEFAULT_CHARSET = Charset.defaultCharset();

/** Inputs for the testString* benchmarks; every ordered pair of entries is compared. */
static String[] STRINGS = new String[5];

static {
    // Entries 0-2 share one long "abab..." prefix; entries 1 and 2 differ only in their final char.
    final String repeatedAb = StringUtils.repeat("ab", STRING_LEN);
    STRINGS[0] = repeatedAb;
    STRINGS[1] = repeatedAb + 'c';
    STRINGS[2] = repeatedAb + 'd';
    // Entries 3 and 4 mix CR, LF and CRLF line terminators.
    STRINGS[3] = StringUtils.repeat("ab\rab\n", STRING_LEN);
    STRINGS[4] = StringUtils.repeat("ab\r\nab\r", STRING_LEN);
}

// NOTE(review): neither constant below is referenced by any benchmark visible in this class.
static String SPECIAL_CASE_STRING_0 = StringUtils.repeat(StringUtils.repeat("ab", STRING_LEN) + '\n', 2);
static String SPECIAL_CASE_STRING_1 = StringUtils.repeat(StringUtils.repeat("cd", STRING_LEN) + '\n', 2);
86+
87+
public static boolean contentEquals_2_21_0(final Reader input1, final Reader input2) throws IOException {
88+
if (input1 == input2) {
89+
return true;
90+
}
91+
if (input1 == null || input2 == null) {
92+
return false;
93+
}
94+
final char[] array1 = new char[DEFAULT_BUFFER_SIZE];
95+
final char[] array2 = new char[DEFAULT_BUFFER_SIZE];
96+
int pos1;
97+
int pos2;
98+
int count1;
99+
int count2;
100+
while (true) {
101+
pos1 = 0;
102+
pos2 = 0;
103+
for (int index = 0; index < DEFAULT_BUFFER_SIZE; index++) {
104+
if (pos1 == index) {
105+
do {
106+
count1 = input1.read(array1, pos1, DEFAULT_BUFFER_SIZE - pos1);
107+
} while (count1 == 0);
108+
if (count1 == EOF) {
109+
return pos2 == index && input2.read() == EOF;
110+
}
111+
pos1 += count1;
112+
}
113+
if (pos2 == index) {
114+
do {
115+
count2 = input2.read(array2, pos2, DEFAULT_BUFFER_SIZE - pos2);
116+
} while (count2 == 0);
117+
if (count2 == EOF) {
118+
return pos1 == index && input1.read() == EOF;
119+
}
120+
pos2 += count2;
121+
}
122+
if (array1[index] != array2[index]) {
123+
return false;
124+
}
125+
}
126+
}
127+
}
128+
129+
/**
130+
* Version 2.22.0 (December 2025).
131+
*/
132+
public static boolean contentEqualsRelease2_22_0(final Reader input1, final Reader input2) throws IOException {
133+
if (input1 == input2) {
134+
return true;
135+
}
136+
if (input1 == null || input2 == null) {
137+
return false;
138+
}
139+
final char[] array1 = new char[DEFAULT_BUFFER_SIZE];
140+
final char[] array2 = new char[DEFAULT_BUFFER_SIZE];
141+
int read1;
142+
int read2;
143+
while (true) {
144+
read1 = input1.read(array1, 0, DEFAULT_BUFFER_SIZE);
145+
read2 = input2.read(array2, 0, DEFAULT_BUFFER_SIZE);
146+
// If both read EOF here, they're equal.
147+
if (read1 == EOF && read2 == EOF) {
148+
return true;
149+
}
150+
// If only one read EOF or different amounts, they're not equal.
151+
if (read1 != read2) {
152+
return false;
153+
}
154+
// Compare the buffers - bulk comparison is faster than character-by-character
155+
for (int i = 0; i < read1; i++) {
156+
if (array1[i] != array2[i]) {
157+
return false;
158+
}
159+
}
160+
}
161+
}
162+
163+
@Benchmark
164+
public boolean[] testFile_2_21_0() throws IOException {
165+
final boolean[] res = new boolean[3];
166+
try (Reader input1 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_A), DEFAULT_CHARSET);
167+
Reader input2 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_B), DEFAULT_CHARSET)) {
168+
res[0] = contentEquals_2_21_0(input1, input1);
169+
}
170+
try (Reader input1 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_A), DEFAULT_CHARSET);
171+
Reader input2 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_A), DEFAULT_CHARSET)) {
172+
res[1] = contentEquals_2_21_0(input1, input2);
173+
}
174+
try (Reader input1 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_16K_A));
175+
Reader input2 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_16K_A_COPY))) {
176+
res[2] = contentEquals_2_21_0(input1, input2);
177+
}
178+
return res;
179+
}
180+
181+
@Benchmark
182+
public boolean[] testFileCurrent() throws IOException {
183+
final boolean[] res = new boolean[3];
184+
try (Reader input1 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_A), DEFAULT_CHARSET);
185+
Reader input2 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_B), DEFAULT_CHARSET)) {
186+
res[0] = IOUtils.contentEquals(input1, input1);
187+
}
188+
try (Reader input1 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_A), DEFAULT_CHARSET);
189+
Reader input2 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_A), DEFAULT_CHARSET)) {
190+
res[1] = IOUtils.contentEquals(input1, input2);
191+
}
192+
try (Reader input1 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_16K_A), DEFAULT_CHARSET);
193+
Reader input2 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_16K_A_COPY),
194+
DEFAULT_CHARSET)) {
195+
res[2] = IOUtils.contentEquals(input1, input2);
196+
}
197+
return res;
198+
}
199+
200+
@Benchmark
201+
public boolean[] testFileRelease2_22_0() throws IOException {
202+
final boolean[] res = new boolean[3];
203+
try (Reader input1 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_A), DEFAULT_CHARSET);
204+
Reader input2 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_B), DEFAULT_CHARSET)) {
205+
res[0] = contentEqualsRelease2_22_0(input1, input1);
206+
}
207+
try (Reader input1 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_A), DEFAULT_CHARSET);
208+
Reader input2 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_A), DEFAULT_CHARSET)) {
209+
res[1] = contentEqualsRelease2_22_0(input1, input2);
210+
}
211+
try (Reader input1 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_16K_A), DEFAULT_CHARSET);
212+
Reader input2 = new InputStreamReader(getClass().getResourceAsStream(TEST_PATH_16K_A_COPY),
213+
DEFAULT_CHARSET)) {
214+
res[2] = contentEqualsRelease2_22_0(input1, input2);
215+
}
216+
return res;
217+
}
218+
219+
@Benchmark
220+
public void testString_2_21_0(final Blackhole blackhole) throws IOException {
221+
for (int i = 0; i < 5; i++) {
222+
for (int j = 0; j < 5; j++) {
223+
try (StringReader input1 = new StringReader(STRINGS[i]);
224+
StringReader input2 = new StringReader(STRINGS[j])) {
225+
blackhole.consume(contentEquals_2_21_0(input1, input2));
226+
}
227+
}
228+
}
229+
}
230+
231+
@Benchmark
232+
public void testStringCurrent(final Blackhole blackhole) throws IOException {
233+
for (int i = 0; i < 5; i++) {
234+
for (int j = 0; j < 5; j++) {
235+
try (StringReader input1 = new StringReader(STRINGS[i]);
236+
StringReader input2 = new StringReader(STRINGS[j])) {
237+
blackhole.consume(IOUtils.contentEquals(input1, input2));
238+
}
239+
}
240+
}
241+
}
242+
243+
@Benchmark
244+
public void testStringRelease2_22_0(final Blackhole blackhole) throws IOException {
245+
for (int i = 0; i < 5; i++) {
246+
for (int j = 0; j < 5; j++) {
247+
try (StringReader input1 = new StringReader(STRINGS[i]);
248+
StringReader input2 = new StringReader(STRINGS[j])) {
249+
blackhole.consume(contentEqualsRelease2_22_0(input1, input2));
250+
}
251+
}
252+
}
253+
}
254+
255+
}

src/test/java/org/apache/commons/io/jmh/IOUtilsContentEqualsReadersBenchmark.java renamed to src/test/java/org/apache/commons/io/jmh/IOUtilsContentEqualsReadersBenchmark_2_9_0.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@
5959
@Warmup(iterations = 5, time = 10, timeUnit = TimeUnit.SECONDS)
6060
@Measurement(iterations = 5, time = 10, timeUnit = TimeUnit.SECONDS)
6161
@Fork(value = 1, jvmArgs = {"-server"})
62-
public class IOUtilsContentEqualsReadersBenchmark {
62+
public class IOUtilsContentEqualsReadersBenchmark_2_9_0 {
6363

6464
private static final int STRING_LEN = 1 << 24;
6565
private static final String TEST_PATH_A = "/org/apache/commons/io/testfileBOM.xml";
@@ -80,7 +80,6 @@ public class IOUtilsContentEqualsReadersBenchmark {
8080
static String SPECIAL_CASE_STRING_0 = StringUtils.repeat(StringUtils.repeat("ab", STRING_LEN) + '\n', 2);
8181
static String SPECIAL_CASE_STRING_1 = StringUtils.repeat(StringUtils.repeat("cd", STRING_LEN) + '\n', 2);
8282

83-
@SuppressWarnings("resource")
8483
public static boolean contentEquals_release_2_8_0(final Reader input1, final Reader input2) throws IOException {
8584
if (input1 == input2) {
8685
return true;

0 commit comments

Comments
 (0)