Skip to content

Commit 87408a0

Browse files
authored
[collections-838] Fix exponential hasValue() performance (#633)
Cache the result of hasNext() until next() or remove() is invoked. This avoids repeated hasNext() calls on nested chained iterators, significantly reducing overhead in deeply nested scenarios.
1 parent d8c59cd commit 87408a0

File tree

4 files changed

+136
-13
lines changed

4 files changed

+136
-13
lines changed

src/changes/changes.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
<action type="fix" dev="ggregory" due-to="Sebastian Götz, Gary Gregory" issue="COLLECTIONS-874">MapUtils.getLongValue(Map, K, Function) returns a byte instead of a long.</action>
3333
<action type="fix" dev="ggregory" due-to="Gary Gregory">Fix exception message in org.apache.commons.collections4.functors.FunctorUtils.validate(Consumer...)</action>
3434
<action type="fix" dev="ggregory" due-to="Gary Gregory">Fix exception message in org.apache.commons.collections4.iterators.UnmodifiableIterator.remove() to match java.util.Iterator.remove().</action>
35+
<action type="fix" dev="pkarwasz" due-to="Piotr P. Karwasz, Joerg Budischewski" issue="COLLECTIONS-838">Calling SetUtils.union on multiple instances of SetView causes JVM to hang</action>
3536
<!-- ADD -->
3637
<action type="add" dev="ggregory" due-to="Gary Gregory">Add generics to UnmodifiableIterator for the wrapped type.</action>
3738
<!-- UPDATE -->

src/main/java/org/apache/commons/collections4/iterators/IteratorChain.java

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ public class IteratorChain<E> implements Iterator<E> {
6363
private Iterator<? extends E> currentIterator;
6464

6565
/**
66-
* The "last used" Iterator is the Iterator upon which next() or hasNext()
66+
* The "last used" Iterator is the Iterator upon which next()
6767
* was most recently called used for the remove() operation only
6868
*/
6969
private Iterator<? extends E> lastUsedIterator;
@@ -74,6 +74,11 @@ public class IteratorChain<E> implements Iterator<E> {
7474
*/
7575
private boolean isLocked;
7676

77+
/**
78+
* Contains the result of the last hasNext() call until next() is invoked
79+
*/
80+
private Boolean cachedHasNextValue;
81+
7782
/**
7883
* Constructs an IteratorChain with no Iterators.
7984
* <p>
@@ -183,9 +188,10 @@ private void checkLocked() {
183188
@Override
184189
public boolean hasNext() {
185190
lockChain();
186-
updateCurrentIterator();
187-
lastUsedIterator = currentIterator;
188-
return currentIterator.hasNext();
191+
if (cachedHasNextValue == null) {
192+
updateCurrentIterator();
193+
}
194+
return cachedHasNextValue;
189195
}
190196

191197
/**
@@ -219,9 +225,11 @@ private void lockChain() {
219225
@Override
220226
public E next() {
221227
lockChain();
222-
updateCurrentIterator();
228+
if (cachedHasNextValue == null) {
229+
updateCurrentIterator();
230+
}
223231
lastUsedIterator = currentIterator;
224-
232+
cachedHasNextValue = null;
225233
return currentIterator.next();
226234
}
227235

@@ -241,10 +249,11 @@ public E next() {
241249
@Override
242250
public void remove() {
243251
lockChain();
244-
if (currentIterator == null) {
245-
updateCurrentIterator();
252+
if (lastUsedIterator == null) {
253+
throw new IllegalStateException("remove() has been invoked without next()");
246254
}
247255
lastUsedIterator.remove();
256+
lastUsedIterator = null; // must never be used twice without next() being invoked
248257
}
249258

250259
/**
@@ -267,13 +276,16 @@ protected void updateCurrentIterator() {
267276
} else {
268277
currentIterator = iteratorQueue.remove();
269278
}
270-
// set last used iterator here, in case the user calls remove
271-
// before calling hasNext() or next() (although they shouldn't)
272-
lastUsedIterator = currentIterator;
273279
}
274-
while (!currentIterator.hasNext() && !iteratorQueue.isEmpty()) {
280+
while (true) {
281+
cachedHasNextValue = currentIterator.hasNext();
282+
if (cachedHasNextValue) {
283+
break;
284+
}
285+
if (iteratorQueue.isEmpty()) {
286+
break;
287+
}
275288
currentIterator = iteratorQueue.remove();
276289
}
277290
}
278-
279291
}

src/test/java/org/apache/commons/collections4/SetUtilsTest.java

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,17 +23,22 @@
2323
import static org.junit.jupiter.api.Assertions.assertNull;
2424
import static org.junit.jupiter.api.Assertions.assertSame;
2525
import static org.junit.jupiter.api.Assertions.assertThrows;
26+
import static org.junit.jupiter.api.Assertions.assertTimeoutPreemptively;
2627
import static org.junit.jupiter.api.Assertions.assertTrue;
2728

29+
import java.time.Duration;
2830
import java.util.Arrays;
2931
import java.util.Collection;
3032
import java.util.HashSet;
33+
import java.util.Iterator;
3134
import java.util.Set;
3235

3336
import org.apache.commons.collections4.SetUtils.SetView;
3437
import org.apache.commons.collections4.set.PredicatedSet;
3538
import org.junit.jupiter.api.BeforeEach;
3639
import org.junit.jupiter.api.Test;
40+
import org.junit.jupiter.params.ParameterizedTest;
41+
import org.junit.jupiter.params.provider.ValueSource;
3742

3843
/**
3944
* Tests for SetUtils.
@@ -218,6 +223,31 @@ void testUnion() {
218223
assertThrows(NullPointerException.class, () -> SetUtils.union(null, setA));
219224
}
220225

226+
@ParameterizedTest
227+
@ValueSource(booleans = {true, false})
228+
void testReverseNestedUnionPerfomWell(final boolean mergeLeft) {
229+
Set<Integer> set = SetUtils.union(setA, setB);
230+
for (int i = 0; i < 128; i++) {
231+
if (mergeLeft) {
232+
set = SetUtils.union(setB, set);
233+
} else {
234+
set = SetUtils.union(set, setB);
235+
}
236+
}
237+
final Set<Integer> combinedSet = set;
238+
assertTimeoutPreemptively(Duration.ofSeconds(1), () -> {
239+
assertEquals(7, combinedSet.size());
240+
assertTrue(combinedSet.containsAll(setA));
241+
assertTrue(combinedSet.containsAll(setB));
242+
243+
final Iterator<Integer> iterator = combinedSet.iterator();
244+
while (iterator.hasNext()) { // without the IteratorChain hasNext() caching, this would run hours
245+
iterator.next();
246+
}
247+
assertFalse(iterator.hasNext());
248+
});
249+
}
250+
221251
@Test
222252
void testUnmodifiableSet() {
223253
final Set<?> set1 = SetUtils.unmodifiableSet();

src/test/java/org/apache/commons/collections4/iterators/IteratorChainTest.java

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,12 @@
1919
import static org.junit.jupiter.api.Assertions.assertEquals;
2020
import static org.junit.jupiter.api.Assertions.assertFalse;
2121
import static org.junit.jupiter.api.Assertions.assertThrows;
22+
import static org.junit.jupiter.api.Assertions.assertTimeoutPreemptively;
2223
import static org.junit.jupiter.api.Assertions.assertTrue;
2324

25+
import java.time.Duration;
2426
import java.util.ArrayList;
27+
import java.util.Arrays;
2528
import java.util.Iterator;
2629
import java.util.List;
2730
import java.util.NoSuchElementException;
@@ -126,8 +129,10 @@ void testFirstIteratorIsEmptyBug() {
126129
assertTrue(chain.hasNext(), "should have next");
127130
assertEquals("B", chain.next());
128131
assertTrue(chain.hasNext(), "should have next");
132+
assertTrue(chain.hasNext(), "should not change");
129133
assertEquals("C", chain.next());
130134
assertFalse(chain.hasNext(), "should not have next");
135+
assertFalse(chain.hasNext(), "should not change");
131136
}
132137

133138
@Test
@@ -146,6 +151,9 @@ void testIterator() {
146151
public void testRemove() {
147152
final Iterator<String> iter = makeObject();
148153
assertThrows(IllegalStateException.class, () -> iter.remove(), "Calling remove before the first call to next() should throw an exception");
154+
assertTrue(iter.hasNext(), "initial has next should be true");
155+
assertThrows(IllegalStateException.class, () -> iter.remove(), "Calling remove before the first call to next() should throw an exception");
156+
149157
for (final String testValue : testArray) {
150158
final String iterValue = iter.next();
151159
assertEquals(testValue, iterValue, "Iteration value is correct");
@@ -158,6 +166,78 @@ public void testRemove() {
158166
assertTrue(list3.isEmpty(), "List is empty");
159167
}
160168

169+
@Test
170+
public void testRemoveDoubleCallShouldFail() {
171+
final Iterator<String> iter = makeObject();
172+
assertEquals(iter.next(), "One");
173+
iter.remove();
174+
assertThrows(IllegalStateException.class, () -> iter.remove());
175+
}
176+
177+
@Test
178+
public void testHasNextIsInvokedOnEdgeBeforeRemove() {
179+
final Iterator<String> iter = makeObject();
180+
assertEquals(iter.next(), "One");
181+
assertEquals(iter.next(), "Two");
182+
assertEquals(iter.next(), "Three");
183+
assertTrue(iter.hasNext(), "next elements exists");
184+
iter.remove(); // though hasNext() on next iterator has been invoked, removing an element on old iterator must still work
185+
assertTrue(iter.hasNext(), "next elements exists");
186+
assertEquals(iter.next(), "Four");
187+
188+
assertEquals(list1, Arrays.asList("One", "Two")); // Three must be gone
189+
assertEquals(list2, Arrays.asList("Four")); // Four still be there
190+
assertEquals(list3, Arrays.asList("Five", "Six")); // Five+Six anyway
191+
}
192+
193+
@Test
194+
public void testChainingPerformsWell() {
195+
Iterator<String> iter = makeObject();
196+
for (int i = 0; i < 150; i++) {
197+
final IteratorChain<String> chain = new IteratorChain<>();
198+
chain.addIterator(iter);
199+
iter = chain;
200+
}
201+
final Iterator<String> iterFinal = iter;
202+
assertTimeoutPreemptively(Duration.ofSeconds(1), () -> {
203+
for (final String testValue : testArray) {
204+
final String iterValue = iterFinal.next();
205+
assertEquals(testValue, iterValue, "Iteration value is correct");
206+
if (!iterValue.equals("Four")) {
207+
iterFinal.remove();
208+
}
209+
}
210+
assertFalse(iterFinal.hasNext(), "all values got iterated");
211+
assertTrue(list1.isEmpty(), "List is empty");
212+
assertEquals(1, list2.size(), "List is empty");
213+
assertTrue(list3.isEmpty(), "List is empty");
214+
});
215+
}
216+
217+
@Test
218+
public void testChaining() {
219+
IteratorChain<String> chain = new IteratorChain<>();
220+
chain.addIterator(list1.iterator());
221+
chain = new IteratorChain<>(chain);
222+
chain.addIterator(list2.iterator());
223+
chain = new IteratorChain<>(chain);
224+
chain.addIterator(list3.iterator());
225+
226+
for (final String testValue : testArray) {
227+
assertTrue(chain.hasNext(), "chain contains values");
228+
assertTrue(chain.hasNext(), "hasNext doesn't change on 2nd invocation");
229+
final String iterValue = chain.next();
230+
assertEquals(testValue, iterValue, "Iteration value is correct");
231+
if (!iterValue.equals("Four")) {
232+
chain.remove();
233+
}
234+
}
235+
assertFalse(chain.hasNext(), "all values got iterated");
236+
assertTrue(list1.isEmpty(), "List is empty");
237+
assertEquals(1, list2.size(), "List is empty");
238+
assertTrue(list3.isEmpty(), "List is empty");
239+
}
240+
161241
@Test
162242
void testRemoveFromFilteredIterator() {
163243

0 commit comments

Comments
 (0)