diff --git a/docs/changelog/126889.yaml b/docs/changelog/126889.yaml new file mode 100644 index 0000000000000..33d15e3f124ac --- /dev/null +++ b/docs/changelog/126889.yaml @@ -0,0 +1,6 @@ +pr: 126889 +summary: Rework uniquify to not use iterators +area: Infra/Core +type: bug +issues: + - 126883 diff --git a/server/src/main/java/org/elasticsearch/common/util/CollectionUtils.java b/server/src/main/java/org/elasticsearch/common/util/CollectionUtils.java index c2e8fceaa57be..7001f211d8a89 100644 --- a/server/src/main/java/org/elasticsearch/common/util/CollectionUtils.java +++ b/server/src/main/java/org/elasticsearch/common/util/CollectionUtils.java @@ -51,20 +51,24 @@ public static void uniquify(List list, Comparator cmp) { return; } - ListIterator uniqueItr = list.listIterator(); - ListIterator existingItr = list.listIterator(); - T uniqueValue = uniqueItr.next(); // get first element to compare with - existingItr.next(); // advance the existing iterator to the second element, where we will begin comparing + ListIterator resultItr = list.listIterator(); + ListIterator currentItr = list.listIterator(1); // start at second element to compare + T lastValue = resultItr.next(); // result always includes first element, so advance it and grab first do { - T existingValue = existingItr.next(); - if (cmp.compare(existingValue, uniqueValue) != 0 && (uniqueValue = uniqueItr.next()) != existingValue) { - uniqueItr.set(existingValue); + T currentValue = currentItr.next(); // each iter we check if the next element is different from the last we put in the result + if (cmp.compare(lastValue, currentValue) != 0) { + lastValue = currentValue; + resultItr.next(); // advance result so the current position is where we want to set + if (resultItr.previousIndex() != currentItr.previousIndex()) { + // optimization: only need to set if different + resultItr.set(currentValue); + } } - } while (existingItr.hasNext()); + } while (currentItr.hasNext()); // Lop off the rest of the list. Note with LinkedList this requires advancing back to this index, // but Java provides no way to efficiently remove from the end of a non random-access list. - list.subList(uniqueItr.nextIndex(), list.size()).clear(); + list.subList(resultItr.nextIndex(), list.size()).clear(); } /** diff --git a/server/src/test/java/org/elasticsearch/common/util/CollectionUtilsTests.java b/server/src/test/java/org/elasticsearch/common/util/CollectionUtilsTests.java index 3a76cee0d6240..6ac6659b1073e 100644 --- a/server/src/test/java/org/elasticsearch/common/util/CollectionUtilsTests.java +++ b/server/src/test/java/org/elasticsearch/common/util/CollectionUtilsTests.java @@ -65,7 +65,7 @@ private void assertUniquify(List list, Comparator cmp, int size) { for (List listCopy : List.of(new ArrayList(list), new LinkedList(list))) { CollectionUtils.uniquify(listCopy, cmp); for (int i = 0; i < listCopy.size() - 1; ++i) { - assertThat(cmp.compare(listCopy.get(i), listCopy.get(i + 1)), lessThan(0)); + assertThat(listCopy.get(i) + " < " + listCopy.get(i + 1), cmp.compare(listCopy.get(i), listCopy.get(i + 1)), lessThan(0)); } assertThat(listCopy.size(), equalTo(size)); } @@ -75,9 +75,25 @@ public void testUniquify() { assertUniquify(List.of(), Comparator.naturalOrder(), 0); assertUniquify(List.of(1), Comparator.naturalOrder(), 1); assertUniquify(List.of(1, 2, 3), Comparator.naturalOrder(), 3); + assertUniquify(List.of(1, 1, 3), Comparator.naturalOrder(), 2); assertUniquify(List.of(1, 1, 1), Comparator.naturalOrder(), 1); assertUniquify(List.of(1, 2, 2, 3), Comparator.naturalOrder(), 3); assertUniquify(List.of(1, 2, 2, 2), Comparator.naturalOrder(), 2); + assertUniquify(List.of(1, 2, 2, 3, 3, 5), Comparator.naturalOrder(), 4); + + for (int i = 0; i < 10; ++i) { + int uniqueItems = randomIntBetween(1, 10); + var list = new ArrayList(); + int next = 1; + for (int j = 0; j < uniqueItems; ++j) { + int occurences = randomIntBetween(1, 10); + while (occurences-- > 0) { + list.add(next); + } + next++; + } + assertUniquify(list, Comparator.naturalOrder(), uniqueItems); + } } public void testEmptyPartition() {