From 86b7f6a5e5b42334c78800618977a8e14e608273 Mon Sep 17 00:00:00 2001 From: Ryan Ernst Date: Wed, 16 Apr 2025 12:00:27 -0700 Subject: [PATCH] Fix uniquify to handle multiple successive duplicates (#126889) CollectionUtils.uniquify is based on C++ std::unique. However, C++ iterators are not quite the same as Java iterators. In particular, advancing them only allows grabbing the value once. This commit reworks uniquify to be based on list indices instead of iterators. closes #126883 --- docs/changelog/126889.yaml | 6 +++++ .../common/util/CollectionUtils.java | 22 +++++++++++-------- .../common/util/CollectionUtilsTests.java | 18 ++++++++++++++- 3 files changed, 36 insertions(+), 10 deletions(-) create mode 100644 docs/changelog/126889.yaml diff --git a/docs/changelog/126889.yaml b/docs/changelog/126889.yaml new file mode 100644 index 0000000000000..33d15e3f124ac --- /dev/null +++ b/docs/changelog/126889.yaml @@ -0,0 +1,6 @@ +pr: 126889 +summary: Rework uniquify to not use iterators +area: Infra/Core +type: bug +issues: + - 126883 diff --git a/server/src/main/java/org/elasticsearch/common/util/CollectionUtils.java b/server/src/main/java/org/elasticsearch/common/util/CollectionUtils.java index c2e8fceaa57be..7001f211d8a89 100644 --- a/server/src/main/java/org/elasticsearch/common/util/CollectionUtils.java +++ b/server/src/main/java/org/elasticsearch/common/util/CollectionUtils.java @@ -51,20 +51,24 @@ public static void uniquify(List list, Comparator cmp) { return; } - ListIterator uniqueItr = list.listIterator(); - ListIterator existingItr = list.listIterator(); - T uniqueValue = uniqueItr.next(); // get first element to compare with - existingItr.next(); // advance the existing iterator to the second element, where we will begin comparing + ListIterator resultItr = list.listIterator(); + ListIterator currentItr = list.listIterator(1); // start at second element to compare + T lastValue = resultItr.next(); // result always includes first element, so advance it and grab first do { - T existingValue = existingItr.next(); - if (cmp.compare(existingValue, uniqueValue) != 0 && (uniqueValue = uniqueItr.next()) != existingValue) { - uniqueItr.set(existingValue); + T currentValue = currentItr.next(); // each iter we check if the next element is different from the last we put in the result + if (cmp.compare(lastValue, currentValue) != 0) { + lastValue = currentValue; + resultItr.next(); // advance result so the current position is where we want to set + if (resultItr.previousIndex() != currentItr.previousIndex()) { + // optimization: only need to set if different + resultItr.set(currentValue); + } } - } while (existingItr.hasNext()); + } while (currentItr.hasNext()); // Lop off the rest of the list. Note with LinkedList this requires advancing back to this index, // but Java provides no way to efficiently remove from the end of a non random-access list. - list.subList(uniqueItr.nextIndex(), list.size()).clear(); + list.subList(resultItr.nextIndex(), list.size()).clear(); } /** diff --git a/server/src/test/java/org/elasticsearch/common/util/CollectionUtilsTests.java b/server/src/test/java/org/elasticsearch/common/util/CollectionUtilsTests.java index 3a76cee0d6240..6ac6659b1073e 100644 --- a/server/src/test/java/org/elasticsearch/common/util/CollectionUtilsTests.java +++ b/server/src/test/java/org/elasticsearch/common/util/CollectionUtilsTests.java @@ -65,7 +65,7 @@ private void assertUniquify(List list, Comparator cmp, int size) { for (List listCopy : List.of(new ArrayList(list), new LinkedList(list))) { CollectionUtils.uniquify(listCopy, cmp); for (int i = 0; i < listCopy.size() - 1; ++i) { - assertThat(cmp.compare(listCopy.get(i), listCopy.get(i + 1)), lessThan(0)); + assertThat(listCopy.get(i) + " < " + listCopy.get(i + 1), cmp.compare(listCopy.get(i), listCopy.get(i + 1)), lessThan(0)); } assertThat(listCopy.size(), equalTo(size)); } @@ -75,9 +75,25 @@ public void testUniquify() { assertUniquify(List.of(), Comparator.naturalOrder(), 0); assertUniquify(List.of(1), Comparator.naturalOrder(), 1); assertUniquify(List.of(1, 2, 3), Comparator.naturalOrder(), 3); + assertUniquify(List.of(1, 1, 3), Comparator.naturalOrder(), 2); assertUniquify(List.of(1, 1, 1), Comparator.naturalOrder(), 1); assertUniquify(List.of(1, 2, 2, 3), Comparator.naturalOrder(), 3); assertUniquify(List.of(1, 2, 2, 2), Comparator.naturalOrder(), 2); + assertUniquify(List.of(1, 2, 2, 3, 3, 5), Comparator.naturalOrder(), 4); + + for (int i = 0; i < 10; ++i) { + int uniqueItems = randomIntBetween(1, 10); + var list = new ArrayList(); + int next = 1; + for (int j = 0; j < uniqueItems; ++j) { + int occurences = randomIntBetween(1, 10); + while (occurences-- > 0) { + list.add(next); + } + next++; + } + assertUniquify(list, Comparator.naturalOrder(), uniqueItems); + } } public void testEmptyPartition() {