diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index a3dc9d595f1f..e60f2b1a5540 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -188,7 +188,9 @@ Improvements
 
 Optimizations
 ---------------------
-(No changes)
+
+* GITHUB#12839: Introduce method to grow arrays up to a given upper limit and use it to reduce overallocation for
+  DirectoryTaxonomyReader#getBulkOrdinals. (Stefan Vodita)
 
 Bug Fixes
 ---------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/ArrayUtil.java b/lucene/core/src/java/org/apache/lucene/util/ArrayUtil.java
index 629e289ef4db..336b5b3e6bbb 100644
--- a/lucene/core/src/java/org/apache/lucene/util/ArrayUtil.java
+++ b/lucene/core/src/java/org/apache/lucene/util/ArrayUtil.java
@@ -330,15 +330,36 @@ public static int[] growExact(int[] array, int newLength) {
     return copy;
   }
 
+  /**
+   * Returns an array whose size is at least {@code minLength}, generally over-allocating
+   * exponentially, but never allocating more than {@code maxLength} elements.
+   */
+  public static int[] growInRange(int[] array, int minLength, int maxLength) {
+    assert minLength >= 0
+        : "length must be positive (got " + minLength + "): likely integer overflow?";
+
+    if (minLength > maxLength) {
+      throw new IllegalArgumentException(
+          "requested minimum array length "
+              + minLength
+              + " is larger than requested maximum array length "
+              + maxLength);
+    }
+
+    if (array.length >= minLength) {
+      return array;
+    }
+
+    int potentialLength = oversize(minLength, Integer.BYTES);
+    return growExact(array, Math.min(maxLength, potentialLength));
+  }
+
   /**
    * Returns an array whose size is at least {@code minSize}, generally over-allocating
    * exponentially
    */
   public static int[] grow(int[] array, int minSize) {
-    assert minSize >= 0 : "size must be positive (got " + minSize + "): likely integer overflow?";
-    if (array.length < minSize) {
-      return growExact(array, oversize(minSize, Integer.BYTES));
-    } else return array;
+    return growInRange(array, minSize, Integer.MAX_VALUE);
   }
 
   /**
diff --git a/lucene/core/src/test/org/apache/lucene/util/TestArrayUtil.java b/lucene/core/src/test/org/apache/lucene/util/TestArrayUtil.java
index abc41a5bf9fa..972fd0c3c848 100644
--- a/lucene/core/src/test/org/apache/lucene/util/TestArrayUtil.java
+++ b/lucene/core/src/test/org/apache/lucene/util/TestArrayUtil.java
@@ -18,6 +18,8 @@
 
 import static org.apache.lucene.util.ArrayUtil.copyOfSubArray;
 import static org.apache.lucene.util.ArrayUtil.growExact;
+import static org.apache.lucene.util.ArrayUtil.growInRange;
+import static org.apache.lucene.util.ArrayUtil.oversize;
 
 import java.util.Arrays;
 import java.util.Collections;
@@ -371,6 +373,36 @@ public void testGrowExact() {
         () -> growExact(new String[] {"a", "b", "c"}, random().nextInt(3)));
   }
 
+  public void testGrowInRange() {
+    int[] array = new int[] {1, 2, 3};
+
+    // If minLength is negative, maxLength does not matter
+    expectThrows(AssertionError.class, () -> growInRange(array, -1, 4));
+    expectThrows(AssertionError.class, () -> growInRange(array, -1, 0));
+    expectThrows(AssertionError.class, () -> growInRange(array, -1, -1));
+
+    // If minLength > maxLength, we throw an exception
+    expectThrows(IllegalArgumentException.class, () -> growInRange(array, 1, 0));
+    expectThrows(IllegalArgumentException.class, () -> growInRange(array, 4, 3));
+    expectThrows(IllegalArgumentException.class, () -> growInRange(array, 5, 4));
+
+    // If minLength is sufficient, we return the array
+    assertSame(array, growInRange(array, 1, 4));
+    assertSame(array, growInRange(array, 1, 2));
+    assertSame(array, growInRange(array, 1, 1));
+
+    int minLength = 4;
+    int maxLength = Integer.MAX_VALUE;
+
+    // The array grows normally if maxLength permits
+    assertEquals(
+        oversize(minLength, Integer.BYTES),
+        growInRange(new int[] {1, 2, 3}, minLength, maxLength).length);
+
+    // The array grows to maxLength if maxLength is limiting
+    assertEquals(minLength, growInRange(new int[] {1, 2, 3}, minLength, minLength).length);
+  }
+
   public void testCopyOfSubArray() {
     short[] shortArray = {1, 2, 3};
     assertArrayEquals(new short[] {1}, copyOfSubArray(shortArray, 0, 1));
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
index bf4867e0dda8..0d641d396ec8 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
@@ -328,7 +328,8 @@ public int[] getBulkOrdinals(FacetLabel... categoryPaths) throws IOException {
     }
     // First try to find results in the cache:
     int[] result = new int[categoryPaths.length];
-    int[] indexesMissingFromCache = new int[10]; // initial size, will grow when required
+    // Will grow when required, but never beyond categoryPaths.length
+    int[] indexesMissingFromCache = new int[Math.min(10, categoryPaths.length)];
     int numberOfMissingFromCache = 0;
     FacetLabel cp;
     Integer res;
@@ -352,7 +353,8 @@ public int[] getBulkOrdinals(FacetLabel... categoryPaths) throws IOException {
         }
       } else {
         indexesMissingFromCache =
-            ArrayUtil.grow(indexesMissingFromCache, numberOfMissingFromCache + 1);
+            ArrayUtil.growInRange(
+                indexesMissingFromCache, numberOfMissingFromCache + 1, categoryPaths.length);
         indexesMissingFromCache[numberOfMissingFromCache++] = i;
       }
     }