From 6924371952dfe17fd18b217c389c826d318302f2 Mon Sep 17 00:00:00 2001 From: Gautam Worah Date: Mon, 7 Jun 2021 10:46:26 -0700 Subject: [PATCH 01/11] lots of wip, merged in changes from upstream, test is failing, needs more work --- lucene/CHANGES.txt | 3 + .../facet/taxonomy/FloatTaxonomyFacets.java | 13 +- .../facet/taxonomy/IntTaxonomyFacets.java | 13 +- .../directory/DirectoryTaxonomyReader.java | 149 ++++++++++++++++-- .../facet/taxonomy/TestTaxonomyCombined.java | 6 +- .../directory/TestBackwardsCompatibility.java | 25 +++ .../TestDirectoryTaxonomyReader.java | 37 ++++- 7 files changed, 225 insertions(+), 21 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index a0848a75d5fd..b15fddb24497 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -130,6 +130,9 @@ API Changes Improvements +* LUCENE-9476: Add new getBulkPath API to DirectoryTaxonomyReader to more efficiently retrieve FacetLabels for multiple + facet ordinals at once (Gautam Worah, Mike McCandless) + * LUCENE-9960: Avoid unnecessary top element replacement for equal elements in PriorityQueue. (Dawid Weiss) * LUCENE-9687: Hunspell support improvements: add API for spell-checking and suggestions, support compound words, diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java index cde483d87f45..65a8869323e5 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java @@ -23,6 +23,7 @@ import org.apache.lucene.facet.FacetsConfig.DimConfig; import org.apache.lucene.facet.LabelAndValue; import org.apache.lucene.facet.TopOrdAndFloatQueue; +import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; /** Base class for all taxonomy-based facets that aggregate to a per-ords float[]. */ public abstract class FloatTaxonomyFacets extends TaxonomyFacets { @@ -146,10 +147,18 @@ public FacetResult getTopChildren(int topN, String dim, String... path) throws I } LabelAndValue[] labelValues = new LabelAndValue[q.size()]; + int[] ordinals = new int[labelValues.length]; + float[] values = new float[labelValues.length]; + for (int i = labelValues.length - 1; i >= 0; i--) { TopOrdAndFloatQueue.OrdAndValue ordAndValue = q.pop(); - FacetLabel child = taxoReader.getPath(ordAndValue.ord); - labelValues[i] = new LabelAndValue(child.components[cp.length], ordAndValue.value); + ordinals[i] = ordAndValue.ord; + values[i] = ordAndValue.value; + } + + FacetLabel[] bulkPath = ((DirectoryTaxonomyReader) taxoReader).getBulkPath(ordinals); + for (int i = 0; i < labelValues.length; i++) { + labelValues[i] = new LabelAndValue(bulkPath[i].components[cp.length], values[i]); } return new FacetResult(dim, path, sumValues, labelValues, childCount); diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java index 7d4e5d6149c0..aa153a00825a 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java @@ -27,6 +27,7 @@ import org.apache.lucene.facet.FacetsConfig.DimConfig; import org.apache.lucene.facet.LabelAndValue; import org.apache.lucene.facet.TopOrdAndIntQueue; +import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; /** Base class for all taxonomy-based facets that aggregate to a per-ords int[]. */ public abstract class IntTaxonomyFacets extends TaxonomyFacets { @@ -237,10 +238,18 @@ public FacetResult getTopChildren(int topN, String dim, String... path) throws I } LabelAndValue[] labelValues = new LabelAndValue[q.size()]; + int[] ordinals = new int[labelValues.length]; + int[] values = new int[labelValues.length]; + for (int i = labelValues.length - 1; i >= 0; i--) { TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop(); - FacetLabel child = taxoReader.getPath(ordAndValue.ord); - labelValues[i] = new LabelAndValue(child.components[cp.length], ordAndValue.value); + ordinals[i] = ordAndValue.ord; + values[i] = ordAndValue.value; + } + + FacetLabel[] bulkPath = ((DirectoryTaxonomyReader) taxoReader).getBulkPath(ordinals); + for (int i = 0; i < labelValues.length; i++) { + labelValues[i] = new LabelAndValue(bulkPath[i].components[cp.length], values[i]); } return new FacetResult(dim, path, totValue, labelValues, childCount); diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java index ea38d8c24d7e..79776460de21 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java @@ -18,10 +18,12 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.function.IntUnaryOperator; import java.util.logging.Level; import java.util.logging.Logger; import org.apache.lucene.document.Document; @@ -35,6 +37,7 @@ import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MultiTerms; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.ReaderUtil; @@ -44,6 +47,7 @@ import org.apache.lucene.util.Accountables; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.InPlaceMergeSorter; import org.apache.lucene.util.RamUsageEstimator; /** @@ -318,23 +322,17 @@ public FacetLabel getPath(int ordinal) throws IOException { // doOpenIfChanged, we need to ensure that the ordinal is one that this DTR // instance recognizes. Therefore we do this check up front, before we hit // the cache. - if (ordinal < 0 || ordinal >= indexReader.maxDoc()) { - return null; - } + int indexReaderMaxDoc = indexReader.maxDoc(); + checkOrdinalBounds(ordinal, indexReaderMaxDoc); - // TODO: can we use an int-based hash impl, such as IntToObjectMap, - // wrapped as LRU? - Integer catIDInteger = Integer.valueOf(ordinal); - synchronized (categoryCache) { - FacetLabel res = categoryCache.get(catIDInteger); - if (res != null) { - return res; - } + FacetLabel ordinalPath = getPathFromCache(ordinal); + + if (ordinalPath != null) { + return ordinalPath; } int readerIndex = ReaderUtil.subIndex(ordinal, indexReader.leaves()); LeafReader leafReader = indexReader.leaves().get(readerIndex).reader(); - // TODO: Use LUCENE-9476 to get the bulk lookup API for extracting BinaryDocValues BinaryDocValues values = leafReader.getBinaryDocValues(Consts.FULL); FacetLabel ret; @@ -351,12 +349,137 @@ public FacetLabel getPath(int ordinal) throws IOException { } synchronized (categoryCache) { - categoryCache.put(catIDInteger, ret); + categoryCache.put(ordinal, ret); } return ret; } + private FacetLabel getPathFromCache(int ordinal) { + // TODO: can we use an int-based hash impl, such as IntToObjectMap, + // wrapped as LRU? + synchronized (categoryCache) { + return categoryCache.get(ordinal); + } + } + + private void checkOrdinalBounds(int ordinal, int indexReaderMaxDoc) + throws IllegalArgumentException { + if (ordinal < 0 || ordinal >= indexReaderMaxDoc) { + throw new IllegalArgumentException( + "ordinal " + + ordinal + + " is out of the range of the indexReader " + + indexReader.toString()); + } + } + + /** + * Returns an array of FacetLabels for a given array of ordinals. + * + *

This API is generally faster than iteratively calling {@link #getPath(int)} over an array of + * ordinals. It uses the {@link #getPath(int)} method iteratively when it detects that the index + * was created using StoredFields (with no performance gains) and uses DocValues based iteration + * when the index is based on DocValues. + * + * @param ordinals Array of ordinals that are assigned to categories inserted into the taxonomy + * index + */ + public FacetLabel[] getBulkPath(int... ordinals) throws IOException { + ensureOpen(); + + int ordinalsLength = ordinals.length; + FacetLabel[] bulkPath = new FacetLabel[ordinalsLength]; + // remember the original positions of ordinals before they are sorted + int originalPosition[] = new int[ordinalsLength]; + Arrays.setAll(originalPosition, IntUnaryOperator.identity()); + int indexReaderMaxDoc = indexReader.maxDoc(); + + for (int i = 0; i < ordinalsLength; i++) { + // check whether the ordinal is valid before accessing the cache + checkOrdinalBounds(ordinals[i], indexReaderMaxDoc); + // check the cache before trying to find it in the index + FacetLabel ordinalPath = getPathFromCache(ordinals[i]); + if (ordinalPath != null) { + bulkPath[i] = ordinalPath; + } + } + + // parallel sort the ordinals and originalPosition array based on the values in the ordinals + // array + new InPlaceMergeSorter() { + @Override + protected void swap(int i, int j) { + int x = ordinals[i]; + ordinals[i] = ordinals[j]; + ordinals[j] = x; + + x = originalPosition[i]; + originalPosition[i] = originalPosition[j]; + originalPosition[j] = x; + } + ; + + @Override + public int compare(int i, int j) { + return Integer.compare(ordinals[i], ordinals[j]); + } + }.sort(0, ordinalsLength); + + int readerIndex; + int leafReaderMaxDoc = 0; + int leafReaderDocBase = 0; + LeafReader leafReader; + LeafReaderContext leafReaderContext; + BinaryDocValues values = null; + + for (int i = 0; i < ordinalsLength; i++) { + if (bulkPath[originalPosition[i]] == null) { + if (values == null || ordinals[i] >= leafReaderMaxDoc) { + + readerIndex = ReaderUtil.subIndex(ordinals[i], indexReader.leaves()); + leafReaderContext = indexReader.leaves().get(readerIndex); + leafReader = leafReaderContext.reader(); + leafReaderMaxDoc = leafReader.maxDoc(); + leafReaderDocBase = leafReaderContext.docBase; + values = leafReader.getBinaryDocValues(Consts.FULL); + + // this check is only needed once to confirm that the index uses BinaryDocValues + boolean success = values.advanceExact(ordinals[i] - leafReaderDocBase); + if (success == false) { + return getBulkPathForOlderIndexes(ordinals); + } + } + boolean success = values.advanceExact(ordinals[i] - leafReaderDocBase); + assert success; + bulkPath[originalPosition[i]] = + new FacetLabel(FacetsConfig.stringToPath(values.binaryValue().utf8ToString())); + } + } + + for (int i = 0; i < ordinalsLength; i++) { + synchronized (categoryCache) { + categoryCache.put(ordinals[i], bulkPath[originalPosition[i]]); + } + } + + return bulkPath; + } + + /** + * This function is only used when the underlying taxonomy index was constructed using an older + * (slower) StoredFields based codec (< 8.7). The {@link #getBulkPath(int...)} function calls it + * internally when it realizes that the index uses StoredFields. + */ + private FacetLabel[] getBulkPathForOlderIndexes(int... ordinals) throws IOException { + FacetLabel[] bulkPath = new FacetLabel[ordinals.length]; + for (int i = 0; i < ordinals.length; i++) { + bulkPath[i] = getPath(ordinals[i]); + } + + return bulkPath; + } + @Override public int getSize() { ensureOpen(); diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java index 9f3ca9ef84e7..37e3755fa4a6 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java @@ -366,9 +366,9 @@ public void testReaderBasic() throws Exception { } } // (also test invalid ordinals:) - assertNull(tr.getPath(-1)); - assertNull(tr.getPath(tr.getSize())); - assertNull(tr.getPath(TaxonomyReader.INVALID_ORDINAL)); + expectThrows(IllegalArgumentException.class, () -> tr.getPath(-1)); + expectThrows(IllegalArgumentException.class, () -> tr.getPath(tr.getSize())); + expectThrows(IllegalArgumentException.class, () -> tr.getPath(TaxonomyReader.INVALID_ORDINAL)); // test TaxonomyReader.getOrdinal(): for (int i = 1; i < expectedCategories.length; i++) { diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestBackwardsCompatibility.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestBackwardsCompatibility.java index 05142b8613b9..cf59966a8e53 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestBackwardsCompatibility.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestBackwardsCompatibility.java @@ -89,6 +89,31 @@ private void createNewTaxonomyIndex(String dirName) throws IOException { dir.close(); } + // Opens up a pre-existing index and tries to run getBulkPath on it + public void testGetBulkPathOnOlderCodec() throws Exception { + Path indexDir = createTempDir(oldTaxonomyIndexName); + TestUtil.unzip(getDataInputStream(oldTaxonomyIndexName + ".zip"), indexDir); + Directory dir = newFSDirectory(indexDir); + + DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir); + + FacetLabel cp_b = new FacetLabel("b"); + writer.addCategory(cp_b); + writer.getInternalIndexWriter().forceMerge(1); + writer.commit(); + + DirectoryTaxonomyReader reader = new DirectoryTaxonomyReader(writer); + + FacetLabel[] facetLabels = {new FacetLabel("a"), cp_b}; + int[] ordinals = + new int[] {reader.getOrdinal(facetLabels[0]), reader.getOrdinal(facetLabels[1])}; + // The zip file already contains a category "a" stored with the older StoredFields codec + assertArrayEquals(facetLabels, reader.getBulkPath(ordinals)); + reader.close(); + writer.close(); + dir.close(); + } + // Used to create a fresh taxonomy index with StoredFields @Ignore public void testCreateOldTaxonomy() throws IOException { diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java index c9338b417944..c9c91e354a36 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java @@ -413,7 +413,7 @@ public void testOpenIfChangedReuse() throws Exception { // check that r1 doesn't see cp_b assertEquals(TaxonomyReader.INVALID_ORDINAL, r1.getOrdinal(cp_b)); - assertNull(r1.getPath(2)); + expectThrows(IllegalArgumentException.class, () -> r1.getPath(2)); r1.close(); r2.close(); @@ -567,4 +567,39 @@ public void testAccountable() throws Exception { taxoReader.close(); dir.close(); } + + public void testCallingBulkPathReturnsCorrectResult() throws Exception { + float PROBABILITY_OF_COMMIT = 0.5f; + Directory src = newDirectory(); + DirectoryTaxonomyWriter w = new DirectoryTaxonomyWriter(src); + String randomArray[] = new String[random().nextInt(1000)]; + // adding a smaller bound on ints ensures that we will have some duplicate ordinals in random + // test cases + Arrays.setAll(randomArray, i -> Integer.toString(random().nextInt(500))); + + FacetLabel allPaths[] = new FacetLabel[randomArray.length]; + int allOrdinals[] = new int[randomArray.length]; + + for (int i = 0; i < randomArray.length; i++) { + allPaths[i] = new FacetLabel(randomArray[i]); + w.addCategory(allPaths[i]); + // add random commits to create multiple segments in the index + if (random().nextFloat() < PROBABILITY_OF_COMMIT) { + w.commit(); + } + } + w.commit(); + w.close(); + + DirectoryTaxonomyReader r1 = new DirectoryTaxonomyReader(src); + + for (int i = 0; i < allPaths.length; i++) { + allOrdinals[i] = r1.getOrdinal(allPaths[i]); + } + + FacetLabel allBulkPaths[] = r1.getBulkPath(allOrdinals); + assertArrayEquals(allPaths, allBulkPaths); + r1.close(); + src.close(); + } } From 7b44f1fd5c6f13c801040d3b86e264a4f32fb0a5 Mon Sep 17 00:00:00 2001 From: Gautam Worah Date: Tue, 8 Jun 2021 18:54:39 -0700 Subject: [PATCH 02/11] Logic working correctly, only benchmarks remaining, backwards compat working --- .../directory/DirectoryTaxonomyReader.java | 21 +++++++++---------- .../directory/TestBackwardsCompatibility.java | 11 ++-------- 2 files changed, 12 insertions(+), 20 deletions(-) diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java index 79776460de21..ad683345fb07 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java @@ -391,7 +391,7 @@ public FacetLabel[] getBulkPath(int... ordinals) throws IOException { int ordinalsLength = ordinals.length; FacetLabel[] bulkPath = new FacetLabel[ordinalsLength]; // remember the original positions of ordinals before they are sorted - int originalPosition[] = new int[ordinalsLength]; + int[] originalPosition = new int[ordinalsLength]; Arrays.setAll(originalPosition, IntUnaryOperator.identity()); int indexReaderMaxDoc = indexReader.maxDoc(); @@ -405,8 +405,7 @@ public FacetLabel[] getBulkPath(int... ordinals) throws IOException { } } - // parallel sort the ordinals and originalPosition array based on the values in the ordinals - // array + /* parallel sort the ordinals and originalPosition array based on the values in the ordinals array */ new InPlaceMergeSorter() { @Override protected void swap(int i, int j) { @@ -444,9 +443,10 @@ public int compare(int i, int j) { leafReaderDocBase = leafReaderContext.docBase; values = leafReader.getBinaryDocValues(Consts.FULL); - // this check is only needed once to confirm that the index uses BinaryDocValues - boolean success = values.advanceExact(ordinals[i] - leafReaderDocBase); - if (success == false) { + /* + If the index is constructed with the older StoredFields it will not have any BinaryDocValues field and will return null + */ + if (values == null) { return getBulkPathForOlderIndexes(ordinals); } } @@ -454,12 +454,11 @@ public int compare(int i, int j) { assert success; bulkPath[originalPosition[i]] = new FacetLabel(FacetsConfig.stringToPath(values.binaryValue().utf8ToString())); - } - } - for (int i = 0; i < ordinalsLength; i++) { - synchronized (categoryCache) { - categoryCache.put(ordinals[i], bulkPath[originalPosition[i]]); + // add the value to the categoryCache after computation + synchronized (categoryCache) { + categoryCache.put(ordinals[i], bulkPath[originalPosition[i]]); + } } } diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestBackwardsCompatibility.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestBackwardsCompatibility.java index cf59966a8e53..192ca7bc0781 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestBackwardsCompatibility.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestBackwardsCompatibility.java @@ -96,17 +96,10 @@ public void testGetBulkPathOnOlderCodec() throws Exception { Directory dir = newFSDirectory(indexDir); DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir); - - FacetLabel cp_b = new FacetLabel("b"); - writer.addCategory(cp_b); - writer.getInternalIndexWriter().forceMerge(1); - writer.commit(); - DirectoryTaxonomyReader reader = new DirectoryTaxonomyReader(writer); - FacetLabel[] facetLabels = {new FacetLabel("a"), cp_b}; - int[] ordinals = - new int[] {reader.getOrdinal(facetLabels[0]), reader.getOrdinal(facetLabels[1])}; + FacetLabel[] facetLabels = {new FacetLabel("a")}; + int[] ordinals = new int[] {reader.getOrdinal(facetLabels[0])}; // The zip file already contains a category "a" stored with the older StoredFields codec assertArrayEquals(facetLabels, reader.getBulkPath(ordinals)); reader.close(); From f6288960e7876e06c2afba9e95ce9a5f81cba4b7 Mon Sep 17 00:00:00 2001 From: Gautam Worah Date: Tue, 8 Jun 2021 18:56:43 -0700 Subject: [PATCH 03/11] Add minor comment --- .../facet/taxonomy/directory/DirectoryTaxonomyReader.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java index ad683345fb07..bc87cf0a51ad 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java @@ -434,6 +434,9 @@ public int compare(int i, int j) { for (int i = 0; i < ordinalsLength; i++) { if (bulkPath[originalPosition[i]] == null) { + /* + If ordinals[i] >= leafReaderMaxDoc then we find the next leaf that contains our ordinal + */ if (values == null || ordinals[i] >= leafReaderMaxDoc) { readerIndex = ReaderUtil.subIndex(ordinals[i], indexReader.leaves()); From 11fd008341af2f91ab2c1b0638b282ecd253b6d9 Mon Sep 17 00:00:00 2001 From: Gautam Worah Date: Wed, 7 Jul 2021 17:32:14 -0700 Subject: [PATCH 04/11] Fixed the bug of not adding leafReaderDocBase to leafReaderMaxDoc --- .../directory/DirectoryTaxonomyReader.java | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java index bc87cf0a51ad..c25eb8e96dc5 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java @@ -322,8 +322,7 @@ public FacetLabel getPath(int ordinal) throws IOException { // doOpenIfChanged, we need to ensure that the ordinal is one that this DTR // instance recognizes. Therefore we do this check up front, before we hit // the cache. - int indexReaderMaxDoc = indexReader.maxDoc(); - checkOrdinalBounds(ordinal, indexReaderMaxDoc); + checkOrdinalBounds(ordinal); FacetLabel ordinalPath = getPathFromCache(ordinal); @@ -363,14 +362,14 @@ private FacetLabel getPathFromCache(int ordinal) { } } - private void checkOrdinalBounds(int ordinal, int indexReaderMaxDoc) + private void checkOrdinalBounds(int ordinal) throws IllegalArgumentException { - if (ordinal < 0 || ordinal >= indexReaderMaxDoc) { + if (ordinal < 0 || ordinal >= indexReader.maxDoc()) { throw new IllegalArgumentException( "ordinal " + ordinal + " is out of the range of the indexReader " - + indexReader.toString()); + + indexReader.toString() + ". The maximum possible ordinal number is " + (indexReader.maxDoc() - 1)); } } @@ -380,7 +379,7 @@ private void checkOrdinalBounds(int ordinal, int indexReaderMaxDoc) *

This API is generally faster than iteratively calling {@link #getPath(int)} over an array of * ordinals. It uses the {@link #getPath(int)} method iteratively when it detects that the index * was created using StoredFields (with no performance gains) and uses DocValues based iteration - * when the index is based on DocValues. + * when the index is based on BinaryDocValues. Lucene switched to BinaryDocValues in version 9.0 * * @param ordinals Array of ordinals that are assigned to categories inserted into the taxonomy * index @@ -393,11 +392,10 @@ public FacetLabel[] getBulkPath(int... ordinals) throws IOException { // remember the original positions of ordinals before they are sorted int[] originalPosition = new int[ordinalsLength]; Arrays.setAll(originalPosition, IntUnaryOperator.identity()); - int indexReaderMaxDoc = indexReader.maxDoc(); for (int i = 0; i < ordinalsLength; i++) { // check whether the ordinal is valid before accessing the cache - checkOrdinalBounds(ordinals[i], indexReaderMaxDoc); + checkOrdinalBounds(ordinals[i]); // check the cache before trying to find it in the index FacetLabel ordinalPath = getPathFromCache(ordinals[i]); if (ordinalPath != null) { @@ -416,8 +414,7 @@ protected void swap(int i, int j) { x = originalPosition[i]; originalPosition[i] = originalPosition[j]; originalPosition[j] = x; - } - ; + }; @Override public int compare(int i, int j) { @@ -435,9 +432,11 @@ public int compare(int i, int j) { for (int i = 0; i < ordinalsLength; i++) { if (bulkPath[originalPosition[i]] == null) { /* - If ordinals[i] >= leafReaderMaxDoc then we find the next leaf that contains our ordinal + If ordinals[i] >= leafReaderDocBase + leafReaderMaxDoc then we find the next leaf that contains our ordinal. + Remember: ordinals[i] operates in the global ordinal space and hence we add leafReaderDocBase to the leafReaderMaxDoc + (which is the maxDoc of the specific leaf) */ - if (values == null || ordinals[i] >= leafReaderMaxDoc) { + if (values == null || ordinals[i] >= leafReaderDocBase + leafReaderMaxDoc) { readerIndex = ReaderUtil.subIndex(ordinals[i], indexReader.leaves()); leafReaderContext = indexReader.leaves().get(readerIndex); From 1cc34a86090d3c09148547a29252613c3f924263 Mon Sep 17 00:00:00 2001 From: Gautam Worah Date: Wed, 7 Jul 2021 18:36:54 -0700 Subject: [PATCH 05/11] Spotless fix --- .../taxonomy/directory/DirectoryTaxonomyReader.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java index c25eb8e96dc5..13d974f16648 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java @@ -362,14 +362,15 @@ private FacetLabel getPathFromCache(int ordinal) { } } - private void checkOrdinalBounds(int ordinal) - throws IllegalArgumentException { + private void checkOrdinalBounds(int ordinal) throws IllegalArgumentException { if (ordinal < 0 || ordinal >= indexReader.maxDoc()) { throw new IllegalArgumentException( "ordinal " + ordinal + " is out of the range of the indexReader " - + indexReader.toString() + ". The maximum possible ordinal number is " + (indexReader.maxDoc() - 1)); + + indexReader.toString() + + ". The maximum possible ordinal number is " + + (indexReader.maxDoc() - 1)); } } @@ -414,7 +415,8 @@ protected void swap(int i, int j) { x = originalPosition[i]; originalPosition[i] = originalPosition[j]; originalPosition[j] = x; - }; + } + ; @Override public int compare(int i, int j) { From 2f4d4baebe990d28692a40dc145fe518dd8b9483 Mon Sep 17 00:00:00 2001 From: Gautam Worah Date: Mon, 16 Aug 2021 16:09:16 -0700 Subject: [PATCH 06/11] small typo fix --- .../lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java | 1 - 1 file changed, 1 deletion(-) diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java index 13d974f16648..8c50c7fd85bd 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java @@ -416,7 +416,6 @@ protected void swap(int i, int j) { originalPosition[i] = originalPosition[j]; originalPosition[j] = x; } - ; @Override public int compare(int i, int j) { From 577ff89df6107d2a310bafd01b53d27fbd2c06e9 Mon Sep 17 00:00:00 2001 From: Gautam Worah Date: Tue, 17 Aug 2021 19:55:19 -0700 Subject: [PATCH 07/11] Merge from main, update back compat test with latest changes --- .../taxonomy/directory/TestBackwardsCompatibility.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestBackwardsCompatibility.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestBackwardsCompatibility.java index 7f70b8caf5cf..a702ecec7230 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestBackwardsCompatibility.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestBackwardsCompatibility.java @@ -98,9 +98,13 @@ public void testGetBulkPathOnOlderCodec() throws Exception { DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir); DirectoryTaxonomyReader reader = new DirectoryTaxonomyReader(writer); - FacetLabel[] facetLabels = {new FacetLabel("a")}; - int[] ordinals = new int[] {reader.getOrdinal(facetLabels[0])}; - // The zip file already contains a category "a" stored with the older StoredFields codec + FacetLabel[] facetLabels = { + new FacetLabel("a"), new FacetLabel("b"), + }; + int[] ordinals = + new int[] {reader.getOrdinal(facetLabels[0]), reader.getOrdinal(facetLabels[1])}; + // The zip file already contains a category "a" and a category "b" stored with the older + // StoredFields codec assertArrayEquals(facetLabels, reader.getBulkPath(ordinals)); reader.close(); writer.close(); From 4cd890c4296799022170a3bee9c94e53ff3317bd Mon Sep 17 00:00:00 2001 From: Gautam Worah Date: Thu, 26 Aug 2021 15:24:24 -0700 Subject: [PATCH 08/11] Moved ALL caching operations to be `bulk` in nature (to reduce thread contention). Modified the test case to be super random (threads, iterations etc). Modified TaxonomyReader with a getBulkPath method that is overridden in DirTaxonomyReader. Improved documentation, improved method signatures to use the newer (...) style syntax, fixed the CHANGES entry --- lucene/CHANGES.txt | 6 +- .../lucene/facet/taxonomy/TaxonomyReader.java | 13 +++ .../directory/DirectoryTaxonomyReader.java | 82 +++++++++---------- .../TestDirectoryTaxonomyReader.java | 47 ++++++++++- 4 files changed, 100 insertions(+), 48 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index c7c5691b1c3d..5d95d931b562 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -137,9 +137,6 @@ API Changes Improvements -* LUCENE-9476: Add new getBulkPath API to DirectoryTaxonomyReader to more efficiently retrieve FacetLabels for multiple - facet ordinals at once (Gautam Worah, Mike McCandless) - * LUCENE-9960: Avoid unnecessary top element replacement for equal elements in PriorityQueue. (Dawid Weiss) * LUCENE-9633: Improve match highlighter behavior for degenerate intervals (on non-existing positions). @@ -253,6 +250,9 @@ Improvements * LUCENE-10019: Align file starts in CFS files to have proper alignment (8 bytes) (Uwe Schinder) +* LUCENE-9476: Add new getBulkPath API to DirectoryTaxonomyReader to more efficiently retrieve FacetLabels for multiple + facet ordinals at once (Gautam Worah, Mike McCandless). This API is 2-4% faster than iteratively calling getPath + Bug fixes * LUCENE-9686: Fix read past EOF handling in DirectIODirectory. (Zach Chen, diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java index 4a64a696e032..88ca924b2555 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java @@ -212,6 +212,19 @@ public int getOrdinal(String dim, String... path) throws IOException { /** Returns the path name of the category with the given ordinal. */ public abstract FacetLabel getPath(int ordinal) throws IOException; + /** + * Returns the path names of the list of ordinals associated with different categories. The + * implementation in {@link org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader} is + * generally faster than iteratively calling {@link #getPath(int)} + */ + public FacetLabel[] getBulkPath(int... ordinals) throws IOException { + FacetLabel[] facetLabels = new FacetLabel[ordinals.length]; + for (int i = 0; i < ordinals.length; i++) { + facetLabels[i] = getPath(ordinals[i]); + } + return facetLabels; + } + /** Returns the current refCount for this taxonomy reader. */ public final int getRefCount() { return refCount.get(); diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java index 8c50c7fd85bd..844924ef7cc6 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java @@ -324,10 +324,10 @@ public FacetLabel getPath(int ordinal) throws IOException { // the cache. checkOrdinalBounds(ordinal); - FacetLabel ordinalPath = getPathFromCache(ordinal); + FacetLabel[] ordinalPath = getPathFromCache(ordinal); - if (ordinalPath != null) { - return ordinalPath; + if (ordinalPath[0] != null) { + return ordinalPath[0]; } int readerIndex = ReaderUtil.subIndex(ordinal, indexReader.leaves()); @@ -354,23 +354,37 @@ public FacetLabel getPath(int ordinal) throws IOException { return ret; } - private FacetLabel getPathFromCache(int ordinal) { - // TODO: can we use an int-based hash impl, such as IntToObjectMap, + private FacetLabel[] getPathFromCache(int... ordinals) { + FacetLabel[] facetLabels = new FacetLabel[ordinals.length]; + // TODO LUCENE-10068: can we use an int-based hash impl, such as IntToObjectMap, // wrapped as LRU? synchronized (categoryCache) { - return categoryCache.get(ordinal); + for (int i = 0; i < ordinals.length; i++) { + facetLabels[i] = categoryCache.get(ordinals[i]); + } } + return facetLabels; } - private void checkOrdinalBounds(int ordinal) throws IllegalArgumentException { - if (ordinal < 0 || ordinal >= indexReader.maxDoc()) { - throw new IllegalArgumentException( - "ordinal " - + ordinal - + " is out of the range of the indexReader " - + indexReader.toString() - + ". The maximum possible ordinal number is " - + (indexReader.maxDoc() - 1)); + /** + * Checks if the ordinals in the array are >=0 and < {@code + * DirectoryTaxonomyReader#indexReader.maxDoc()} + * + * @param ordinals Integer array of ordinals + * @throws IllegalArgumentException Throw an IllegalArgumentException if one of the ordinals is + * out of bounds + */ + private void checkOrdinalBounds(int... ordinals) throws IllegalArgumentException { + for (int ordinal : ordinals) { + if (ordinal < 0 || ordinal >= indexReader.maxDoc()) { + throw new IllegalArgumentException( + "ordinal " + + ordinal + + " is out of the range of the indexReader " + + indexReader.toString() + + ". The maximum possible ordinal number is " + + (indexReader.maxDoc() - 1)); + } } } @@ -385,8 +399,10 @@ private void checkOrdinalBounds(int ordinal) throws IllegalArgumentException { * @param ordinals Array of ordinals that are assigned to categories inserted into the taxonomy * index */ + @Override public FacetLabel[] getBulkPath(int... ordinals) throws IOException { ensureOpen(); + checkOrdinalBounds(ordinals); int ordinalsLength = ordinals.length; FacetLabel[] bulkPath = new FacetLabel[ordinalsLength]; @@ -394,15 +410,7 @@ public FacetLabel[] getBulkPath(int... ordinals) throws IOException { int[] originalPosition = new int[ordinalsLength]; Arrays.setAll(originalPosition, IntUnaryOperator.identity()); - for (int i = 0; i < ordinalsLength; i++) { - // check whether the ordinal is valid before accessing the cache - checkOrdinalBounds(ordinals[i]); - // check the cache before trying to find it in the index - FacetLabel ordinalPath = getPathFromCache(ordinals[i]); - if (ordinalPath != null) { - bulkPath[i] = ordinalPath; - } - } + getPathFromCache(ordinals); /* parallel sort the ordinals and originalPosition array based on the values in the ordinals array */ new InPlaceMergeSorter() { @@ -429,6 +437,7 @@ public int compare(int i, int j) { LeafReader leafReader; LeafReaderContext leafReaderContext; BinaryDocValues values = null; + List uncachedOrdinalPositions = new ArrayList<>(); for (int i = 0; i < ordinalsLength; i++) { if (bulkPath[originalPosition[i]] == null) { @@ -450,33 +459,24 @@ public int compare(int i, int j) { If the index is constructed with the older StoredFields it will not have any BinaryDocValues field and will return null */ if (values == null) { - return getBulkPathForOlderIndexes(ordinals); + return super.getBulkPath(ordinals); } } + // values is leaf specific so you only advance till you reach the target within the leaf boolean success = values.advanceExact(ordinals[i] - leafReaderDocBase); assert success; bulkPath[originalPosition[i]] = new FacetLabel(FacetsConfig.stringToPath(values.binaryValue().utf8ToString())); - // add the value to the categoryCache after computation - synchronized (categoryCache) { - categoryCache.put(ordinals[i], bulkPath[originalPosition[i]]); - } + uncachedOrdinalPositions.add(i); } } - return bulkPath; - } - - /** - * This function is only used when the underlying taxonomy index was constructed using an older - * (slower) StoredFields based codec (< 8.7). The {@link #getBulkPath(int...)} function calls it - * internally when it realizes that the index uses StoredFields. - */ - private FacetLabel[] getBulkPathForOlderIndexes(int... ordinals) throws IOException { - FacetLabel[] bulkPath = new FacetLabel[ordinals.length]; - for (int i = 0; i < ordinals.length; i++) { - bulkPath[i] = getPath(ordinals[i]); + synchronized (categoryCache) { + for (int i : uncachedOrdinalPositions) { + // add the value to the categoryCache after computation + categoryCache.put(ordinals[i], bulkPath[originalPosition[i]]); + } } return bulkPath; diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java index c9c91e354a36..9103e95a925d 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java @@ -569,7 +569,6 @@ public void testAccountable() throws Exception { } public void testCallingBulkPathReturnsCorrectResult() throws Exception { - float PROBABILITY_OF_COMMIT = 0.5f; Directory src = newDirectory(); DirectoryTaxonomyWriter w = new DirectoryTaxonomyWriter(src); String randomArray[] = new String[random().nextInt(1000)]; @@ -584,7 +583,7 @@ public void testCallingBulkPathReturnsCorrectResult() throws Exception { allPaths[i] = new FacetLabel(randomArray[i]); w.addCategory(allPaths[i]); // add random commits to create multiple segments in the index - if (random().nextFloat() < PROBABILITY_OF_COMMIT) { + if (random().nextBoolean()) { w.commit(); } } @@ -597,8 +596,48 @@ public void testCallingBulkPathReturnsCorrectResult() throws Exception { allOrdinals[i] = r1.getOrdinal(allPaths[i]); } - FacetLabel allBulkPaths[] = r1.getBulkPath(allOrdinals); - assertArrayEquals(allPaths, allBulkPaths); + // create multiple threads to check result correctness and thread contention in the cache + Thread[] addThreads = new Thread[atLeast(4)]; + for (int z = 0; z < addThreads.length; z++) { + addThreads[z] = + new Thread() { + @Override + public void run() { + // each thread iterates for maxThreadIterations times + int maxThreadIterations = random().nextInt(10); + for (int threadIterations = 0; + threadIterations < maxThreadIterations; + threadIterations++) { + + // length of the FacetLabel array that we are going to check + int numOfOrdinalsToCheck = random().nextInt(allOrdinals.length); + int[] ordinals = new int[numOfOrdinalsToCheck]; + FacetLabel[] path = new FacetLabel[numOfOrdinalsToCheck]; + + for (int i = 0; i < numOfOrdinalsToCheck; i++) { + // we deliberately allow it to choose repeat indexes as this will exercise the + // cache + int ordinalIndex = random().nextInt(allOrdinals.length); + ordinals[i] = allOrdinals[ordinalIndex]; + path[i] = allPaths[ordinalIndex]; + } + + try { + // main check for correctness is done here + assertArrayEquals(path, r1.getBulkPath(ordinals)); + } catch (IOException e) { + // this should ideally never occur, but if it does just rethrow the error to the + // caller + throw new RuntimeException(e); + } + } + } + }; + } + + for (Thread t : addThreads) t.start(); + for (Thread t : addThreads) t.join(); + r1.close(); src.close(); } From a9b66f24fa5d6b0552438e53cdd5ce7e9608c59e Mon Sep 17 00:00:00 2001 From: Gautam Worah Date: Thu, 26 Aug 2021 15:45:31 -0700 Subject: [PATCH 09/11] Remove casting from IntTaxonomyFacets and FloatTaxonomyFacets --- .../org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java | 3 +-- .../org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java index 65a8869323e5..ec7f307b99c8 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java @@ -23,7 +23,6 @@ import org.apache.lucene.facet.FacetsConfig.DimConfig; import org.apache.lucene.facet.LabelAndValue; import org.apache.lucene.facet.TopOrdAndFloatQueue; -import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; /** Base class for all taxonomy-based facets that aggregate to a per-ords float[]. */ public abstract class FloatTaxonomyFacets extends TaxonomyFacets { @@ -156,7 +155,7 @@ public FacetResult getTopChildren(int topN, String dim, String... path) throws I values[i] = ordAndValue.value; } - FacetLabel[] bulkPath = ((DirectoryTaxonomyReader) taxoReader).getBulkPath(ordinals); + FacetLabel[] bulkPath = taxoReader.getBulkPath(ordinals); for (int i = 0; i < labelValues.length; i++) { labelValues[i] = new LabelAndValue(bulkPath[i].components[cp.length], values[i]); } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java index 1816093e277b..3f1dc17dece7 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java @@ -27,7 +27,6 @@ import org.apache.lucene.facet.FacetsConfig.DimConfig; import org.apache.lucene.facet.LabelAndValue; import org.apache.lucene.facet.TopOrdAndIntQueue; -import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; /** Base class for all taxonomy-based facets that aggregate to a per-ords int[]. */ public abstract class IntTaxonomyFacets extends TaxonomyFacets { @@ -247,7 +246,7 @@ public FacetResult getTopChildren(int topN, String dim, String... path) throws I values[i] = ordAndValue.value; } - FacetLabel[] bulkPath = ((DirectoryTaxonomyReader) taxoReader).getBulkPath(ordinals); + FacetLabel[] bulkPath = taxoReader.getBulkPath(ordinals); for (int i = 0; i < labelValues.length; i++) { labelValues[i] = new LabelAndValue(bulkPath[i].components[cp.length], values[i]); } From a257c36210ec76c045678f170f9a25cd00f38635 Mon Sep 17 00:00:00 2001 From: Gautam Worah Date: Fri, 27 Aug 2021 11:03:56 -0700 Subject: [PATCH 10/11] Minor fixes --- .../apache/lucene/facet/taxonomy/TaxonomyReader.java | 8 +++++--- .../taxonomy/directory/DirectoryTaxonomyReader.java | 10 ++++++---- .../directory/TestDirectoryTaxonomyReader.java | 9 +++++---- .../directory/TestDirectoryTaxonomyWriter.java | 3 ++- 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java index 88ca924b2555..50c23e72019b 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java @@ -213,9 +213,11 @@ public int getOrdinal(String dim, String... path) throws IOException { public abstract FacetLabel getPath(int ordinal) throws IOException; /** - * Returns the path names of the list of ordinals associated with different categories. The - * implementation in {@link org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader} is - * generally faster than iteratively calling {@link #getPath(int)} + * Returns the path names of the list of ordinals associated with different categories. + * + *

The implementation in {@link + * org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader} is generally faster than + * the default implementation which iteratively calls {@link #getPath(int)} */ public FacetLabel[] getBulkPath(int... ordinals) throws IOException { FacetLabel[] facetLabels = new FacetLabel[ordinals.length]; diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java index 844924ef7cc6..66836536ffa0 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java @@ -472,10 +472,12 @@ public int compare(int i, int j) { } } - synchronized (categoryCache) { - for (int i : uncachedOrdinalPositions) { - // add the value to the categoryCache after computation - categoryCache.put(ordinals[i], bulkPath[originalPosition[i]]); + if (uncachedOrdinalPositions.isEmpty() == false) { + synchronized (categoryCache) { + for (int i : uncachedOrdinalPositions) { + // add the value to the categoryCache after computation + categoryCache.put(ordinals[i], bulkPath[originalPosition[i]]); + } } } diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java index 9103e95a925d..3da05830e1e7 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java @@ -16,6 +16,7 @@ */ package org.apache.lucene.facet.taxonomy.directory; +import com.carrotsearch.randomizedtesting.generators.RandomNumbers; import java.io.IOException; import java.util.Arrays; import java.util.HashSet; @@ -597,16 +598,16 @@ public void testCallingBulkPathReturnsCorrectResult() throws Exception { } // create multiple threads to check result correctness and thread contention in the cache - Thread[] addThreads = new Thread[atLeast(4)]; + Thread[] addThreads = new Thread[RandomNumbers.randomIntBetween(random(), 1, 12)]; for (int z = 0; z < addThreads.length; z++) { addThreads[z] = new Thread() { @Override public void run() { - // each thread iterates for maxThreadIterations times - int maxThreadIterations = random().nextInt(10); + // each thread iterates for numThreadIterations times + int numThreadIterations = random().nextInt(10); for (int threadIterations = 0; - threadIterations < maxThreadIterations; + threadIterations < numThreadIterations; threadIterations++) { // length of the FacetLabel array that we are going to check diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java index dc92ee753c61..6483c52d3204 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java @@ -16,6 +16,7 @@ */ package org.apache.lucene.facet.taxonomy.directory; +import com.carrotsearch.randomizedtesting.generators.RandomNumbers; import java.io.IOException; import java.util.HashMap; import java.util.Map; @@ -314,7 +315,7 @@ public void testConcurrency() throws Exception { System.out.println("TEST: use cache=" + cache); } final DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE, cache); - Thread[] addThreads = new Thread[atLeast(4)]; + Thread[] addThreads = new Thread[RandomNumbers.randomIntBetween(random(), 1, 12)]; for (int z = 0; z < addThreads.length; z++) { addThreads[z] = new Thread() { From a68404f04763fc3e95cb5be468c65e563980bf5b Mon Sep 17 00:00:00 2001 From: Gautam Worah Date: Sun, 29 Aug 2021 15:32:30 -0700 Subject: [PATCH 11/11] Modify CHANGES entry to reflect the small API change in getPath. --- lucene/CHANGES.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 5d95d931b562..43550cc0cb6f 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -251,7 +251,8 @@ Improvements (Uwe Schinder) * LUCENE-9476: Add new getBulkPath API to DirectoryTaxonomyReader to more efficiently retrieve FacetLabels for multiple - facet ordinals at once (Gautam Worah, Mike McCandless). This API is 2-4% faster than iteratively calling getPath + facet ordinals at once (Gautam Worah, Mike McCandless). This API is 2-4% faster than iteratively calling getPath. + The getPath API now throws an IAE instead of returning null if the ordinal is out of bounds. Bug fixes