Skip to content

Commit 38fc368

Browse files
authored
Disabling point range collector if doc values not indexed (#14559)
--------- Signed-off-by: Ankit Jain <[email protected]>
1 parent d04c5e3 commit 38fc368

File tree

3 files changed

+55
-23
lines changed

3 files changed

+55
-23
lines changed

lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/HistogramCollectorBenchmark.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ public void setup(BenchmarkParams params) throws Exception {
7575
if (params.pointEnabled) {
7676
// Adding indexed point field to verify multi range collector
7777
doc.add(new LongPoint("f", value));
78+
// Doc values need to be enabled for histogram collection
79+
doc.add(NumericDocValuesField.indexedField("f", value));
7880
} else {
7981
doc.add(NumericDocValuesField.indexedField("f", value));
8082
}

lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/plain/histograms/HistogramCollector.java

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,13 @@ public LeafCollector getLeafCollector(LeafReaderContext context) throws IOExcept
7373
throw new CollectionTerminatedException();
7474
}
7575

76+
// Doc values should be indexed to enable histogram collection
77+
if (fi.getDocValuesType() != DocValuesType.NUMERIC
78+
&& fi.getDocValuesType() != DocValuesType.SORTED_NUMERIC) {
79+
throw new IllegalStateException(
80+
"Expected numeric field, but got doc-value type: " + fi.getDocValuesType());
81+
}
82+
7683
// We can use multi range traversal logic to collect the histogram on numeric
7784
// field indexed as point for MATCH_ALL cases. In future, this can be extended
7885
// for Point Range Query cases as well
@@ -92,12 +99,6 @@ public LeafCollector getLeafCollector(LeafReaderContext context) throws IOExcept
9299
}
93100
}
94101

95-
if (fi.getDocValuesType() != DocValuesType.NUMERIC
96-
&& fi.getDocValuesType() != DocValuesType.SORTED_NUMERIC) {
97-
throw new IllegalStateException(
98-
"Expected numeric field, but got doc-value type: " + fi.getDocValuesType());
99-
}
100-
101102
SortedNumericDocValues values = DocValues.getSortedNumeric(context.reader(), field);
102103
NumericDocValues singleton = DocValues.unwrapSingleton(values);
103104
if (singleton == null) {

lucene/sandbox/src/test/org/apache/lucene/sandbox/facet/plain/histograms/TestHistogramCollectorManager.java

Lines changed: 46 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -125,23 +125,8 @@ public void testSkipIndexWithSortAndLowInterval() throws IOException {
125125

126126
public void testMultiRangePointTreeCollector() throws IOException {
127127
Directory dir = newDirectory();
128-
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig());
129-
130-
long[] values = new long[5000];
131-
132-
for (int i = 0; i < values.length; i++) {
133-
values[i] = random().nextInt(0, 5000); // Generates a random integer
134-
}
135-
136-
for (long value : values) {
137-
Document doc = new Document();
138-
// Adding indexed point field to verify multi range collector
139-
doc.add(new LongPoint("f", value));
140-
w.addDocument(doc);
141-
}
142-
143-
DirectoryReader reader = DirectoryReader.open(w);
144-
w.close();
128+
long[] values = generateRandomData(5000);
129+
DirectoryReader reader = indexNumericData(dir, values, true);
145130
IndexSearcher searcher = newSearcher(reader);
146131

147132
// Validate the MATCH_ALL case
@@ -222,6 +207,50 @@ protected String toString(int dimension, byte[] value) {
222207
dir.close();
223208
}
224209

210+
public void testHistogramCollectorExceptionWithoutDocValues() throws IOException {
211+
Directory dir = newDirectory();
212+
long[] values = generateRandomData(5000);
213+
DirectoryReader reader = indexNumericData(dir, values, false);
214+
IndexSearcher searcher = newSearcher(reader);
215+
216+
// Validate that exception is thrown when doc values is disabled on numeric field
217+
expectThrows(
218+
IllegalStateException.class,
219+
() -> searcher.search(new MatchAllDocsQuery(), new HistogramCollectorManager("f", 1000)));
220+
221+
reader.close();
222+
dir.close();
223+
}
224+
225+
private long[] generateRandomData(int bound) {
226+
long[] values = new long[bound];
227+
for (int i = 0; i < values.length; i++) {
228+
values[i] = random().nextInt(0, bound); // Generates a random integer
229+
}
230+
return values;
231+
}
232+
233+
private DirectoryReader indexNumericData(Directory dir, long[] values, boolean docValueEnabled)
234+
throws IOException {
235+
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig());
236+
237+
for (long value : values) {
238+
Document doc = new Document();
239+
// Adding indexed point field to verify multi range collector
240+
doc.add(new LongPoint("f", value));
241+
if (docValueEnabled) {
242+
// Doc values need to be enabled for histogram collection
243+
doc.add(new NumericDocValuesField("f", value));
244+
}
245+
w.addDocument(doc);
246+
}
247+
248+
DirectoryReader reader = DirectoryReader.open(w);
249+
w.close();
250+
251+
return reader;
252+
}
253+
225254
private void doTestSkipIndex(IndexWriterConfig cfg) throws IOException {
226255
Directory dir = newDirectory();
227256
IndexWriter w = new IndexWriter(dir, cfg);

0 commit comments

Comments
 (0)