Skip to content

Commit 2ea7c9e

Browse files
authored
Rewrite SortedNumericDocValuesRangeQuery using DocValuesSkippers (#15083)
If DocValuesSkippers are enabled on a field, we can check at rewrite time if a range query will match all or none of the documents in all segments, and rewrite to a corresponding MatchAllDocsQuery or MatchNoDocsQuery.
1 parent f245bed commit 2ea7c9e

File tree

4 files changed

+153
-0
lines changed

4 files changed

+153
-0
lines changed

lucene/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,8 @@ Optimizations
253253
* GITHUB#15039: Score computations are now more reliably vectorized.
254254
(Adrien Grand, Guo Feng)
255255

256+
* GITHUB#15083: Use DocValuesSkippers in SortedNumericDocValuesRangeQuery#rewrite(). (Alan Woodward)
257+
256258
Changes in Runtime Behavior
257259
---------------------
258260
* GITHUB#14823: Decrease TieredMergePolicy's default number of segments per

lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import org.apache.lucene.search.DocValuesRangeIterator;
3232
import org.apache.lucene.search.FieldExistsQuery;
3333
import org.apache.lucene.search.IndexSearcher;
34+
import org.apache.lucene.search.MatchAllDocsQuery;
3435
import org.apache.lucene.search.MatchNoDocsQuery;
3536
import org.apache.lucene.search.Query;
3637
import org.apache.lucene.search.QueryVisitor;
@@ -97,6 +98,17 @@ public Query rewrite(IndexSearcher indexSearcher) throws IOException {
9798
if (lowerValue > upperValue) {
9899
return new MatchNoDocsQuery();
99100
}
101+
long globalMin = DocValuesSkipper.globalMinValue(indexSearcher, field);
102+
long globalMax = DocValuesSkipper.globalMaxValue(indexSearcher, field);
103+
if (lowerValue > globalMax || upperValue < globalMin) {
104+
return new MatchNoDocsQuery();
105+
}
106+
if (lowerValue <= globalMin
107+
&& upperValue >= globalMax
108+
&& DocValuesSkipper.globalDocCount(indexSearcher, field)
109+
== indexSearcher.getIndexReader().maxDoc()) {
110+
return new MatchAllDocsQuery();
111+
}
100112
return super.rewrite(indexSearcher);
101113
}
102114

lucene/core/src/java/org/apache/lucene/index/DocValuesSkipper.java

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
import java.io.IOException;
2020
import org.apache.lucene.search.DocIdSetIterator;
21+
import org.apache.lucene.search.IndexSearcher;
2122

2223
/**
2324
* Skipper for {@link DocValues}.
@@ -122,4 +123,67 @@ public final void advance(long minValue, long maxValue) throws IOException {
122123
advance(maxDocID + 1);
123124
}
124125
}
126+
127+
/**
128+
* Returns the minimum value for a field across all segments, or {@link Long#MIN_VALUE} if not
129+
* available
130+
*
* <p>Segments whose field infos do not contain the field are ignored. If any remaining segment
* has no skipper for the field, the result is {@link Long#MIN_VALUE}. If no segment contains the
* field at all, the initial value {@link Long#MAX_VALUE} is returned unchanged.
*
131+
* @param searcher a searcher over the index
132+
* @param field the field to retrieve values for
* @return the smallest skipper minimum over all segments, or one of the sentinel values above
* @throws IOException declared by this method; presumably raised when a segment's skipper cannot
*     be read
133+
*/
134+
public static long globalMinValue(IndexSearcher searcher, String field) throws IOException {
135+
long minValue = Long.MAX_VALUE;
136+
for (LeafReaderContext ctx : searcher.getLeafContexts()) {
137+
if (ctx.reader().getFieldInfos().fieldInfo(field) == null) {
138+
continue; // no field values in this segment, so we can ignore it
139+
}
140+
DocValuesSkipper skipper = ctx.reader().getDocValuesSkipper(field);
141+
if (skipper == null) {
142+
// field exists here but has no skipper: the minimum is unknown, so use the lowest possible
// value; later Math.min calls can never raise it again
minValue = Long.MIN_VALUE;
143+
} else {
144+
minValue = Math.min(minValue, skipper.minValue());
145+
}
146+
}
147+
return minValue;
148+
}
149+
150+
/**
151+
* Returns the maximum value for a field across all segments, or {@link Long#MAX_VALUE} if not
152+
* available
153+
*
* <p>Segments whose field infos do not contain the field are ignored. If any remaining segment
* has no skipper for the field, the result is {@link Long#MAX_VALUE}. If no segment contains the
* field at all, the initial value {@link Long#MIN_VALUE} is returned unchanged.
*
154+
* @param searcher a searcher over the index
155+
* @param field the field to retrieve values for
* @return the largest skipper maximum over all segments, or one of the sentinel values above
* @throws IOException declared by this method; presumably raised when a segment's skipper cannot
*     be read
156+
*/
157+
public static long globalMaxValue(IndexSearcher searcher, String field) throws IOException {
158+
long maxValue = Long.MIN_VALUE;
159+
for (LeafReaderContext ctx : searcher.getLeafContexts()) {
160+
if (ctx.reader().getFieldInfos().fieldInfo(field) == null) {
161+
continue; // no field values in this segment, so we can ignore it
162+
}
163+
DocValuesSkipper skipper = ctx.reader().getDocValuesSkipper(field);
164+
if (skipper == null) {
165+
// field exists here but has no skipper: the maximum is unknown, so use the highest possible
// value; later Math.max calls can never lower it again
maxValue = Long.MAX_VALUE;
166+
} else {
167+
maxValue = Math.max(maxValue, skipper.maxValue());
168+
}
169+
}
170+
return maxValue;
171+
}
172+
173+
/**
174+
* Returns the total skipper document count for a field across all segments
175+
*
* <p>Only segments that expose a skipper for the field contribute; a segment without a skipper
* adds nothing to the total.
*
176+
* @param searcher a searcher over the index
177+
* @param field the field to retrieve values for
* @return the sum of {@code docCount()} over every segment's skipper for the field
* @throws IOException declared by this method; presumably raised when a segment's skipper cannot
*     be read
178+
*/
179+
public static int globalDocCount(IndexSearcher searcher, String field) throws IOException {
180+
int docCount = 0;
181+
for (LeafReaderContext ctx : searcher.getLeafContexts()) {
182+
DocValuesSkipper skipper = ctx.reader().getDocValuesSkipper(field);
183+
if (skipper != null) {
184+
docCount += skipper.docCount();
185+
}
186+
}
187+
return docCount;
188+
}
125189
}

lucene/core/src/test/org/apache/lucene/search/TestDocValuesQueries.java

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
*/
1717
package org.apache.lucene.search;
1818

19+
import static org.hamcrest.Matchers.containsString;
20+
import static org.hamcrest.Matchers.instanceOf;
21+
1922
import java.io.IOException;
2023
import java.util.ArrayList;
2124
import java.util.Arrays;
@@ -648,4 +651,76 @@ private void assertCount(IndexSearcher searcher, Query query, int expectedCount)
648651
Weight w = searcher.createWeight(query, ScoreMode.COMPLETE, 1.0f);
649652
assertEquals(expectedCount, w.count(searcher.reader.leaves().getFirst()));
650653
}
654+
655+
/**
 * Checks which query class a slow sorted-numeric range query rewrites to.
 *
 * <p>The fixture indexes 100 documents with values 100..199: "with_index" and "without_index" on
 * every document, "sparse" on every document except i == 55, and "super_sparse" only on i == 74
 * (value 174). Fields created via {@code indexedField} are presumably the ones that carry a
 * doc-values skipper.
 */
public void testSortedNumericDocValuesRangeQueryRewrites() throws Exception {
656+
try (Directory dir = newDirectory();
657+
RandomIndexWriter iw = new RandomIndexWriter(random(), dir)) {
658+
for (int i = 0; i < 100; i++) {
659+
Document doc = new Document();
660+
doc.add(SortedNumericDocValuesField.indexedField("with_index", 100 + i));
661+
doc.add(new SortedNumericDocValuesField("without_index", 100 + i));
662+
// periodic commits, presumably to spread the documents over several segments
if (i % 17 == 0) {
663+
iw.commit();
664+
}
665+
if (i != 55) {
666+
doc.add(SortedNumericDocValuesField.indexedField("sparse", 100 + i));
667+
}
668+
if (i == 74) {
669+
doc.add(SortedNumericDocValuesField.indexedField("super_sparse", 174));
670+
}
671+
iw.addDocument(doc);
672+
}
673+
iw.commit();
674+
675+
try (IndexReader reader = iw.getReader()) {
676+
IndexSearcher searcher = new IndexSearcher(reader);
677+
// range entirely below the indexed values 100..199: expect a match-none rewrite
assertThat(
678+
searcher.rewrite(SortedNumericDocValuesField.newSlowRangeQuery("with_index", 0, 50)),
679+
instanceOf(MatchNoDocsQuery.class));
680+
// range covering every value of a field present on every document: expect match-all
assertThat(
681+
searcher.rewrite(SortedNumericDocValuesField.newSlowRangeQuery("with_index", 0, 250)),
682+
instanceOf(MatchAllDocsQuery.class));
683+
assertThat(
684+
searcher.rewrite(SortedNumericDocValuesField.newSlowRangeQuery("sparse", 0, 50)),
685+
instanceOf(MatchNoDocsQuery.class));
686+
assertThat(
687+
searcher.rewrite(SortedNumericDocValuesField.newSlowRangeQuery("super_sparse", 0, 50)),
688+
instanceOf(MatchNoDocsQuery.class));
689+
assertThat(
690+
searcher.rewrite(
691+
SortedNumericDocValuesField.newSlowRangeQuery("super_sparse", 250, 350)),
692+
instanceOf(MatchNoDocsQuery.class));
693+
// remaining cases must keep the range query; the class is matched by name via toString()
assertThat(
694+
searcher
695+
.rewrite(SortedNumericDocValuesField.newSlowRangeQuery("super_sparse", 174, 174))
696+
.getClass()
697+
.toString(),
698+
containsString("SortedNumericDocValuesRangeQuery"));
699+
assertThat(
700+
searcher
701+
.rewrite(SortedNumericDocValuesField.newSlowRangeQuery("with_index", 0, 150))
702+
.getClass()
703+
.toString(),
704+
containsString("SortedNumericDocValuesRangeQuery"));
705+
assertThat(
706+
searcher
707+
.rewrite(SortedNumericDocValuesField.newSlowRangeQuery("with_index", 150, 250))
708+
.getClass()
709+
.toString(),
710+
containsString("SortedNumericDocValuesRangeQuery"));
711+
assertThat(
712+
searcher
713+
.rewrite(SortedNumericDocValuesField.newSlowRangeQuery("with_index", 120, 150))
714+
.getClass()
715+
.toString(),
716+
containsString("SortedNumericDocValuesRangeQuery"));
717+
// "sparse" spans all values but is missing on one document, so match-all is not expected
assertThat(
718+
searcher
719+
.rewrite(SortedNumericDocValuesField.newSlowRangeQuery("sparse", 0, 250))
720+
.getClass()
721+
.toString(),
722+
containsString("SortedNumericDocValuesRangeQuery"));
723+
}
724+
}
725+
}
651726
}

0 commit comments

Comments
 (0)