Skip to content

Commit 89f37e8

Browse files
authored
Improve SingleValueMatchQuery performance (#135714)
* If a field is single-valued and dense, then rewrite to match all docs.
* Allow SingleValueMatchQuery to be cached if a field is single-valued, given that it will never emit a warning.
1 parent ce18c5b commit 89f37e8

File tree

3 files changed: 83 additions (+83) and 22 deletions (-22).

docs/changelog/135714.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 135714
2+
summary: Improve performance when a single-valued field filter gets pushed down. (SingleValueMatchQuery)
3+
area: ES|QL
4+
type: enhancement
5+
issues: []

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/querydsl/query/SingleValueMatchQuery.java

Lines changed: 42 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,11 @@
88
package org.elasticsearch.compute.querydsl.query;
99

1010
import org.apache.lucene.index.DocValues;
11+
import org.apache.lucene.index.LeafReader;
1112
import org.apache.lucene.index.LeafReaderContext;
13+
import org.apache.lucene.index.NumericDocValues;
1214
import org.apache.lucene.index.PointValues;
15+
import org.apache.lucene.index.SortedDocValues;
1316
import org.apache.lucene.index.SortedNumericDocValues;
1417
import org.apache.lucene.index.SortedSetDocValues;
1518
import org.apache.lucene.index.Terms;
@@ -107,6 +110,17 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti
107110

108111
@Override
109112
public boolean isCacheable(LeafReaderContext ctx) {
113+
final LeafFieldData lfd = fieldData.load(ctx);
114+
// If field is singleton, then it is safe to cache this query, because no warning will ever be emitted.
115+
if (lfd instanceof LeafNumericFieldData n) {
116+
if (DocValues.unwrapSingleton(n.getLongValues()) != null) {
117+
return true;
118+
}
119+
} else if (lfd instanceof LeafOrdinalsFieldData o) {
120+
if (DocValues.unwrapSingleton(o.getOrdinalsValues()) != null) {
121+
return true;
122+
}
123+
}
110124
// don't cache so we can emit warnings
111125
return false;
112126
}
@@ -120,6 +134,7 @@ private ScorerSupplier scorerSupplier(
120134
final int maxDoc = context.reader().maxDoc();
121135
if (DocValues.unwrapSingleton(sortedNumerics) != null) {
122136
// check for dense field
137+
// TODO: check doc values skippers
123138
final PointValues points = context.reader().getPointValues(fieldData.getFieldName());
124139
if (points != null && points.getDocCount() == maxDoc) {
125140
return new DocIdSetIteratorScorerSupplier(boost, scoreMode, DocIdSetIterator.all(maxDoc));
@@ -149,6 +164,7 @@ private ScorerSupplier scorerSupplier(
149164
final int maxDoc = context.reader().maxDoc();
150165
if (DocValues.unwrapSingleton(sortedSetDocValues) != null) {
151166
// check for dense field
167+
// TODO: check doc values skippers
152168
final Terms terms = context.reader().terms(fieldData.getFieldName());
153169
if (terms != null && terms.getDocCount() == maxDoc) {
154170
return new DocIdSetIteratorScorerSupplier(boost, scoreMode, DocIdSetIterator.all(maxDoc));
@@ -209,19 +225,37 @@ private ScorerSupplier scorerSupplier(
209225
@Override
210226
public Query rewrite(IndexSearcher indexSearcher) throws IOException {
211227
for (LeafReaderContext context : indexSearcher.getIndexReader().leaves()) {
228+
final LeafReader reader = context.reader();
229+
final int maxDoc = reader.maxDoc();
212230
final LeafFieldData lfd = fieldData.load(context);
213231
if (lfd instanceof LeafNumericFieldData) {
214-
final PointValues pointValues = context.reader().getPointValues(fieldData.getFieldName());
215-
if (pointValues == null
216-
|| pointValues.getDocCount() != context.reader().maxDoc()
217-
|| pointValues.size() != pointValues.getDocCount()) {
218-
return super.rewrite(indexSearcher);
232+
NumericDocValues singleton = DocValues.unwrapSingleton(reader.getSortedNumericDocValues(fieldData.getFieldName()));
233+
if (singleton != null) {
234+
singleton.nextDoc();
235+
if (singleton.docIDRunEnd() == maxDoc) {
236+
continue;
237+
}
219238
}
239+
// TODO: check doc values skippers
240+
final PointValues points = reader.getPointValues(fieldData.getFieldName());
241+
if (points != null && points.getDocCount() == maxDoc && points.size() == points.getDocCount()) {
242+
continue;
243+
}
244+
return super.rewrite(indexSearcher);
220245
} else if (lfd instanceof LeafOrdinalsFieldData) {
221-
final Terms terms = context.reader().terms(fieldData.getFieldName());
222-
if (terms == null || terms.getDocCount() != context.reader().maxDoc() || terms.getSumDocFreq() != terms.getDocCount()) {
223-
return super.rewrite(indexSearcher);
246+
SortedDocValues singleton = DocValues.unwrapSingleton(reader.getSortedSetDocValues(fieldData.getFieldName()));
247+
if (singleton != null) {
248+
singleton.nextDoc();
249+
if (singleton.docIDRunEnd() == maxDoc) {
250+
continue;
251+
}
224252
}
253+
// TODO: check doc values skippers
254+
Terms terms = reader.terms(fieldData.getFieldName());
255+
if (terms != null && terms.getDocCount() == maxDoc && terms.getSumDocFreq() == terms.getDocCount()) {
256+
continue;
257+
}
258+
return super.rewrite(indexSearcher);
225259
} else {
226260
return super.rewrite(indexSearcher);
227261
}

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueMathQueryTests.java

Lines changed: 36 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,17 @@
1313
import org.apache.lucene.document.Field;
1414
import org.apache.lucene.document.KeywordField;
1515
import org.apache.lucene.document.LongField;
16+
import org.apache.lucene.document.SortedNumericDocValuesField;
17+
import org.apache.lucene.document.SortedSetDocValuesField;
1618
import org.apache.lucene.index.IndexReader;
1719
import org.apache.lucene.index.IndexableField;
1820
import org.apache.lucene.search.IndexSearcher;
1921
import org.apache.lucene.search.MatchAllDocsQuery;
2022
import org.apache.lucene.search.Query;
2123
import org.apache.lucene.store.Directory;
2224
import org.apache.lucene.tests.index.RandomIndexWriter;
25+
import org.apache.lucene.util.BytesRef;
26+
import org.apache.lucene.util.NumericUtils;
2327
import org.elasticsearch.compute.operator.DriverContext;
2428
import org.elasticsearch.compute.operator.Warnings;
2529
import org.elasticsearch.compute.querydsl.query.SingleValueMatchQuery;
@@ -55,7 +59,9 @@ public static List<Object[]> params() {
5559
params.add(new Object[] { new SneakyTwo(fieldType) });
5660
for (boolean multivaluedField : new boolean[] { true, false }) {
5761
for (boolean allowEmpty : new boolean[] { true, false }) {
58-
params.add(new Object[] { new StandardSetup(fieldType, multivaluedField, allowEmpty, 100) });
62+
for (boolean docValuesOnly : new boolean[] { true, false }) {
63+
params.add(new Object[] { new StandardSetup(fieldType, multivaluedField, docValuesOnly, allowEmpty, 100) });
64+
}
5965
}
6066
}
6167
}
@@ -122,10 +128,16 @@ private void runCase(List<List<Object>> fieldValues, int count) {
122128
}
123129
}
124130

125-
private record StandardSetup(String fieldType, boolean multivaluedField, boolean empty, int count) implements Setup {
131+
private record StandardSetup(String fieldType, boolean multivaluedField, boolean docValuesOnly, boolean empty, int count)
132+
implements
133+
Setup {
126134
@Override
127135
public XContentBuilder mapping(XContentBuilder builder) throws IOException {
128-
return builder.startObject("foo").field("type", fieldType).endObject();
136+
if (docValuesOnly) {
137+
return builder.startObject("foo").field("type", fieldType).field("index", false).endObject();
138+
} else {
139+
return builder.startObject("foo").field("type", fieldType).endObject();
140+
}
129141
}
130142

131143
@Override
@@ -134,7 +146,7 @@ public List<List<Object>> build(RandomIndexWriter iw) throws IOException {
134146
for (int i = 0; i < count; i++) {
135147
List<Object> values = values(i);
136148
docs.add(values);
137-
iw.addDocument(docFor(values));
149+
iw.addDocument(docFor(values, docValuesOnly));
138150
}
139151
return docs;
140152
}
@@ -187,8 +199,8 @@ public List<List<Object>> build(RandomIndexWriter iw) throws IOException {
187199
Object second = randomValue(fieldType);
188200
List<Object> justFirst = List.of(first);
189201
List<Object> both = List.of(first, second);
190-
iw.addDocument(docFor(justFirst));
191-
iw.addDocument(docFor(both));
202+
iw.addDocument(docFor(justFirst, false));
203+
iw.addDocument(docFor(both, false));
192204
return List.of(justFirst, both);
193205
}
194206

@@ -212,16 +224,26 @@ private static Object randomValue(String fieldType) {
212224
};
213225
}
214226

215-
private static List<IndexableField> docFor(Iterable<Object> values) {
227+
private static List<IndexableField> docFor(Iterable<Object> values, boolean docValuesOnly) {
216228
List<IndexableField> fields = new ArrayList<>();
217229
for (Object v : values) {
218-
fields.add(switch (v) {
219-
case Double n -> new DoubleField("foo", n, Field.Store.NO);
220-
case Float n -> new DoubleField("foo", n, Field.Store.NO);
221-
case Number n -> new LongField("foo", n.longValue(), Field.Store.NO);
222-
case String s -> new KeywordField("foo", s, Field.Store.NO);
223-
default -> throw new UnsupportedOperationException();
224-
});
230+
if (docValuesOnly) {
231+
fields.add(switch (v) {
232+
case Double n -> new SortedNumericDocValuesField("foo", NumericUtils.doubleToSortableLong(n));
233+
case Float n -> new SortedNumericDocValuesField("foo", NumericUtils.doubleToSortableLong(n));
234+
case Number n -> new SortedNumericDocValuesField("foo", n.longValue());
235+
case String s -> new SortedSetDocValuesField("foo", new BytesRef(s));
236+
default -> throw new UnsupportedOperationException();
237+
});
238+
} else {
239+
fields.add(switch (v) {
240+
case Double n -> new DoubleField("foo", n, Field.Store.NO);
241+
case Float n -> new DoubleField("foo", n, Field.Store.NO);
242+
case Number n -> new LongField("foo", n.longValue(), Field.Store.NO);
243+
case String s -> new KeywordField("foo", s, Field.Store.NO);
244+
default -> throw new UnsupportedOperationException();
245+
});
246+
}
225247
}
226248
return fields;
227249
}

0 commit comments

Comments
 (0)