Skip to content

Commit 55a9f8d

Browse files
Enable sort optimization on float and half_float (#126342) (#126530)
* Enable sort optimization on float and half_float (#126342) Previously we enabled sort optimization on long, double and date types, but left the other types for a follow-up. This change enables sort optimization on the float and half_float types. Optimizations on INT, BYTE and SHORT are left for a follow-up because they need more work: we currently use the SortField.Type.LONG sort type for all integer types, which prevents the optimization from being used. Backport for #126342 * Modification
1 parent fb5a57e commit 55a9f8d

File tree

10 files changed

+477
-161
lines changed

10 files changed

+477
-161
lines changed

docs/changelog/126342.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 126342
2+
summary: Enable sort optimization on float and `half_float`
3+
area: Search
4+
type: enhancement
5+
issues: []

server/src/internalClusterTest/java/org/elasticsearch/search/sort/FieldSortIT.java

Lines changed: 230 additions & 91 deletions
Large diffs are not rendered by default.

server/src/main/java/org/elasticsearch/index/fielddata/IndexNumericFieldData.java

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
2020
import org.elasticsearch.index.fielddata.fieldcomparator.DoubleValuesComparatorSource;
2121
import org.elasticsearch.index.fielddata.fieldcomparator.FloatValuesComparatorSource;
22+
import org.elasticsearch.index.fielddata.fieldcomparator.HalfFloatValuesComparatorSource;
2223
import org.elasticsearch.index.fielddata.fieldcomparator.LongValuesComparatorSource;
2324
import org.elasticsearch.search.DocValueFormat;
2425
import org.elasticsearch.search.MultiValueMode;
@@ -46,7 +47,7 @@ public enum NumericType {
4647
LONG(false, SortField.Type.LONG, CoreValuesSourceType.NUMERIC),
4748
DATE(false, SortField.Type.LONG, CoreValuesSourceType.DATE),
4849
DATE_NANOSECONDS(false, SortField.Type.LONG, CoreValuesSourceType.DATE),
49-
HALF_FLOAT(true, SortField.Type.LONG, CoreValuesSourceType.NUMERIC),
50+
HALF_FLOAT(true, SortField.Type.FLOAT, CoreValuesSourceType.NUMERIC),
5051
FLOAT(true, SortField.Type.FLOAT, CoreValuesSourceType.NUMERIC),
5152
DOUBLE(true, SortField.Type.DOUBLE, CoreValuesSourceType.NUMERIC);
5253

@@ -95,11 +96,13 @@ public final SortField sortField(
9596
* 3. We Aren't using max or min to resolve the duplicates.
9697
* 4. We have to cast the results to another type.
9798
*/
98-
if (sortRequiresCustomComparator()
99-
|| nested != null
99+
boolean requiresCustomComparator = nested != null
100100
|| (sortMode != MultiValueMode.MAX && sortMode != MultiValueMode.MIN)
101-
|| targetNumericType != getNumericType()) {
102-
return new SortField(getFieldName(), source, reverse);
101+
|| targetNumericType != getNumericType();
102+
if (sortRequiresCustomComparator() || requiresCustomComparator) {
103+
SortField sortField = new SortField(getFieldName(), source, reverse);
104+
sortField.setOptimizeSortWithPoints(requiresCustomComparator == false && isIndexed());
105+
return sortField;
103106
}
104107

105108
SortedNumericSelector.Type selectorType = sortMode == MultiValueMode.MAX
@@ -108,20 +111,18 @@ public final SortField sortField(
108111
SortField sortField = new SortedNumericSortField(getFieldName(), getNumericType().sortFieldType, reverse, selectorType);
109112
sortField.setMissingValue(source.missingObject(missingValue, reverse));
110113

111-
// TODO: Now that numeric sort uses indexed points to skip over non-competitive documents,
112-
// Lucene 9 requires that the same data/type is stored in points and doc values.
113-
// We break this assumption in ES by using the wider numeric sort type for every field,
114-
// (e.g. shorts use longs and floats use doubles). So for now we forbid the usage of
115-
// points in numeric sort on field types that use a different sort type.
116-
// We could expose these optimizations for all numeric types but that would require
117-
// to rewrite the logic to handle types when merging results coming from different
118-
// indices.
114+
// TODO: enable sort optimization for BYTE, SHORT and INT types
115+
// They can use custom comparator logic, similarly to HalfFloatValuesComparatorSource.
116+
// The problem comes from the fact that we use SortField.Type.LONG for all these types.
117+
// Investigate how to resolve this.
119118
switch (getNumericType()) {
120119
case DATE_NANOSECONDS:
121120
case DATE:
122121
case LONG:
123122
case DOUBLE:
124-
// longs, doubles and dates use the same type for doc-values and points.
123+
case FLOAT:
124+
// longs, doubles and dates use the same type for doc-values and points
125+
// floats use longs for doc-values, but Lucene's FloatComparator::getValueForDoc converts the long value to a float
125126
sortField.setOptimizeSortWithPoints(isIndexed());
126127
break;
127128

@@ -199,7 +200,8 @@ private XFieldComparatorSource comparatorSource(
199200
Nested nested
200201
) {
201202
return switch (targetNumericType) {
202-
case HALF_FLOAT, FLOAT -> new FloatValuesComparatorSource(this, missingValue, sortMode, nested);
203+
case FLOAT -> new FloatValuesComparatorSource(this, missingValue, sortMode, nested);
204+
case HALF_FLOAT -> new HalfFloatValuesComparatorSource(this, missingValue, sortMode, nested);
203205
case DOUBLE -> new DoubleValuesComparatorSource(this, missingValue, sortMode, nested);
204206
case DATE -> dateComparatorSource(missingValue, sortMode, nested);
205207
case DATE_NANOSECONDS -> dateNanosComparatorSource(missingValue, sortMode, nested);

server/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/FloatValuesComparatorSource.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
*/
3838
public class FloatValuesComparatorSource extends IndexFieldData.XFieldComparatorSource {
3939

40-
private final IndexNumericFieldData indexFieldData;
40+
final IndexNumericFieldData indexFieldData;
4141

4242
public FloatValuesComparatorSource(
4343
IndexNumericFieldData indexFieldData,
@@ -54,7 +54,7 @@ public SortField.Type reducedType() {
5454
return SortField.Type.FLOAT;
5555
}
5656

57-
private NumericDoubleValues getNumericDocValues(LeafReaderContext context, double missingValue) throws IOException {
57+
NumericDoubleValues getNumericDocValues(LeafReaderContext context, double missingValue) throws IOException {
5858
final SortedNumericDoubleValues values = indexFieldData.load(context).getDoubleValues();
5959
if (nested == null) {
6060
return FieldData.replaceMissing(sortMode.select(values), missingValue);
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.index.fielddata.fieldcomparator;
11+
12+
import org.apache.lucene.index.LeafReaderContext;
13+
import org.apache.lucene.sandbox.document.HalfFloatPoint;
14+
import org.apache.lucene.search.LeafFieldComparator;
15+
import org.apache.lucene.search.Pruning;
16+
import org.apache.lucene.search.comparators.NumericComparator;
17+
18+
import java.io.IOException;
19+
20+
/**
21+
* Comparator for hal_float values.
22+
* This comparator provides a skipping functionality – an iterator that can skip over non-competitive documents.
23+
*/
24+
public class HalfFloatComparator extends NumericComparator<Float> {
25+
private final float[] values;
26+
protected float topValue;
27+
protected float bottom;
28+
29+
public HalfFloatComparator(int numHits, String field, Float missingValue, boolean reverse, Pruning pruning) {
30+
super(field, missingValue != null ? missingValue : 0.0f, reverse, pruning, HalfFloatPoint.BYTES);
31+
values = new float[numHits];
32+
}
33+
34+
@Override
35+
public int compare(int slot1, int slot2) {
36+
return Float.compare(values[slot1], values[slot2]);
37+
}
38+
39+
@Override
40+
public void setTopValue(Float value) {
41+
super.setTopValue(value);
42+
topValue = value;
43+
}
44+
45+
@Override
46+
public Float value(int slot) {
47+
return Float.valueOf(values[slot]);
48+
}
49+
50+
@Override
51+
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
52+
return new HalfFloatLeafComparator(context);
53+
}
54+
55+
/** Leaf comparator for {@link HalfFloatComparator} that provides skipping functionality */
56+
public class HalfFloatLeafComparator extends NumericLeafComparator {
57+
58+
public HalfFloatLeafComparator(LeafReaderContext context) throws IOException {
59+
super(context);
60+
}
61+
62+
private float getValueForDoc(int doc) throws IOException {
63+
if (docValues.advanceExact(doc)) {
64+
return Float.intBitsToFloat((int) docValues.longValue());
65+
} else {
66+
return missingValue;
67+
}
68+
}
69+
70+
@Override
71+
public void setBottom(int slot) throws IOException {
72+
bottom = values[slot];
73+
super.setBottom(slot);
74+
}
75+
76+
@Override
77+
public int compareBottom(int doc) throws IOException {
78+
return Float.compare(bottom, getValueForDoc(doc));
79+
}
80+
81+
@Override
82+
public int compareTop(int doc) throws IOException {
83+
return Float.compare(topValue, getValueForDoc(doc));
84+
}
85+
86+
@Override
87+
public void copy(int slot, int doc) throws IOException {
88+
values[slot] = getValueForDoc(doc);
89+
super.copy(slot, doc);
90+
}
91+
92+
@Override
93+
protected int compareMissingValueWithTopValue() {
94+
return Float.compare(missingValue, bottom);
95+
}
96+
97+
@Override
98+
protected int compareMissingValueWithBottomValue() {
99+
return Float.compare(missingValue, topValue);
100+
}
101+
102+
@Override
103+
protected void encodeBottom(byte[] packedValue) {
104+
HalfFloatPoint.encodeDimension(bottom, packedValue, 0);
105+
}
106+
107+
@Override
108+
protected void encodeTop(byte[] packedValue) {
109+
HalfFloatPoint.encodeDimension(topValue, packedValue, 0);
110+
}
111+
}
112+
}
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
package org.elasticsearch.index.fielddata.fieldcomparator;
10+
11+
import org.apache.lucene.index.LeafReaderContext;
12+
import org.apache.lucene.index.NumericDocValues;
13+
import org.apache.lucene.search.FieldComparator;
14+
import org.apache.lucene.search.LeafFieldComparator;
15+
import org.apache.lucene.search.Pruning;
16+
import org.elasticsearch.core.Nullable;
17+
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
18+
import org.elasticsearch.search.MultiValueMode;
19+
20+
import java.io.IOException;
21+
22+
/**
23+
* Comparator source for half_float values.
24+
*/
25+
public class HalfFloatValuesComparatorSource extends FloatValuesComparatorSource {
26+
public HalfFloatValuesComparatorSource(
27+
IndexNumericFieldData indexFieldData,
28+
@Nullable Object missingValue,
29+
MultiValueMode sortMode,
30+
Nested nested
31+
) {
32+
super(indexFieldData, missingValue, sortMode, nested);
33+
}
34+
35+
@Override
36+
public FieldComparator<?> newComparator(String fieldname, int numHits, Pruning enableSkipping, boolean reversed) {
37+
assert indexFieldData == null || fieldname.equals(indexFieldData.getFieldName());
38+
39+
final float fMissingValue = (Float) missingObject(missingValue, reversed);
40+
// NOTE: it's important to pass null as a missing value in the constructor so that
41+
// the comparator doesn't check docsWithField since we replace missing values in select()
42+
return new HalfFloatComparator(numHits, fieldname, null, reversed, enableSkipping) {
43+
@Override
44+
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
45+
return new HalfFloatLeafComparator(context) {
46+
@Override
47+
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
48+
return HalfFloatValuesComparatorSource.this.getNumericDocValues(context, fMissingValue).getRawFloatValues();
49+
}
50+
};
51+
}
52+
};
53+
}
54+
}

server/src/test/java/org/elasticsearch/common/lucene/LuceneTests.java

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
import org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource;
6161
import org.elasticsearch.index.fielddata.fieldcomparator.DoubleValuesComparatorSource;
6262
import org.elasticsearch.index.fielddata.fieldcomparator.FloatValuesComparatorSource;
63+
import org.elasticsearch.index.fielddata.fieldcomparator.HalfFloatValuesComparatorSource;
6364
import org.elasticsearch.index.fielddata.fieldcomparator.LongValuesComparatorSource;
6465
import org.elasticsearch.search.MultiValueMode;
6566
import org.elasticsearch.search.sort.ShardDocSortField;
@@ -642,7 +643,7 @@ private static Tuple<SortField, SortField> randomSortFieldCustomComparatorSource
642643
IndexFieldData.XFieldComparatorSource comparatorSource;
643644
boolean reverse = randomBoolean();
644645
Object missingValue = null;
645-
switch (randomIntBetween(0, 3)) {
646+
switch (randomIntBetween(0, 4)) {
646647
case 0 -> comparatorSource = new LongValuesComparatorSource(
647648
null,
648649
randomBoolean() ? randomLong() : null,
@@ -662,7 +663,13 @@ private static Tuple<SortField, SortField> randomSortFieldCustomComparatorSource
662663
randomFrom(MultiValueMode.values()),
663664
null
664665
);
665-
case 3 -> {
666+
case 3 -> comparatorSource = new HalfFloatValuesComparatorSource(
667+
null,
668+
randomBoolean() ? randomFloat() : null,
669+
randomFrom(MultiValueMode.values()),
670+
null
671+
);
672+
case 4 -> {
666673
comparatorSource = new BytesRefFieldComparatorSource(
667674
null,
668675
randomBoolean() ? "_first" : "_last",

server/src/test/java/org/elasticsearch/index/fielddata/AbstractFieldDataTestCase.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,17 @@ public <IFD extends IndexFieldData<?>> IFD getForField(String type, String field
109109
null,
110110
null
111111
).docValues(docValues).build(context).fieldType();
112+
} else if (type.equals("half_float")) {
113+
fieldType = new NumberFieldMapper.Builder(
114+
fieldName,
115+
NumberFieldMapper.NumberType.HALF_FLOAT,
116+
ScriptCompiler.NONE,
117+
false,
118+
true,
119+
IndexVersion.current(),
120+
null,
121+
null
122+
).docValues(docValues).build(context).fieldType();
112123
} else if (type.equals("double")) {
113124
fieldType = new NumberFieldMapper.Builder(
114125
fieldName,

server/src/test/java/org/elasticsearch/index/search/nested/FloatNestedSortingTests.java

Lines changed: 0 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -10,29 +10,13 @@
1010

1111
import org.apache.lucene.document.SortedNumericDocValuesField;
1212
import org.apache.lucene.index.IndexableField;
13-
import org.apache.lucene.search.ConstantScoreQuery;
14-
import org.apache.lucene.search.FieldDoc;
15-
import org.apache.lucene.search.IndexSearcher;
16-
import org.apache.lucene.search.Query;
17-
import org.apache.lucene.search.Sort;
18-
import org.apache.lucene.search.SortField;
19-
import org.apache.lucene.search.TopDocs;
20-
import org.apache.lucene.search.join.QueryBitSetProducer;
21-
import org.apache.lucene.search.join.ScoreMode;
22-
import org.apache.lucene.search.join.ToParentBlockJoinQuery;
2313
import org.apache.lucene.util.NumericUtils;
24-
import org.elasticsearch.common.lucene.search.Queries;
2514
import org.elasticsearch.index.fielddata.IndexFieldData;
26-
import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource;
2715
import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
2816
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
2917
import org.elasticsearch.index.fielddata.fieldcomparator.FloatValuesComparatorSource;
3018
import org.elasticsearch.search.MultiValueMode;
3119

32-
import java.io.IOException;
33-
34-
import static org.hamcrest.Matchers.equalTo;
35-
3620
public class FloatNestedSortingTests extends DoubleNestedSortingTests {
3721

3822
@Override
@@ -55,39 +39,4 @@ protected IndexFieldData.XFieldComparatorSource createFieldComparator(
5539
protected IndexableField createField(String name, int value) {
5640
return new SortedNumericDocValuesField(name, NumericUtils.floatToSortableInt(value));
5741
}
58-
59-
protected void assertAvgScoreMode(
60-
Query parentFilter,
61-
IndexSearcher searcher,
62-
IndexFieldData.XFieldComparatorSource innerFieldComparator
63-
) throws IOException {
64-
MultiValueMode sortMode = MultiValueMode.AVG;
65-
Query childFilter = Queries.not(parentFilter);
66-
XFieldComparatorSource nestedComparatorSource = createFieldComparator(
67-
"field2",
68-
sortMode,
69-
-127,
70-
createNested(searcher, parentFilter, childFilter)
71-
);
72-
Query query = new ToParentBlockJoinQuery(
73-
new ConstantScoreQuery(childFilter),
74-
new QueryBitSetProducer(parentFilter),
75-
ScoreMode.None
76-
);
77-
Sort sort = new Sort(new SortField("field2", nestedComparatorSource));
78-
TopDocs topDocs = searcher.search(query, 5, sort);
79-
assertThat(topDocs.totalHits.value, equalTo(7L));
80-
assertThat(topDocs.scoreDocs.length, equalTo(5));
81-
assertThat(topDocs.scoreDocs[0].doc, equalTo(11));
82-
assertThat(((Number) ((FieldDoc) topDocs.scoreDocs[0]).fields[0]).intValue(), equalTo(2));
83-
assertThat(topDocs.scoreDocs[1].doc, equalTo(7));
84-
assertThat(((Number) ((FieldDoc) topDocs.scoreDocs[1]).fields[0]).intValue(), equalTo(2));
85-
assertThat(topDocs.scoreDocs[2].doc, equalTo(3));
86-
assertThat(((Number) ((FieldDoc) topDocs.scoreDocs[2]).fields[0]).intValue(), equalTo(3));
87-
assertThat(topDocs.scoreDocs[3].doc, equalTo(15));
88-
assertThat(((Number) ((FieldDoc) topDocs.scoreDocs[3]).fields[0]).intValue(), equalTo(3));
89-
assertThat(topDocs.scoreDocs[4].doc, equalTo(19));
90-
assertThat(((Number) ((FieldDoc) topDocs.scoreDocs[4]).fields[0]).intValue(), equalTo(3));
91-
}
92-
9342
}

0 commit comments

Comments
 (0)