Skip to content

Commit 324f282

Browse files
shmuelhanoch, cursoragent, and elasticsearchmachine
authored
[9.2] Fix ClassCastException when merging TopHits with mixed sort field types (elastic#141919) (elastic#143068)
* Fix ClassCastException when merging TopHits with mixed sort field types (elastic#141919) * Fix ClassCastException when merging TopHits with FLOAT/LONG sort types Fixes ES-14119: Prevents ClassCastException when merging TopHits aggregation results from different shards with incompatible sort field types (Float vs Long). Extends SortFieldValidation to handle FLOAT/LONG/DOUBLE mixing by converting all to DOUBLE, in addition to the existing INT/LONG handling. This ensures TopDocs can be merged successfully when field mappings differ across shards during upgrades. Fix flaky FieldSortIT#testSortMixedFieldTypes by ensuring both index_long and index_keyword have multiple shards and docs so that merge consistently sees incompatible sort types (LONG vs STRING) and throws as expected. Co-authored-by: Cursor <cursoragent@cursor.com> * Ensure INT/LONG mixes are never converted to DOUBLE (review fix) Only convert to DOUBLE when the mix involves FLOAT or DOUBLE; make the condition explicit via involvesFloatingPoint. Add test coverage. Co-authored-by: Cursor <cursoragent@cursor.com> * Review fix: clarify comment and exception for multi-index sort type mismatch Co-authored-by: Cursor <cursoragent@cursor.com> * [CI] Auto commit changes from spotless --------- Co-authored-by: Cursor <cursoragent@cursor.com> Co-authored-by: elasticsearchmachine <infra-root+elasticsearchmachine@elastic.co> * Convert missing value to Double when rewriting sort (elastic#141919) When merging TopHits with mixed numeric sort types we rewrite to DOUBLE but were passing the original missing value (e.g. Long) to the new SortField; Lucene requires Double for Type.DOUBLE. --------- Co-authored-by: Cursor <cursoragent@cursor.com> Co-authored-by: elasticsearchmachine <infra-root+elasticsearchmachine@elastic.co>
1 parent 2cd2c74 commit 324f282

File tree

6 files changed

+234
-24
lines changed

6 files changed

+234
-24
lines changed

docs/changelog/141919.yaml

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,6 @@
1+
area: Aggregations
2+
issues:
3+
- 141714
4+
pr: 141919
5+
summary: Fix `ClassCastException` when merging `TopHits` with mixed sort field types
6+
type: bug

server/src/internalClusterTest/java/org/elasticsearch/search/sort/FieldSortIT.java

Lines changed: 9 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -2162,17 +2162,20 @@ public void testLongSortOptimizationCorrectResults() {
21622162

21632163
public void testSortMixedFieldTypes() throws IOException {
21642164
assertAcked(
2165-
prepareCreate("index_long").setMapping("foo", "type=long"),
2165+
prepareCreate("index_long").setMapping("foo", "type=long").setSettings(Settings.builder().put("number_of_shards", 2)),
21662166
prepareCreate("index_integer").setMapping("foo", "type=integer"),
21672167
prepareCreate("index_double").setMapping("foo", "type=double"),
2168-
prepareCreate("index_keyword").setMapping("foo", "type=keyword")
2168+
prepareCreate("index_keyword").setMapping("foo", "type=keyword").setSettings(Settings.builder().put("number_of_shards", 2))
21692169
);
21702170

21712171
prepareIndex("index_long").setId("1").setSource("foo", "123").get();
2172+
prepareIndex("index_long").setId("2").setSource("foo", "124").get();
21722173
prepareIndex("index_integer").setId("1").setSource("foo", "123").get();
21732174
prepareIndex("index_double").setId("1").setSource("foo", "123").get();
21742175
prepareIndex("index_keyword").setId("1").setSource("foo", "123").get();
2176+
prepareIndex("index_keyword").setId("2").setSource("foo", "124").get();
21752177
refresh();
2178+
ensureGreen("index_long", "index_keyword");
21762179

21772180
// for debugging, we try to see where the documents are located
21782181
try (RestClient restClient = createRestClient()) {
@@ -2188,16 +2191,12 @@ public void testSortMixedFieldTypes() throws IOException {
21882191
assertNoFailures(prepareSearch("index_long", "index_integer").addSort(new FieldSortBuilder("foo")).setSize(10));
21892192
}
21902193

2191-
String errMsg = "Can't sort on field [foo]; the field has incompatible sort types";
2192-
2193-
{ // mixing long and double types is not allowed
2194-
SearchPhaseExecutionException exc = expectThrows(
2195-
SearchPhaseExecutionException.class,
2196-
prepareSearch("index_long", "index_double").addSort(new FieldSortBuilder("foo")).setSize(10)
2197-
);
2198-
assertThat(exc.getCause().toString(), containsString(errMsg));
2194+
{ // mixing long and double types is ok, as we convert to double sort
2195+
assertNoFailures(prepareSearch("index_long", "index_double").addSort(new FieldSortBuilder("foo")).setSize(10));
21992196
}
22002197

2198+
String errMsg = "Can't sort on field [foo]; the field has incompatible sort types";
2199+
22012200
{ // mixing long and keyword types is not allowed
22022201
SearchPhaseExecutionException exc = expectThrows(
22032202
SearchPhaseExecutionException.class,

server/src/main/java/org/elasticsearch/search/SearchSortValues.java

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,21 @@ private SearchSortValues(Object[] formattedSortValues, Object[] rawSortValues) {
6464
this.rawSortValues = rawSortValues;
6565
}
6666

67+
/**
68+
* Build sort values from pre-formatted and raw arrays. Use this when the formatted values
69+
* were produced with the correct per-field format (e.g. from a shard hit) and must not be
70+
* re-formatted with a single format like RAW, which would fail for non-UTF-8 BytesRefs
71+
* (e.g. version field).
72+
*/
73+
public static SearchSortValues fromFormattedAndRaw(Object[] formattedSortValues, Object[] rawSortValues) {
74+
Objects.requireNonNull(formattedSortValues);
75+
Objects.requireNonNull(rawSortValues);
76+
if (formattedSortValues.length != rawSortValues.length) {
77+
throw new IllegalArgumentException("formattedSortValues and rawSortValues must have the same length");
78+
}
79+
return new SearchSortValues(formattedSortValues, rawSortValues);
80+
}
81+
6782
@Override
6883
public void writeTo(StreamOutput out) throws IOException {
6984
out.writeArray(Lucene::writeSortValue, this.formattedSortValues);

server/src/main/java/org/elasticsearch/search/aggregations/metrics/InternalTopHits.java

Lines changed: 49 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -14,13 +14,16 @@
1414
import org.apache.lucene.search.TopDocs;
1515
import org.apache.lucene.search.TopFieldDocs;
1616
import org.apache.lucene.search.TotalHits.Relation;
17+
import org.apache.lucene.util.BytesRef;
1718
import org.elasticsearch.common.io.stream.StreamInput;
1819
import org.elasticsearch.common.io.stream.StreamOutput;
1920
import org.elasticsearch.common.lucene.Lucene;
2021
import org.elasticsearch.common.lucene.search.TopDocsAndMaxScore;
2122
import org.elasticsearch.common.xcontent.ChunkedToXContent;
23+
import org.elasticsearch.search.DocValueFormat;
2224
import org.elasticsearch.search.SearchHit;
2325
import org.elasticsearch.search.SearchHits;
26+
import org.elasticsearch.search.SearchSortValues;
2427
import org.elasticsearch.search.aggregations.AggregationReduceContext;
2528
import org.elasticsearch.search.aggregations.AggregatorReducer;
2629
import org.elasticsearch.search.aggregations.InternalAggregation;
@@ -131,7 +134,19 @@ public InternalAggregation get() {
131134
shardDocs = new TopFieldDocs[aggregations.size()];
132135
maxScore = reduceAndFindMaxScore(aggregations, shardDocs);
133136
Sort sort = SortFieldValidation.validateAndMaybeRewrite(Arrays.asList(shardDocs), topFieldDocs.fields);
134-
reducedTopDocs = TopDocs.merge(sort, from, size, (TopFieldDocs[]) shardDocs);
137+
try {
138+
reducedTopDocs = TopDocs.merge(sort, from, size, (TopFieldDocs[]) shardDocs);
139+
} catch (ClassCastException e) {
140+
// This can happen when sort field types are incompatible across shards even after validation,
141+
// e.g. during upgrades or when aggregating across indices with different field mappings.
142+
throw new IllegalArgumentException(
143+
"Failed to merge top_hits aggregation results from different shards due to incompatible "
144+
+ "sort field types. This can occur during upgrades or when aggregating across indices "
145+
+ "whose field mappings differ. Original error: "
146+
+ e.getMessage(),
147+
e
148+
);
149+
}
135150
} else {
136151
shardDocs = new TopDocs[aggregations.size()];
137152
maxScore = reduceAndFindMaxScore(aggregations, shardDocs);
@@ -168,12 +183,44 @@ private static SearchHits extractSearchHits(
168183
do {
169184
position = tracker[shardIndex]++;
170185
} while (topDocsForShard.scoreDocs[position] != scoreDoc);
171-
hits[i] = aggregations.get(shardIndex).searchHits.getAt(position);
186+
SearchHit hit = aggregations.get(shardIndex).searchHits.getAt(position);
187+
if (scoreDoc instanceof FieldDoc fieldDoc && fieldDoc.fields != null && fieldDoc.fields.length > 0) {
188+
Object[] existingFormatted = hit.getSortValues();
189+
boolean hasBytesRef = Arrays.stream(fieldDoc.fields).anyMatch(f -> f instanceof BytesRef);
190+
if (hasBytesRef && existingFormatted != null && existingFormatted.length == fieldDoc.fields.length) {
191+
// Preserve the shard's formatted values for BytesRef fields (e.g. version, keyword)
192+
// so we don't format them with RAW, which assumes UTF-8 and fails on version encoding.
193+
hit.sortValues(
194+
SearchSortValues.fromFormattedAndRaw(buildFormattedSortValues(fieldDoc.fields, existingFormatted), fieldDoc.fields)
195+
);
196+
} else {
197+
DocValueFormat[] formats = new DocValueFormat[fieldDoc.fields.length];
198+
Arrays.fill(formats, DocValueFormat.RAW);
199+
hit.sortValues(fieldDoc.fields, formats);
200+
}
201+
}
202+
hits[i] = hit;
172203
assert hits[i].isPooled() == false;
173204
}
174205
return SearchHits.unpooled(hits, reducedTopDocs.totalHits, maxScore);
175206
}
176207

208+
/**
209+
* Build formatted sort values: use existing formatted value for BytesRef (e.g. version) fields
210+
* to avoid re-formatting with RAW (UTF-8), and use RAW for numeric types.
211+
*/
212+
private static Object[] buildFormattedSortValues(Object[] rawValues, Object[] existingFormatted) {
213+
Object[] formatted = new Object[rawValues.length];
214+
for (int i = 0; i < rawValues.length; i++) {
215+
if (rawValues[i] instanceof BytesRef && existingFormatted != null && i < existingFormatted.length) {
216+
formatted[i] = existingFormatted[i];
217+
} else {
218+
formatted[i] = DocValueFormat.RAW.formatSortValue(rawValues[i]);
219+
}
220+
}
221+
return formatted;
222+
}
223+
177224
private static float reduceAndFindMaxScore(List<InternalTopHits> aggregations, TopDocs[] shardDocs) {
178225
float maxScore = Float.NaN;
179226
for (int i = 0; i < shardDocs.length; i++) {

server/src/main/java/org/elasticsearch/search/sort/SortFieldValidation.java

Lines changed: 97 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,9 @@ public static Sort validateAndMaybeRewrite(Collection<? extends TopDocs> results
3939
SortField.Type[] firstTypes = null;
4040
boolean isFirstResult = true;
4141
Set<Integer> fieldIdsWithMixedIntAndLongSorts = new HashSet<>();
42+
Set<Integer> fieldIdsWithMixedNumericSorts = new HashSet<>();
43+
// Track which fields have floating point types to determine if we need DOUBLE conversion
44+
Set<Integer> fieldIdsWithFloatingPointTypes = new HashSet<>();
4245
for (TopDocs topDocs : results) {
4346
// We don't actually merge in empty score docs, so ignore potentially mismatched types if there are no docs
4447
if (topDocs == null || topDocs.scoreDocs == null || topDocs.scoreDocs.length == 0) {
@@ -54,6 +57,10 @@ public static Sort validateAndMaybeRewrite(Collection<? extends TopDocs> results
5457
// for custom types that we can't resolve, we can't do the check
5558
return sort;
5659
}
60+
// Track if this field has floating point types
61+
if (firstTypes[i] == SortField.Type.FLOAT || firstTypes[i] == SortField.Type.DOUBLE) {
62+
fieldIdsWithFloatingPointTypes.add(i);
63+
}
5764
}
5865
isFirstResult = false;
5966
} else {
@@ -64,26 +71,53 @@ public static Sort validateAndMaybeRewrite(Collection<? extends TopDocs> results
6471
// for custom types that we can't resolve, we can't do the check
6572
return sort;
6673
}
67-
if (mixIntAndLong(firstTypes[i], curType)) {
68-
fieldIdsWithMixedIntAndLongSorts.add(i);
69-
} else {
70-
throw new IllegalArgumentException(
71-
"Can't sort on field ["
72-
+ curSortFields[i].getField()
73-
+ "]; the field has incompatible sort types: ["
74-
+ firstTypes[i]
75-
+ "] and ["
76-
+ curType
77-
+ "] across shards!"
78-
);
74+
// Track if this field has floating point types
75+
if (curType == SortField.Type.FLOAT || curType == SortField.Type.DOUBLE) {
76+
fieldIdsWithFloatingPointTypes.add(i);
7977
}
78+
// Check if we are mixing INT and LONG sort types (without floating point)
79+
if (mixIntAndLong(firstTypes[i], curType)) {
80+
// Only add to INT/LONG mixing if there's no floating point type for this field
81+
if (fieldIdsWithFloatingPointTypes.contains(i) == false) {
82+
fieldIdsWithMixedIntAndLongSorts.add(i);
83+
} else {
84+
// If floating point exists, convert everything to DOUBLE
85+
fieldIdsWithMixedNumericSorts.add(i);
86+
}
87+
} else if (isNumericType(firstTypes[i])
88+
&& isNumericType(curType)
89+
&& involvesFloatingPoint(firstTypes[i], curType)) {
90+
// Only convert to DOUBLE when the mix involves FLOAT or DOUBLE (never pure INT/LONG).
91+
fieldIdsWithMixedNumericSorts.add(i);
92+
} else {
93+
throw new IllegalArgumentException(
94+
"Can't sort on field ["
95+
+ curSortFields[i].getField()
96+
+ "]; the field has incompatible sort types: ["
97+
+ firstTypes[i]
98+
+ "] and ["
99+
+ curType
100+
+ "] across shards!"
101+
);
102+
}
80103
}
81104
}
82105
}
83106
}
107+
// Remove fields from INT/LONG mixing if they also need DOUBLE conversion
108+
fieldIdsWithMixedIntAndLongSorts.removeAll(fieldIdsWithMixedNumericSorts);
84109
if (fieldIdsWithMixedIntAndLongSorts.isEmpty() == false) {
110+
// Ensure INT/LONG-only fields are rewritten to LONG, never to DOUBLE
111+
for (int fieldIdx : fieldIdsWithMixedIntAndLongSorts) {
112+
SortField.Type type = firstTypes[fieldIdx];
113+
assert type == SortField.Type.INT || type == SortField.Type.LONG
114+
: "INT/LONG mix must be rewritten to LONG, not DOUBLE; field " + fieldIdx + " had type " + type;
115+
}
85116
sort = rewriteSortAndResultsToLong(sort, results, fieldIdsWithMixedIntAndLongSorts);
86117
}
118+
if (fieldIdsWithMixedNumericSorts.isEmpty() == false) {
119+
sort = rewriteSortAndResultsToDouble(sort, results, fieldIdsWithMixedNumericSorts);
120+
}
87121
return sort;
88122
}
89123

@@ -92,6 +126,14 @@ private static boolean mixIntAndLong(SortField.Type firstType, SortField.Type cu
92126
|| (firstType == SortField.Type.LONG && currentType == SortField.Type.INT);
93127
}
94128

129+
/**
130+
* True when at least one type is FLOAT or DOUBLE. Used to ensure we only add to the
131+
* DOUBLE-rewrite set when the mix involves floating point, never for pure INT/LONG.
132+
*/
133+
private static boolean involvesFloatingPoint(SortField.Type a, SortField.Type b) {
134+
return (a == SortField.Type.FLOAT || a == SortField.Type.DOUBLE) || (b == SortField.Type.FLOAT || b == SortField.Type.DOUBLE);
135+
}
136+
95137
/**
96138
* Rewrite Sort objects and shards results for long sort for mixed fields:
97139
* convert Sort to Long sort and convert fields' values to Long values.
@@ -122,6 +164,49 @@ private static Sort rewriteSortAndResultsToLong(
122164
return new Sort(newSortFields);
123165
}
124166

167+
/**
168+
* Rewrite Sort objects and shard results for double sort for mixed numeric fields:
169+
* convert Sort to Double sort and convert fields' values to Double values.
170+
* This handles FLOAT/LONG/DOUBLE mixing by converting all to DOUBLE.
171+
*/
172+
private static Sort rewriteSortAndResultsToDouble(
173+
Sort sort,
174+
Collection<? extends TopDocs> results,
175+
Set<Integer> fieldIdsWithMixedNumericSorts
176+
) {
177+
SortField[] newSortFields = sort.getSort();
178+
for (int fieldIdx : fieldIdsWithMixedNumericSorts) {
179+
// Rewrite the sort field to DOUBLE
180+
SortField originalField = newSortFields[fieldIdx];
181+
SortField doubleField = new SortField(originalField.getField(), SortField.Type.DOUBLE, originalField.getReverse());
182+
Object missingValue = originalField.getMissingValue();
183+
if (missingValue != null && missingValue instanceof Number num) {
184+
missingValue = num.doubleValue();
185+
}
186+
doubleField.setMissingValue(missingValue);
187+
newSortFields[fieldIdx] = doubleField;
188+
189+
// Convert all sort values to Double
190+
for (TopDocs topDocs : results) {
191+
if (topDocs == null || topDocs.scoreDocs == null || topDocs.scoreDocs.length == 0) {
192+
continue;
193+
}
194+
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
195+
FieldDoc fieldDoc = (FieldDoc) scoreDoc;
196+
Object value = fieldDoc.fields[fieldIdx];
197+
if (value != null && value instanceof Number) {
198+
fieldDoc.fields[fieldIdx] = ((Number) value).doubleValue();
199+
}
200+
}
201+
}
202+
}
203+
return new Sort(newSortFields);
204+
}
205+
206+
private static boolean isNumericType(SortField.Type type) {
207+
return type == SortField.Type.INT || type == SortField.Type.LONG || type == SortField.Type.FLOAT || type == SortField.Type.DOUBLE;
208+
}
209+
125210
private static SortField.Type getType(SortField sortField) {
126211
if (sortField instanceof SortedNumericSortField sf) {
127212
return sf.getNumericType();

0 commit comments

Comments
 (0)