Skip to content

Commit 01d99d8

Browse files
mayya-sharipovaelasticsearchmachine
andauthored
Enable sort optimization on int, short and byte fields (#127968) (#128832)
* Enable sort optimization on int, short and byte fields (#127968) Before this PR, sorting on integer, short and byte field types used SortField.Type.LONG. This made sort optimization impossible for these field types. This PR uses SortField.Type.INT for integer, short and byte fields. This enables sort optimization. There are several caveats with changing the sort type that are addressed: - Before, mixed sort on integer and long fields was automatically supported, as both field types used SortField.Type.LONG. Now when merging results from different shards, we need to convert the sort to LONG and the results to long values. - Similar for collapsing when there are mixed INT and LONG sort types. - Index sorting. Similarly, before, for index sorting on an integer field, SortField.Type.LONG was used. This sort type is stored in the index writer config on disk and can't be modified. Now when providing sortField() for index sorting, we need to account for the index version: for older indices return a sort with SortField.Type.LONG and for new indices return SortField.Type.INT. --- There is only 1 change that may be considered not backwards compatible: Before, if an integer field was [missing a value](https://www.elastic.co/docs/reference/elasticsearch/rest-apis/sort-search-results#_missing_values), its sort value was returned as Long.MAX_VALUE in a search response. With this change, its sort value is returned as Integer.MAX_VALUE. But I think this change is ok, as in our documentation we don't provide information about what value will be returned, we just say it will be sorted last. --- Also closes #127965 (as same type validation is added for collapse queries) * [CI] Auto commit changes from spotless * Add bucketedSort based on int --------- Co-authored-by: elasticsearchmachine <[email protected]>
1 parent ee87759 commit 01d99d8

File tree

21 files changed

+792
-85
lines changed

21 files changed

+792
-85
lines changed

docs/changelog/127968.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 127968
2+
summary: "Enable sort optimization on int, short and byte fields"
3+
area: Search
4+
type: enhancement
5+
issues:
6+
- 127965
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.upgrades;
11+
12+
import com.carrotsearch.randomizedtesting.annotations.Name;
13+
14+
import org.elasticsearch.client.Request;
15+
import org.elasticsearch.common.settings.Settings;
16+
17+
import java.util.List;
18+
import java.util.Map;
19+
20+
/**
21+
* Tests that index sorting works correctly after a rolling upgrade.
22+
*/
23+
public class IndexSortUpgradeIT extends AbstractRollingUpgradeTestCase {
24+
25+
public IndexSortUpgradeIT(@Name("upgradedNodes") int upgradedNodes) {
26+
super(upgradedNodes);
27+
}
28+
29+
@SuppressWarnings("unchecked")
30+
public void testIndexSortForNumericTypes() throws Exception {
31+
record IndexConfig(String indexName, String fieldName, String fieldType) {}
32+
var configs = new IndexConfig[] {
33+
new IndexConfig("index_byte", "byte_field", "byte"),
34+
new IndexConfig("index_short", "short_field", "short"),
35+
new IndexConfig("index_int", "int_field", "integer") };
36+
37+
if (isOldCluster()) {
38+
int numShards = randomIntBetween(1, 3);
39+
for (var config : configs) {
40+
createIndex(
41+
config.indexName(),
42+
Settings.builder()
43+
.put("index.number_of_shards", numShards)
44+
.put("index.number_of_replicas", 0)
45+
.put("index.sort.field", config.fieldName())
46+
.put("index.sort.order", "desc")
47+
.build(),
48+
"""
49+
{
50+
"properties": {
51+
"%s": {
52+
"type": "%s"
53+
}
54+
}
55+
}
56+
""".formatted(config.fieldName(), config.fieldType())
57+
);
58+
}
59+
}
60+
61+
final int numDocs = randomIntBetween(10, 25);
62+
for (var config : configs) {
63+
var bulkRequest = new Request("POST", "/" + config.indexName() + "/_bulk");
64+
StringBuilder bulkBody = new StringBuilder();
65+
for (int i = 0; i < numDocs; i++) {
66+
bulkBody.append("{\"index\": {}}\n");
67+
bulkBody.append("{\"" + config.fieldName() + "\": ").append(i).append("}\n");
68+
}
69+
bulkRequest.setJsonEntity(bulkBody.toString());
70+
bulkRequest.addParameter("refresh", "true");
71+
var bulkResponse = client().performRequest(bulkRequest);
72+
assertOK(bulkResponse);
73+
74+
var searchRequest = new Request("GET", "/" + config.indexName() + "/_search");
75+
searchRequest.setJsonEntity("""
76+
{
77+
"query": {
78+
"match_all": {}
79+
},
80+
"sort": {
81+
"%s": {
82+
"order": "desc"
83+
}
84+
}
85+
}
86+
""".formatted(config.fieldName()));
87+
var searchResponse = client().performRequest(searchRequest);
88+
assertOK(searchResponse);
89+
var responseBody = entityAsMap(searchResponse);
90+
var hits = (List<Map<String, Object>>) ((Map<String, Object>) responseBody.get("hits")).get("hits");
91+
int previousValue = ((Number) ((List<Object>) hits.get(0).get("sort")).get(0)).intValue();
92+
;
93+
for (int i = 1; i < hits.size(); i++) {
94+
int currentValue = ((Number) ((List<Object>) hits.get(i).get("sort")).get(0)).intValue();
95+
assertTrue("Sort values are not in desc order ", previousValue >= currentValue);
96+
previousValue = currentValue;
97+
}
98+
}
99+
}
100+
}
Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
setup:
2+
- do:
3+
indices.create:
4+
index: index_long
5+
body:
6+
mappings:
7+
properties:
8+
field1:
9+
type: long
10+
field2:
11+
type: long
12+
13+
- do:
14+
indices.create:
15+
index: index_int
16+
body:
17+
mappings:
18+
properties:
19+
field1:
20+
type: integer
21+
field2:
22+
type: integer
23+
24+
- do:
25+
indices.create:
26+
index: index_short
27+
body:
28+
mappings:
29+
properties:
30+
field1:
31+
type: short
32+
field2:
33+
type: short
34+
35+
- do:
36+
indices.create:
37+
index: index_byte
38+
body:
39+
mappings:
40+
properties:
41+
field1:
42+
type: byte
43+
field2:
44+
type: byte
45+
46+
- do:
47+
bulk:
48+
refresh: true
49+
index: index_long
50+
body:
51+
- '{ "index" : { "_id" : "long1" } }'
52+
- '{"field1" : 10}'
53+
- '{ "index" : { "_id" : "long2" } }'
54+
- '{"field1" : 20, "field2": 20}'
55+
- '{ "index" : { "_id" : "long3" } }'
56+
- '{"field1" : 30}'
57+
- '{ "index" : { "_id" : "long4" } }'
58+
- '{"field1" : 40, "field2": 40}'
59+
- '{ "index" : { "_id" : "long5" } }'
60+
- '{"field1" : 50}'
61+
62+
- do:
63+
bulk:
64+
refresh: true
65+
index: index_int
66+
body:
67+
- '{ "index" : { "_id" : "int1" } }'
68+
- '{"field1" : 11, "field2": 11}'
69+
- '{ "index" : { "_id" : "int2" } }'
70+
- '{"field1" : 21}'
71+
- '{ "index" : { "_id" : "int3" } }'
72+
- '{"field1" : 31, "field2": 31}'
73+
- '{ "index" : { "_id" : "int4" } }'
74+
- '{"field1" : 41}'
75+
- '{ "index" : { "_id" : "int5" } }'
76+
- '{"field1" : 51, "field2": 51}'
77+
78+
- do:
79+
bulk:
80+
refresh: true
81+
index: index_short
82+
body:
83+
- '{ "index" : { "_id" : "short1" } }'
84+
- '{"field1" : 12}'
85+
- '{ "index" : { "_id" : "short2" } }'
86+
- '{"field1" : 22, "field2": 22}'
87+
- '{ "index" : { "_id" : "short3" } }'
88+
- '{"field1" : 32}'
89+
- '{ "index" : { "_id" : "short4" } }'
90+
- '{"field1" : 42, "field2": 42}'
91+
- '{ "index" : { "_id" : "short5" } }'
92+
- '{"field1" : 52}'
93+
94+
- do:
95+
bulk:
96+
refresh: true
97+
index: index_byte
98+
body:
99+
- '{ "index" : { "_id" : "byte1" } }'
100+
- '{"field1" : 13, "field2": 13}'
101+
- '{ "index" : { "_id" : "byte2" } }'
102+
- '{"field1" : 23}'
103+
- '{ "index" : { "_id" : "byte3" } }'
104+
- '{"field1" : 33, "field2": 33}'
105+
- '{ "index" : { "_id" : "byte4" } }'
106+
- '{"field1" : 43}'
107+
- '{ "index" : { "_id" : "byte5" } }'
108+
- '{"field1" : 53, "field2": 53}'
109+
110+
111+
---
112+
"Simple sort":
113+
- do:
114+
search:
115+
index: index_long,index_int,index_short,index_byte
116+
body:
117+
sort: [ { field1: { "order": "asc"} } ]
118+
- match: { hits.hits.0.sort.0: 10 }
119+
- match: { hits.hits.1.sort.0: 11 }
120+
- match: { hits.hits.2.sort.0: 12 }
121+
- match: { hits.hits.3.sort.0: 13 }
122+
- match: { hits.hits.4.sort.0: 20 }
123+
- match: { hits.hits.5.sort.0: 21 }
124+
- match: { hits.hits.6.sort.0: 22 }
125+
- match: { hits.hits.7.sort.0: 23 }
126+
- match: { hits.hits.8.sort.0: 30 }
127+
- match: { hits.hits.9.sort.0: 31 }
128+
129+
- do:
130+
search:
131+
index: index_long,index_int,index_short,index_byte
132+
body:
133+
sort: [ { field1: { "order": "asc"} } ]
134+
search_after: [31]
135+
- match: { hits.hits.0.sort.0: 32 }
136+
- match: { hits.hits.1.sort.0: 33 }
137+
- match: { hits.hits.2.sort.0: 40 }
138+
- match: { hits.hits.3.sort.0: 41 }
139+
- match: { hits.hits.4.sort.0: 42 }
140+
- match: { hits.hits.5.sort.0: 43 }
141+
- match: { hits.hits.6.sort.0: 50 }
142+
- match: { hits.hits.7.sort.0: 51 }
143+
- match: { hits.hits.8.sort.0: 52 }
144+
- match: { hits.hits.9.sort.0: 53 }
145+
146+
---
147+
"Sort missing values sort last":
148+
- requires:
149+
cluster_features: [ "search.sort.int_sort_for_int_short_byte_fields" ]
150+
reason: "Integer Sort is used on integer, short, byte field types"
151+
- do:
152+
search:
153+
index: index_long,index_int,index_short,index_byte
154+
body:
155+
sort: [ { field2: { "order": "asc" } } ]
156+
157+
- match: { hits.hits.0.sort.0: 11 }
158+
- match: { hits.hits.1.sort.0: 13 }
159+
- match: { hits.hits.2.sort.0: 20 }
160+
- match: { hits.hits.3.sort.0: 22 }
161+
- match: { hits.hits.4.sort.0: 31 }
162+
- match: { hits.hits.5.sort.0: 33 }
163+
- match: { hits.hits.6.sort.0: 40 }
164+
- match: { hits.hits.7.sort.0: 42 }
165+
- match: { hits.hits.8.sort.0: 51 }
166+
- match: { hits.hits.9.sort.0: 53 }
167+
168+
- do:
169+
search:
170+
index: index_long,index_int,index_short,index_byte
171+
body:
172+
sort: [ { field2: { "order": "asc" } } ]
173+
search_after: [ 53 ]
174+
175+
# Then all documents with missing field2
176+
# missing values on integer, short and byte fields are returned as Integer.MAX_VALUE (2147483647)
177+
# missing values on long fields are returned as Long.MAX_VALUE (9223372036854775807)
178+
- match: { hits.hits.0.sort.0: 2147483647 }
179+
- match: { hits.hits.1.sort.0: 2147483647 }
180+
- match: { hits.hits.2.sort.0: 2147483647 }
181+
- match: { hits.hits.3.sort.0: 2147483647 }
182+
- match: { hits.hits.4.sort.0: 2147483647 }
183+
- match: { hits.hits.5.sort.0: 2147483647 }
184+
- match: { hits.hits.6.sort.0: 2147483647 }
185+
- match: { hits.hits.7.sort.0: 9223372036854775807 }
186+
- match: { hits.hits.8.sort.0: 9223372036854775807 }
187+
- match: { hits.hits.9.sort.0: 9223372036854775807 }
188+

server/src/internalClusterTest/java/org/elasticsearch/index/IndexSortIT.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,8 @@ private static XContentBuilder createTestMapping() {
5858
public void testIndexSort() {
5959
SortField dateSort = new SortedNumericSortField("date", SortField.Type.LONG, false);
6060
dateSort.setMissingValue(Long.MAX_VALUE);
61-
SortField numericSort = new SortedNumericSortField("numeric_dv", SortField.Type.LONG, false);
62-
numericSort.setMissingValue(Long.MAX_VALUE);
61+
SortField numericSort = new SortedNumericSortField("numeric_dv", SortField.Type.INT, false);
62+
numericSort.setMissingValue(Integer.MAX_VALUE);
6363
SortField keywordSort = new SortedSetSortField("keyword_dv", false);
6464
keywordSort.setMissingValue(SortField.STRING_LAST);
6565
Sort indexSort = new Sort(dateSort, numericSort, keywordSort);

0 commit comments

Comments
 (0)