From ee28fc1d1a669d93a3b5cbe3341cbe630a4eaf8c Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Wed, 23 Apr 2025 15:41:04 -0400 Subject: [PATCH 1/9] Enable sort optimizaiton on int, short and byte fields Before this PR sorting on integer, short and byte fields types used SortField.Type.LONG. This made sort optimization impossible for these field types. This PR uses SortField.Type.INT for integer, short and byte fields. This enables sort optimization. There are several caveats with changing sort type that are addressed: - Before mixed sort on integer and long fields was automatically supported, as both field types used SortField.TYPE.LONG. Now when merging results from different shards, we need to convert sort to LONG and results to long values. - Similar for collapsing when there is mixed INT and LONG sort types. - Index sorting. Similarly, before for index sorting on integer field, SortField.Type.LONG was used. This sort type is stored in the index writer config on disk and can't be modified. Now when providing sortField() for index sorting, we need to account for index version: for older indices return sort with SortField.Type.LONG and for new indices return SortField.Type.INT. --- .../upgrades/IndexSortUpgradeIT.java | 87 ++++++++++++++++++ .../search/CollapseSearchResultsIT.java | 89 +++++++++++++++++++ .../search/sort/FieldSortIT.java | 46 +++++++++- .../action/search/SearchPhaseController.java | 60 ++++++++++--- .../elasticsearch/index/IndexSortConfig.java | 2 +- .../elasticsearch/index/IndexVersions.java | 1 + .../index/fielddata/IndexFieldData.java | 14 +++ .../fielddata/IndexNumericFieldData.java | 65 +++++++++----- .../IntValuesComparatorSource.java | 65 ++++++++++++++ .../LongValuesComparatorSource.java | 4 +- .../searchafter/SearchAfterBuilder.java | 7 +- .../search/sort/FieldSortBuilder.java | 2 +- .../hamcrest/ElasticsearchAssertions.java | 7 ++ 13 files changed, 404 insertions(+), 45 deletions(-) create mode 100644 qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/upgrades/IndexSortUpgradeIT.java create mode 100644 server/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/IntValuesComparatorSource.java diff --git a/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/upgrades/IndexSortUpgradeIT.java b/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/upgrades/IndexSortUpgradeIT.java new file mode 100644 index 0000000000000..5ea3194cbca49 --- /dev/null +++ b/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/upgrades/IndexSortUpgradeIT.java @@ -0,0 +1,87 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.upgrades; + +import com.carrotsearch.randomizedtesting.annotations.Name; + +import org.elasticsearch.client.Request; +import org.elasticsearch.common.settings.Settings; + +/** + * Tests that index sorting works correctly after a rolling upgrade. + */ +public class IndexSortUpgradeIT extends AbstractRollingUpgradeTestCase { + + public IndexSortUpgradeIT(@Name("upgradedNodes") int upgradedNodes) { + super(upgradedNodes); + } + + public void testIndexSortForNumericTypes() throws Exception { + record IndexConfig(String indexName, String fieldName, String fieldType) {} + var configs = new IndexConfig[] { + new IndexConfig("index_byte", "byte_field", "byte"), + new IndexConfig("index_short", "short_field", "short"), + new IndexConfig("index_int", "int_field", "integer") }; + + if (isOldCluster()) { + int numShards = randomIntBetween(1, 3); + for (var config : configs) { + createIndex( + config.indexName(), + Settings.builder() + .put("index.number_of_shards", numShards) + .put("index.number_of_replicas", 0) + .put("index.sort.field", config.fieldName()) + .put("index.sort.order", "desc") + .build(), + """ + { + "properties": { + "%s": { + "type": "%s" + } + } + } + """.formatted(config.fieldName(), config.fieldType()) + ); + } + } + + final int numDocs = randomIntBetween(10, 25); + for (var config : configs) { + var bulkRequest = new Request("POST", "/" + config.indexName() + "/_bulk"); + StringBuilder bulkBody = new StringBuilder(); + for (int i = 0; i < numDocs; i++) { + bulkBody.append("{\"index\": {}}\n"); + bulkBody.append("{\"" + config.fieldName() + "\": ").append(i).append("}\n"); + } + bulkRequest.setJsonEntity(bulkBody.toString()); + bulkRequest.addParameter("refresh", "true"); + var bulkResponse = client().performRequest(bulkRequest); + assertOK(bulkResponse); + + var searchRequest = new Request("GET", "/" + config.indexName() + "/_search"); + searchRequest.setJsonEntity(""" + { + "query": { + "match_all": {} + }, + "sort": { + "%s": { + "order": "desc" + } + } + } + """.formatted(config.fieldName())); + var searchResponse = client().performRequest(searchRequest); + assertOK(searchResponse); + } + } +} diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/CollapseSearchResultsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/CollapseSearchResultsIT.java index 89474a0181597..bc54bfac5a7ab 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/CollapseSearchResultsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/CollapseSearchResultsIT.java @@ -10,9 +10,12 @@ package org.elasticsearch.search; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.action.bulk.BulkRequestBuilder; import org.elasticsearch.index.query.InnerHitBuilder; import org.elasticsearch.index.query.MatchAllQueryBuilder; import org.elasticsearch.search.collapse.CollapseBuilder; +import org.elasticsearch.search.sort.SortBuilders; +import org.elasticsearch.search.sort.SortOrder; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.xcontent.XContentType; @@ -115,4 +118,90 @@ public void testCollapseWithStoredFields() { } ); } + + public void testCollapseOnMixedIntAndLongSortTypes() { + assertAcked( + prepareCreate("shop_short").setMapping("brand_id", "type=short", "price", "type=integer"), + prepareCreate("shop_long").setMapping("brand_id", "type=long", "price", "type=integer"), + prepareCreate("shop_int").setMapping("brand_id", "type=integer", "price", "type=integer") + ); + + BulkRequestBuilder bulkRequest = client().prepareBulk(); + bulkRequest.add(client().prepareIndex("shop_short").setId("short01").setSource("brand_id", 1, "price", 100)); + bulkRequest.add(client().prepareIndex("shop_short").setId("short02").setSource("brand_id", 1, "price", 101)); + bulkRequest.add(client().prepareIndex("shop_short").setId("short03").setSource("brand_id", 1, "price", 102)); + bulkRequest.add(client().prepareIndex("shop_short").setId("short04").setSource("brand_id", 3, "price", 301)); + bulkRequest.get(); + + BulkRequestBuilder bulkRequest1 = client().prepareBulk(); + bulkRequest1.add(client().prepareIndex("shop_long").setId("long01").setSource("brand_id", 1, "price", 100)); + bulkRequest1.add(client().prepareIndex("shop_long").setId("long02").setSource("brand_id", 1, "price", 103)); + bulkRequest1.add(client().prepareIndex("shop_long").setId("long03").setSource("brand_id", 1, "price", 105)); + bulkRequest1.add(client().prepareIndex("shop_long").setId("long04").setSource("brand_id", 2, "price", 200)); + bulkRequest1.add(client().prepareIndex("shop_long").setId("long05").setSource("brand_id", 2, "price", 201)); + bulkRequest1.get(); + + BulkRequestBuilder bulkRequest2 = client().prepareBulk(); + bulkRequest2.add(client().prepareIndex("shop_int").setId("int01").setSource("brand_id", 1, "price", 101)); + bulkRequest2.add(client().prepareIndex("shop_int").setId("int02").setSource("brand_id", 1, "price", 102)); + bulkRequest2.add(client().prepareIndex("shop_int").setId("int03").setSource("brand_id", 1, "price", 104)); + bulkRequest2.add(client().prepareIndex("shop_int").setId("int04").setSource("brand_id", 2, "price", 201)); + bulkRequest2.add(client().prepareIndex("shop_int").setId("int05").setSource("brand_id", 2, "price", 202)); + bulkRequest2.add(client().prepareIndex("shop_int").setId("int06").setSource("brand_id", 3, "price", 300)); + bulkRequest2.get(); + refresh(); + + assertNoFailuresAndResponse( + prepareSearch("shop_long", "shop_int", "shop_short").setQuery(new MatchAllQueryBuilder()) + .setCollapse( + new CollapseBuilder("brand_id").setInnerHits( + new InnerHitBuilder("ih").setSize(3).addSort(SortBuilders.fieldSort("price").order(SortOrder.DESC)) + ) + ) + .addSort("brand_id", SortOrder.ASC) + .addSort("price", SortOrder.DESC), + response -> { + SearchHits hits = response.getHits(); + assertEquals(3, hits.getHits().length); + + // First hit should be brand_id=1 with highest price + Map firstHitSource = hits.getAt(0).getSourceAsMap(); + assertEquals(1, firstHitSource.get("brand_id")); + assertEquals(105, firstHitSource.get("price")); + assertEquals("long03", hits.getAt(0).getId()); + + // Check inner hits for brand_id=1 + SearchHits innerHits1 = hits.getAt(0).getInnerHits().get("ih"); + assertEquals(3, innerHits1.getHits().length); + assertEquals("long03", innerHits1.getAt(0).getId()); + assertEquals("int03", innerHits1.getAt(1).getId()); + assertEquals("long02", innerHits1.getAt(2).getId()); + + // Second hit should be brand_id=2 with highest price + Map secondHitSource = hits.getAt(1).getSourceAsMap(); + assertEquals(2, secondHitSource.get("brand_id")); + assertEquals(202, secondHitSource.get("price")); + assertEquals("int05", hits.getAt(1).getId()); + + // Check inner hits for brand_id=2 + SearchHits innerHits2 = hits.getAt(1).getInnerHits().get("ih"); + assertEquals(3, innerHits2.getHits().length); + assertEquals("int05", innerHits2.getAt(0).getId()); + assertEquals("int04", innerHits2.getAt(1).getId()); + assertEquals("long05", innerHits2.getAt(2).getId()); + + // third hit should be brand_id=3 with highest price + Map thirdHitSource = hits.getAt(2).getSourceAsMap(); + assertEquals(3, thirdHitSource.get("brand_id")); + assertEquals(301, thirdHitSource.get("price")); + assertEquals("short04", hits.getAt(2).getId()); + + // Check inner hits for brand_id=3 + SearchHits innerHits3 = hits.getAt(2).getInnerHits().get("ih"); + assertEquals(2, innerHits3.getHits().length); + assertEquals("short04", innerHits3.getAt(0).getId()); + assertEquals("int06", innerHits3.getAt(1).getId()); + } + ); + } } diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/sort/FieldSortIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/sort/FieldSortIT.java index fde4e944f924d..2cd610f204d9e 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/sort/FieldSortIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/sort/FieldSortIT.java @@ -17,6 +17,7 @@ import org.elasticsearch.action.bulk.BulkRequestBuilder; import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.search.SearchPhaseExecutionException; +import org.elasticsearch.action.search.SearchRequestBuilder; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.search.ShardSearchFailure; import org.elasticsearch.cluster.metadata.IndexMetadata; @@ -64,6 +65,7 @@ import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertFirstHit; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitSize; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailuresAndResponse; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; @@ -2180,11 +2182,50 @@ public void testSortMixedFieldTypes() { } } + public void testMixedIntAndLongSortTypes() { + assertAcked( + prepareCreate("index_long").setMapping("field1", "type=long", "field2", "type=long"), + prepareCreate("index_integer").setMapping("field1", "type=integer", "field2", "type=integer"), + prepareCreate("index_short").setMapping("field1", "type=short", "field2", "type=short"), + prepareCreate("index_byte").setMapping("field1", "type=byte", "field2", "type=byte") + ); + + for (int i = 0; i < 5; i++) { + prepareIndex("index_long").setId(String.valueOf(i)).setSource("field1", i).get(); // missing field2 sorts last + prepareIndex("index_integer").setId(String.valueOf(i)).setSource("field1", i).get(); // missing field2 sorts last + prepareIndex("index_short").setId(String.valueOf(i)).setSource("field1", i, "field2", i * 10).get(); + prepareIndex("index_byte").setId(String.valueOf(i)).setSource("field1", i, "field2", i).get(); + } + refresh(); + + Object[] searchAfter = null; + int[] expectedHitSizes = { 8, 8, 4 }; + Object[][] expectedLastDocValues = { + new Object[] { 1L, 9223372036854775807L }, + new Object[] { 3L, 9223372036854775807L }, + new Object[] { 4L, 9223372036854775807L } }; + + for (int i = 0; i < 3; i++) { + SearchRequestBuilder request = prepareSearch("index_long", "index_integer", "index_short", "index_byte").setSize(8) + .addSort(new FieldSortBuilder("field1")) + .addSort(new FieldSortBuilder("field2")); + if (searchAfter != null) { + request.searchAfter(searchAfter); + } + SearchResponse response = request.get(); + assertHitSize(response, expectedHitSizes[i]); + Object[] lastDocSortValues = response.getHits().getAt(response.getHits().getHits().length - 1).getSortValues(); + assertThat(lastDocSortValues, equalTo(expectedLastDocValues[i])); + searchAfter = lastDocSortValues; + response.decRef(); + } + } + public void testSortMixedFieldTypesWithNoDocsForOneType() { assertAcked( prepareCreate("index_long").setMapping("foo", "type=long"), prepareCreate("index_other").setMapping("bar", "type=keyword"), - prepareCreate("index_double").setMapping("foo", "type=double") + prepareCreate("index_int").setMapping("foo", "type=integer") ); prepareIndex("index_long").setId("1").setSource("foo", "123").get(); @@ -2193,8 +2234,7 @@ public void testSortMixedFieldTypesWithNoDocsForOneType() { refresh(); assertNoFailures( - prepareSearch("index_long", "index_double", "index_other").addSort(new FieldSortBuilder("foo").unmappedType("boolean")) - .setSize(10) + prepareSearch("index_long", "index_int", "index_other").addSort(new FieldSortBuilder("foo").unmappedType("boolean")).setSize(10) ); } } diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java b/server/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java index 3f066e83a59d4..7339135fe93dc 100644 --- a/server/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java +++ b/server/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java @@ -58,9 +58,11 @@ import java.util.Collection; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Set; import java.util.concurrent.Executor; import java.util.function.BiFunction; import java.util.function.Consumer; @@ -152,12 +154,12 @@ static TopDocs mergeTopDocs(List results, int topN, int from) { } final TopDocs mergedTopDocs; if (topDocs instanceof TopFieldGroups firstTopDocs) { - final Sort sort = new Sort(firstTopDocs.fields); + final Sort sort = validateSameSortTypesAndMaybeRewrite(results, firstTopDocs.fields); TopFieldGroups[] shardTopDocs = topDocsList.toArray(new TopFieldGroups[0]); mergedTopDocs = TopFieldGroups.merge(sort, from, topN, shardTopDocs, false); } else if (topDocs instanceof TopFieldDocs firstTopDocs) { TopFieldDocs[] shardTopDocs = topDocsList.toArray(new TopFieldDocs[0]); - final Sort sort = checkSameSortTypes(topDocsList, firstTopDocs.fields); + final Sort sort = validateSameSortTypesAndMaybeRewrite(results, firstTopDocs.fields); mergedTopDocs = TopDocs.merge(sort, from, topN, shardTopDocs); } else { final TopDocs[] shardTopDocs = topDocsList.toArray(new TopDocs[0]); @@ -166,12 +168,13 @@ static TopDocs mergeTopDocs(List results, int topN, int from) { return mergedTopDocs; } - private static Sort checkSameSortTypes(Collection results, SortField[] firstSortFields) { + private static Sort validateSameSortTypesAndMaybeRewrite(Collection results, SortField[] firstSortFields) { Sort sort = new Sort(firstSortFields); if (results.size() < 2) return sort; SortField.Type[] firstTypes = null; boolean isFirstResult = true; + Set fieldIdsWithMixedIntAndLongSorts = new HashSet<>(); for (TopDocs topDocs : results) { // We don't actually merge in empty score docs, so ignore potentially mismatched types if there are no docs if (topDocs.scoreDocs == null || topDocs.scoreDocs.length == 0) { @@ -197,22 +200,55 @@ private static Sort checkSameSortTypes(Collection results, SortField[] // for custom types that we can't resolve, we can't do the check return sort; } - throw new IllegalArgumentException( - "Can't sort on field [" - + curSortFields[i].getField() - + "]; the field has incompatible sort types: [" - + firstTypes[i] - + "] and [" - + curType - + "] across shards!" - ); + // Check if we are mixing INT and LONG sort types, which is allowed + if ((firstTypes[i] == SortField.Type.INT && curType == SortField.Type.LONG) + || (firstTypes[i] == SortField.Type.LONG && curType == SortField.Type.INT)) fieldIdsWithMixedIntAndLongSorts + .add(i); + else { + throw new IllegalArgumentException( + "Can't sort on field [" + + curSortFields[i].getField() + + "]; the field has incompatible sort types: [" + + firstTypes[i] + + "] and [" + + curType + + "] across shards!" + ); + } } } } } + if (fieldIdsWithMixedIntAndLongSorts.size() > 0) { + sort = rewriteSortAndResultsToLong(sort, results, fieldIdsWithMixedIntAndLongSorts); + } return sort; } + /** + * Rewrite Sort objects and shards results for long sort for mixed fields: + * convert Sort to Long sort and convert fields' values to Long values. + * This is necessary to enable comparison of fields' values across shards for merging. + */ + private static Sort rewriteSortAndResultsToLong(Sort sort, Collection results, Set fieldIdsWithMixedIntAndLongSorts) { + SortField[] newSortFields = sort.getSort(); + for (int fieldIdx : fieldIdsWithMixedIntAndLongSorts) { + for (TopDocs topDocs : results) { + if (topDocs.scoreDocs == null || topDocs.scoreDocs.length == 0) continue; + SortField[] sortFields = ((TopFieldDocs) topDocs).fields; + if (getType(sortFields[fieldIdx]) == SortField.Type.INT) { + for (ScoreDoc scoreDoc : topDocs.scoreDocs) { + FieldDoc fieldDoc = (FieldDoc) scoreDoc; + fieldDoc.fields[fieldIdx] = ((Number) fieldDoc.fields[fieldIdx]).longValue(); + } + } else { // SortField.Type.LONG + newSortFields[fieldIdx] = sortFields[fieldIdx]; + } + } + } + return new Sort(newSortFields); + } + private static SortField.Type getType(SortField sortField) { if (sortField instanceof SortedNumericSortField sf) { return sf.getNumericType(); diff --git a/server/src/main/java/org/elasticsearch/index/IndexSortConfig.java b/server/src/main/java/org/elasticsearch/index/IndexSortConfig.java index 28de14ec04221..a3ed94416b7e8 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexSortConfig.java +++ b/server/src/main/java/org/elasticsearch/index/IndexSortConfig.java @@ -289,7 +289,7 @@ public Sort buildIndexSort( if (fieldData == null) { throw new IllegalArgumentException("docvalues not found for index sort field:[" + sortSpec.field + "]"); } - sortFields[i] = fieldData.sortField(sortSpec.missingValue, mode, null, reverse); + sortFields[i] = fieldData.sortField(this.indexCreatedVersion, sortSpec.missingValue, mode, null, reverse); validateIndexSortField(sortFields[i]); } return new Sort(sortFields); diff --git a/server/src/main/java/org/elasticsearch/index/IndexVersions.java b/server/src/main/java/org/elasticsearch/index/IndexVersions.java index 74eb6721dda81..ebadf5661890f 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexVersions.java +++ b/server/src/main/java/org/elasticsearch/index/IndexVersions.java @@ -168,6 +168,7 @@ private static Version parseUnchecked(String version) { public static final IndexVersion DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ = def(9_024_0_00, Version.LUCENE_10_2_1); public static final IndexVersion SEMANTIC_TEXT_DEFAULTS_TO_BBQ = def(9_025_0_00, Version.LUCENE_10_2_1); public static final IndexVersion DEFAULT_TO_ACORN_HNSW_FILTER_HEURISTIC = def(9_026_0_00, Version.LUCENE_10_2_1); + public static final IndexVersion INDEX_INT_SORT_INT_TYPE = def(9_027_0_00, Version.LUCENE_10_2_1); /* * STOP! READ THIS FIRST! No, really, * ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _ diff --git a/server/src/main/java/org/elasticsearch/index/fielddata/IndexFieldData.java b/server/src/main/java/org/elasticsearch/index/fielddata/IndexFieldData.java index 5206b1f32ce21..5f6cc93063d87 100644 --- a/server/src/main/java/org/elasticsearch/index/fielddata/IndexFieldData.java +++ b/server/src/main/java/org/elasticsearch/index/fielddata/IndexFieldData.java @@ -26,6 +26,7 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.core.Nullable; +import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested; import org.elasticsearch.indices.breaker.CircuitBreakerService; import org.elasticsearch.search.DocValueFormat; @@ -64,6 +65,19 @@ public interface IndexFieldData { */ FD loadDirect(LeafReaderContext context) throws Exception; + /** + * Returns the {@link SortField} to use for sorting depending on the version of the index. + */ + default SortField sortField( + IndexVersion indexCreatedVersion, + @Nullable Object missingValue, + MultiValueMode sortMode, + Nested nested, + boolean reverse + ) { + return sortField(missingValue, sortMode, nested, reverse); + } + /** * Returns the {@link SortField} to use for sorting. */ diff --git a/server/src/main/java/org/elasticsearch/index/fielddata/IndexNumericFieldData.java b/server/src/main/java/org/elasticsearch/index/fielddata/IndexNumericFieldData.java index 98a5c8aed23c9..3e1633e5c0bce 100644 --- a/server/src/main/java/org/elasticsearch/index/fielddata/IndexNumericFieldData.java +++ b/server/src/main/java/org/elasticsearch/index/fielddata/IndexNumericFieldData.java @@ -16,10 +16,13 @@ import org.elasticsearch.common.time.DateUtils; import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.core.Nullable; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested; import org.elasticsearch.index.fielddata.fieldcomparator.DoubleValuesComparatorSource; import org.elasticsearch.index.fielddata.fieldcomparator.FloatValuesComparatorSource; import org.elasticsearch.index.fielddata.fieldcomparator.HalfFloatValuesComparatorSource; +import org.elasticsearch.index.fielddata.fieldcomparator.IntValuesComparatorSource; import org.elasticsearch.index.fielddata.fieldcomparator.LongValuesComparatorSource; import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.MultiValueMode; @@ -41,9 +44,9 @@ public abstract class IndexNumericFieldData implements IndexFieldData new FloatValuesComparatorSource(this, missingValue, sortMode, nested); case HALF_FLOAT -> new HalfFloatValuesComparatorSource(this, missingValue, sortMode, nested); case DOUBLE -> new DoubleValuesComparatorSource(this, missingValue, sortMode, nested); + case BYTE, SHORT, INT -> new IntValuesComparatorSource(this, missingValue, sortMode, nested, targetNumericType); case DATE -> dateComparatorSource(missingValue, sortMode, nested); case DATE_NANOSECONDS -> dateNanosComparatorSource(missingValue, sortMode, nested); default -> { diff --git a/server/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/IntValuesComparatorSource.java b/server/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/IntValuesComparatorSource.java new file mode 100644 index 0000000000000..f6e29d0f30018 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/IntValuesComparatorSource.java @@ -0,0 +1,65 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ +package org.elasticsearch.index.fielddata.fieldcomparator; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.search.FieldComparator; +import org.apache.lucene.search.LeafFieldComparator; +import org.apache.lucene.search.Pruning; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.comparators.IntComparator; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.index.fielddata.IndexNumericFieldData; +import org.elasticsearch.index.fielddata.IndexNumericFieldData.NumericType; +import org.elasticsearch.search.MultiValueMode; + +import java.io.IOException; + +/** + * Comparator source for integer values. + */ +public class IntValuesComparatorSource extends LongValuesComparatorSource { + + public IntValuesComparatorSource( + IndexNumericFieldData indexFieldData, + @Nullable Object missingValue, + MultiValueMode sortMode, + Nested nested, + NumericType targetNumericType + ) { + super(indexFieldData, missingValue, sortMode, nested, null, targetNumericType); + } + + @Override + public SortField.Type reducedType() { + return SortField.Type.INT; + } + + @Override + public FieldComparator newComparator(String fieldname, int numHits, Pruning enableSkipping, boolean reversed) { + assert indexFieldData == null || fieldname.equals(indexFieldData.getFieldName()); + + final int iMissingValue = (Integer) missingObject(missingValue, reversed); + // NOTE: it's important to pass null as a missing value in the constructor so that + // the comparator doesn't check docsWithField since we replace missing values in select() + return new IntComparator(numHits, fieldname, null, reversed, enableSkipping) { + @Override + public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException { + return new IntLeafComparator(context) { + @Override + protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException { + return IntValuesComparatorSource.this.getNumericDocValues(context, iMissingValue); + } + }; + } + }; + } + +} diff --git a/server/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/LongValuesComparatorSource.java b/server/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/LongValuesComparatorSource.java index 7b9bc0cd81e19..84ad27efe7778 100644 --- a/server/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/LongValuesComparatorSource.java +++ b/server/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/LongValuesComparatorSource.java @@ -40,7 +40,7 @@ */ public class LongValuesComparatorSource extends IndexFieldData.XFieldComparatorSource { - private final IndexNumericFieldData indexFieldData; + final IndexNumericFieldData indexFieldData; private final Function converter; private final NumericType targetNumericType; @@ -84,7 +84,7 @@ private SortedNumericDocValues loadDocValues(LeafReaderContext context) { return converter != null ? converter.apply(values) : values; } - private NumericDocValues getNumericDocValues(LeafReaderContext context, long missingValue) throws IOException { + NumericDocValues getNumericDocValues(LeafReaderContext context, long missingValue) throws IOException { final SortedNumericDocValues values = loadDocValues(context); if (nested == null) { return FieldData.replaceMissing(sortMode.select(values), missingValue); diff --git a/server/src/main/java/org/elasticsearch/search/searchafter/SearchAfterBuilder.java b/server/src/main/java/org/elasticsearch/search/searchafter/SearchAfterBuilder.java index 02d1a69582666..0a26b8b707f6c 100644 --- a/server/src/main/java/org/elasticsearch/search/searchafter/SearchAfterBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/searchafter/SearchAfterBuilder.java @@ -154,8 +154,11 @@ private static Object convertValueFromSortType(String fieldName, SortField.Type try { switch (sortType) { case DOC, INT: - if (value instanceof Number) { - return ((Number) value).intValue(); + if (value instanceof Number valueNumber) { + if (valueNumber.longValue() > Integer.MAX_VALUE) { + valueNumber = Integer.MAX_VALUE; + } + return (valueNumber).intValue(); } return Integer.parseInt(value.toString()); diff --git a/server/src/main/java/org/elasticsearch/search/sort/FieldSortBuilder.java b/server/src/main/java/org/elasticsearch/search/sort/FieldSortBuilder.java index 1bf6a47808041..a720d2e4759d4 100644 --- a/server/src/main/java/org/elasticsearch/search/sort/FieldSortBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/sort/FieldSortBuilder.java @@ -371,7 +371,7 @@ public SortFieldAndFormat build(SearchExecutionContext context) throws IOExcepti field = numericFieldData.sortField(resolvedType, missing, localSortMode(), nested, reverse); isNanosecond = resolvedType == NumericType.DATE_NANOSECONDS; } else { - field = fieldData.sortField(missing, localSortMode(), nested, reverse); + field = fieldData.sortField(context.indexVersionCreated(), missing, localSortMode(), nested, reverse); if (fieldData instanceof IndexNumericFieldData) { isNanosecond = ((IndexNumericFieldData) fieldData).getNumericType() == NumericType.DATE_NANOSECONDS; } diff --git a/test/framework/src/main/java/org/elasticsearch/test/hamcrest/ElasticsearchAssertions.java b/test/framework/src/main/java/org/elasticsearch/test/hamcrest/ElasticsearchAssertions.java index b0d64a87c4d36..7b7841b72d86b 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/hamcrest/ElasticsearchAssertions.java +++ b/test/framework/src/main/java/org/elasticsearch/test/hamcrest/ElasticsearchAssertions.java @@ -381,6 +381,13 @@ public static void assertHitCount(SearchResponse countResponse, long expectedHit } } + public static void assertHitSize(SearchResponse countResponse, int expectedHitsSize) { + final int hitSize = countResponse.getHits().getHits().length; + if (hitSize != expectedHitsSize) { + fail("Hit size is " + hitSize + " but " + expectedHitsSize + " was expected. " + formatShardStatus(countResponse)); + } + } + public static void assertHitCountAndNoFailures(SearchRequestBuilder searchRequestBuilder, long expectedHitCount) { assertNoFailuresAndResponse(searchRequestBuilder, response -> assertHitCount(response, expectedHitCount)); } From 7beadf35c3ff587780b31b937084c9849cf6f414 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Fri, 9 May 2025 15:19:54 +0200 Subject: [PATCH 2/9] Update docs/changelog/127968.yaml --- docs/changelog/127968.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/127968.yaml diff --git a/docs/changelog/127968.yaml b/docs/changelog/127968.yaml new file mode 100644 index 0000000000000..7e0124481653b --- /dev/null +++ b/docs/changelog/127968.yaml @@ -0,0 +1,5 @@ +pr: 127968 +summary: "Enable sort optimization on int, short and byte fields" +area: Search +type: enhancement +issues: [] From a9f5a8b6ea3f27686e9a1b486263baa47ef760b0 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Fri, 9 May 2025 14:00:32 -0400 Subject: [PATCH 3/9] Fix tests --- .../search/nested/NestedSortingTests.java | 60 +++++++++---------- .../search/sort/FieldSortBuilderTests.java | 6 +- 2 files changed, 33 insertions(+), 33 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/index/search/nested/NestedSortingTests.java b/server/src/test/java/org/elasticsearch/index/search/nested/NestedSortingTests.java index cd6f596cfda05..0a7fdd0302002 100644 --- a/server/src/test/java/org/elasticsearch/index/search/nested/NestedSortingTests.java +++ b/server/src/test/java/org/elasticsearch/index/search/nested/NestedSortingTests.java @@ -624,15 +624,15 @@ public void testMultiLevelNestedSorting() throws IOException { assertThat(topFields.totalHits.value(), equalTo(5L)); StoredFields storedFields = searcher.storedFields(); assertThat(storedFields.document(topFields.scoreDocs[0].doc).get("_id"), equalTo("2")); - assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(76L)); + assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(76)); assertThat(storedFields.document(topFields.scoreDocs[1].doc).get("_id"), equalTo("4")); - assertThat(((FieldDoc) topFields.scoreDocs[1]).fields[0], equalTo(87L)); + assertThat(((FieldDoc) topFields.scoreDocs[1]).fields[0], equalTo(87)); assertThat(storedFields.document(topFields.scoreDocs[2].doc).get("_id"), equalTo("1")); - assertThat(((FieldDoc) topFields.scoreDocs[2]).fields[0], equalTo(234L)); + assertThat(((FieldDoc) topFields.scoreDocs[2]).fields[0], equalTo(234)); assertThat(storedFields.document(topFields.scoreDocs[3].doc).get("_id"), equalTo("3")); - assertThat(((FieldDoc) topFields.scoreDocs[3]).fields[0], equalTo(976L)); + assertThat(((FieldDoc) topFields.scoreDocs[3]).fields[0], equalTo(976)); assertThat(storedFields.document(topFields.scoreDocs[4].doc).get("_id"), equalTo("5")); - assertThat(((FieldDoc) topFields.scoreDocs[4]).fields[0], equalTo(Long.MAX_VALUE)); + assertThat(((FieldDoc) topFields.scoreDocs[4]).fields[0], equalTo(Integer.MAX_VALUE)); // Specific genre { @@ -640,25 +640,25 @@ public void testMultiLevelNestedSorting() throws IOException { topFields = search(queryBuilder, sortBuilder, searchExecutionContext, searcher); assertThat(topFields.totalHits.value(), equalTo(1L)); assertThat(storedFields.document(topFields.scoreDocs[0].doc).get("_id"), equalTo("2")); - assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(76L)); + assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(76)); queryBuilder = new TermQueryBuilder("genre", "science fiction"); topFields = search(queryBuilder, sortBuilder, searchExecutionContext, searcher); assertThat(topFields.totalHits.value(), equalTo(1L)); assertThat(storedFields.document(topFields.scoreDocs[0].doc).get("_id"), equalTo("1")); - assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(234L)); + assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(234)); queryBuilder = new TermQueryBuilder("genre", "horror"); topFields = search(queryBuilder, sortBuilder, searchExecutionContext, searcher); assertThat(topFields.totalHits.value(), equalTo(1L)); assertThat(storedFields.document(topFields.scoreDocs[0].doc).get("_id"), equalTo("3")); - assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(976L)); + assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(976)); queryBuilder = new TermQueryBuilder("genre", "cooking"); topFields = search(queryBuilder, sortBuilder, searchExecutionContext, searcher); assertThat(topFields.totalHits.value(), equalTo(1L)); assertThat(storedFields.document(topFields.scoreDocs[0].doc).get("_id"), equalTo("4")); - assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(87L)); + assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(87)); } // reverse sort order @@ -668,15 +668,15 @@ public void testMultiLevelNestedSorting() throws IOException { topFields = search(queryBuilder, sortBuilder, searchExecutionContext, searcher); assertThat(topFields.totalHits.value(), equalTo(5L)); assertThat(storedFields.document(topFields.scoreDocs[0].doc).get("_id"), equalTo("3")); - assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(976L)); + assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(976)); assertThat(storedFields.document(topFields.scoreDocs[1].doc).get("_id"), equalTo("1")); - assertThat(((FieldDoc) topFields.scoreDocs[1]).fields[0], equalTo(849L)); + assertThat(((FieldDoc) topFields.scoreDocs[1]).fields[0], equalTo(849)); assertThat(storedFields.document(topFields.scoreDocs[2].doc).get("_id"), equalTo("4")); - assertThat(((FieldDoc) topFields.scoreDocs[2]).fields[0], equalTo(180L)); + assertThat(((FieldDoc) topFields.scoreDocs[2]).fields[0], equalTo(180)); assertThat(storedFields.document(topFields.scoreDocs[3].doc).get("_id"), equalTo("2")); - assertThat(((FieldDoc) topFields.scoreDocs[3]).fields[0], equalTo(76L)); + assertThat(((FieldDoc) topFields.scoreDocs[3]).fields[0], equalTo(76)); assertThat(storedFields.document(topFields.scoreDocs[4].doc).get("_id"), equalTo("5")); - assertThat(((FieldDoc) topFields.scoreDocs[4]).fields[0], equalTo(Long.MIN_VALUE)); + assertThat(((FieldDoc) topFields.scoreDocs[4]).fields[0], equalTo(Integer.MIN_VALUE)); } // Specific genre and reverse sort order @@ -685,25 +685,25 @@ public void testMultiLevelNestedSorting() throws IOException { topFields = search(queryBuilder, sortBuilder, searchExecutionContext, searcher); assertThat(topFields.totalHits.value(), equalTo(1L)); assertThat(storedFields.document(topFields.scoreDocs[0].doc).get("_id"), equalTo("2")); - assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(76L)); + assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(76)); queryBuilder = new TermQueryBuilder("genre", "science fiction"); topFields = search(queryBuilder, sortBuilder, searchExecutionContext, searcher); assertThat(topFields.totalHits.value(), equalTo(1L)); assertThat(storedFields.document(topFields.scoreDocs[0].doc).get("_id"), equalTo("1")); - assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(849L)); + assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(849)); queryBuilder = new TermQueryBuilder("genre", "horror"); topFields = search(queryBuilder, sortBuilder, searchExecutionContext, searcher); assertThat(topFields.totalHits.value(), equalTo(1L)); assertThat(storedFields.document(topFields.scoreDocs[0].doc).get("_id"), equalTo("3")); - assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(976L)); + assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(976)); queryBuilder = new TermQueryBuilder("genre", "cooking"); topFields = search(queryBuilder, sortBuilder, searchExecutionContext, searcher); assertThat(topFields.totalHits.value(), equalTo(1L)); assertThat(storedFields.document(topFields.scoreDocs[0].doc).get("_id"), equalTo("4")); - assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(180L)); + assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(180)); } // Nested filter + query @@ -721,9 +721,9 @@ public void testMultiLevelNestedSorting() throws IOException { ); assertThat(topFields.totalHits.value(), equalTo(2L)); assertThat(storedFields.document(topFields.scoreDocs[0].doc).get("_id"), equalTo("2")); - assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(76L)); + assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(76)); assertThat(storedFields.document(topFields.scoreDocs[1].doc).get("_id"), equalTo("4")); - assertThat(((FieldDoc) topFields.scoreDocs[1]).fields[0], equalTo(87L)); + assertThat(((FieldDoc) topFields.scoreDocs[1]).fields[0], equalTo(87)); sortBuilder.order(SortOrder.DESC); topFields = search( @@ -734,9 +734,9 @@ public void testMultiLevelNestedSorting() throws IOException { ); assertThat(topFields.totalHits.value(), equalTo(2L)); assertThat(storedFields.document(topFields.scoreDocs[0].doc).get("_id"), equalTo("4")); - assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(87L)); + assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(87)); assertThat(storedFields.document(topFields.scoreDocs[1].doc).get("_id"), equalTo("2")); - assertThat(((FieldDoc) topFields.scoreDocs[1]).fields[0], equalTo(76L)); + assertThat(((FieldDoc) topFields.scoreDocs[1]).fields[0], equalTo(76)); } // Multiple Nested filters + query @@ -759,9 +759,9 @@ public void testMultiLevelNestedSorting() throws IOException { ); assertThat(topFields.totalHits.value(), equalTo(2L)); assertThat(storedFields.document(topFields.scoreDocs[0].doc).get("_id"), equalTo("4")); - assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(87L)); + assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(87)); assertThat(storedFields.document(topFields.scoreDocs[1].doc).get("_id"), equalTo("2")); - assertThat(((FieldDoc) topFields.scoreDocs[1]).fields[0], equalTo(Long.MAX_VALUE)); + assertThat(((FieldDoc) topFields.scoreDocs[1]).fields[0], equalTo(Integer.MAX_VALUE)); sortBuilder.order(SortOrder.DESC); topFields = search( @@ -772,9 +772,9 @@ public void testMultiLevelNestedSorting() throws IOException { ); assertThat(topFields.totalHits.value(), equalTo(2L)); assertThat(storedFields.document(topFields.scoreDocs[0].doc).get("_id"), equalTo("4")); - assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(87L)); + assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(87)); assertThat(storedFields.document(topFields.scoreDocs[1].doc).get("_id"), equalTo("2")); - assertThat(((FieldDoc) topFields.scoreDocs[1]).fields[0], equalTo(Long.MIN_VALUE)); + assertThat(((FieldDoc) topFields.scoreDocs[1]).fields[0], equalTo(Integer.MIN_VALUE)); } // Nested filter + Specific genre @@ -789,25 +789,25 @@ public void testMultiLevelNestedSorting() throws IOException { topFields = search(queryBuilder, sortBuilder, searchExecutionContext, searcher); assertThat(topFields.totalHits.value(), equalTo(1L)); assertThat(storedFields.document(topFields.scoreDocs[0].doc).get("_id"), equalTo("2")); - assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(76L)); + assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(76)); queryBuilder = new TermQueryBuilder("genre", "science fiction"); topFields = search(queryBuilder, sortBuilder, searchExecutionContext, searcher); assertThat(topFields.totalHits.value(), equalTo(1L)); assertThat(storedFields.document(topFields.scoreDocs[0].doc).get("_id"), equalTo("1")); - assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(Long.MAX_VALUE)); + assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(Integer.MAX_VALUE)); queryBuilder = new TermQueryBuilder("genre", "horror"); topFields = search(queryBuilder, sortBuilder, searchExecutionContext, searcher); assertThat(topFields.totalHits.value(), equalTo(1L)); assertThat(storedFields.document(topFields.scoreDocs[0].doc).get("_id"), equalTo("3")); - assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(Long.MAX_VALUE)); + assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(Integer.MAX_VALUE)); queryBuilder = new TermQueryBuilder("genre", "cooking"); topFields = search(queryBuilder, sortBuilder, searchExecutionContext, searcher); assertThat(topFields.totalHits.value(), equalTo(1L)); assertThat(storedFields.document(topFields.scoreDocs[0].doc).get("_id"), equalTo("4")); - assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(87L)); + assertThat(((FieldDoc) topFields.scoreDocs[0]).fields[0], equalTo(87)); } searcher.getIndexReader().close(); diff --git a/server/src/test/java/org/elasticsearch/search/sort/FieldSortBuilderTests.java b/server/src/test/java/org/elasticsearch/search/sort/FieldSortBuilderTests.java index 5f08a3f1143e0..521f6e9b5eb2f 100644 --- a/server/src/test/java/org/elasticsearch/search/sort/FieldSortBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/search/sort/FieldSortBuilderTests.java @@ -468,7 +468,7 @@ public void testGetMaxNumericSortValue() throws IOException { } case INTEGER -> { int v2 = randomInt(); - values[i] = (long) v2; + values[i] = v2; doc.add(new IntPoint(fieldName, v2)); } case DOUBLE -> { @@ -488,12 +488,12 @@ public void testGetMaxNumericSortValue() throws IOException { } case BYTE -> { byte v6 = randomByte(); - values[i] = (long) v6; + values[i] = (int) v6; doc.add(new IntPoint(fieldName, v6)); } case SHORT -> { short v7 = randomShort(); - values[i] = (long) v7; + values[i] = (int) v7; doc.add(new IntPoint(fieldName, v7)); } default -> throw new AssertionError("unknown type " + numberType); From 44e64b290cdb8d7a011ed0b90c97fb86026ed933 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Sun, 18 May 2025 07:55:30 -0400 Subject: [PATCH 4/9] Fix test failures --- .../java/org/elasticsearch/index/IndexSortIT.java | 4 ++-- .../org/elasticsearch/search/searchafter/SearchAfterIT.java | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/index/IndexSortIT.java b/server/src/internalClusterTest/java/org/elasticsearch/index/IndexSortIT.java index 57bad0bc1a091..0e85d2b9750cb 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/index/IndexSortIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/index/IndexSortIT.java @@ -58,8 +58,8 @@ private static XContentBuilder createTestMapping() { public void testIndexSort() { SortField dateSort = new SortedNumericSortField("date", SortField.Type.LONG, false); dateSort.setMissingValue(Long.MAX_VALUE); - SortField numericSort = new SortedNumericSortField("numeric_dv", SortField.Type.LONG, false); - numericSort.setMissingValue(Long.MAX_VALUE); + SortField numericSort = new SortedNumericSortField("numeric_dv", SortField.Type.INT, false); + numericSort.setMissingValue(Integer.MAX_VALUE); SortField keywordSort = new SortedSetSortField("keyword_dv", false); keywordSort.setMissingValue(SortField.STRING_LAST); Sort indexSort = new Sort(dateSort, numericSort, keywordSort); diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/searchafter/SearchAfterIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/searchafter/SearchAfterIT.java index 29f56eeb5ecb3..d2c949f352f6b 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/searchafter/SearchAfterIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/searchafter/SearchAfterIT.java @@ -386,11 +386,11 @@ private List convertSortValues(List sortValues) { for (int i = 0; i < sortValues.size(); i++) { Object from = sortValues.get(i); if (from instanceof Integer integer) { - converted.add(integer.longValue()); + converted.add(integer.intValue()); } else if (from instanceof Short s) { - converted.add(s.longValue()); + converted.add(s.intValue()); } else if (from instanceof Byte b) { - converted.add(b.longValue()); + converted.add(b.intValue()); } else if (from instanceof Boolean b) { if (b) { converted.add(1L); From 2ea6efdf3e69ecb2250b784708af080a7c008fb4 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Tue, 20 May 2025 12:11:33 -0400 Subject: [PATCH 5/9] Fix test and add clarification to SearchAfterBuilder --- .../org/elasticsearch/search/CollapseSearchResultsIT.java | 6 +++--- .../search/searchafter/SearchAfterBuilder.java | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/CollapseSearchResultsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/CollapseSearchResultsIT.java index bc54bfac5a7ab..1ce1ac8e688e0 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/CollapseSearchResultsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/CollapseSearchResultsIT.java @@ -145,8 +145,8 @@ public void testCollapseOnMixedIntAndLongSortTypes() { bulkRequest2.add(client().prepareIndex("shop_int").setId("int01").setSource("brand_id", 1, "price", 101)); bulkRequest2.add(client().prepareIndex("shop_int").setId("int02").setSource("brand_id", 1, "price", 102)); bulkRequest2.add(client().prepareIndex("shop_int").setId("int03").setSource("brand_id", 1, "price", 104)); - bulkRequest2.add(client().prepareIndex("shop_int").setId("int04").setSource("brand_id", 2, "price", 201)); - bulkRequest2.add(client().prepareIndex("shop_int").setId("int05").setSource("brand_id", 2, "price", 202)); + bulkRequest2.add(client().prepareIndex("shop_int").setId("int04").setSource("brand_id", 2, "price", 202)); + bulkRequest2.add(client().prepareIndex("shop_int").setId("int05").setSource("brand_id", 2, "price", 203)); bulkRequest2.add(client().prepareIndex("shop_int").setId("int06").setSource("brand_id", 3, "price", 300)); bulkRequest2.get(); refresh(); @@ -180,7 +180,7 @@ public void testCollapseOnMixedIntAndLongSortTypes() { // Second hit should be brand_id=2 with highest price Map secondHitSource = hits.getAt(1).getSourceAsMap(); assertEquals(2, secondHitSource.get("brand_id")); - assertEquals(202, secondHitSource.get("price")); + assertEquals(203, secondHitSource.get("price")); assertEquals("int05", hits.getAt(1).getId()); // Check inner hits for brand_id=2 diff --git a/server/src/main/java/org/elasticsearch/search/searchafter/SearchAfterBuilder.java b/server/src/main/java/org/elasticsearch/search/searchafter/SearchAfterBuilder.java index 0a26b8b707f6c..493708f9f0bdd 100644 --- a/server/src/main/java/org/elasticsearch/search/searchafter/SearchAfterBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/searchafter/SearchAfterBuilder.java @@ -154,6 +154,9 @@ private static Object convertValueFromSortType(String fieldName, SortField.Type try { switch (sortType) { case DOC, INT: + // As mixing INT and LONG sort in a single request is allowed, + // we may get search_after values that are larger than Integer.MAX_VALUE + // in this case convert them to Integer.MAX_VALUE if (value instanceof Number valueNumber) { if (valueNumber.longValue() > Integer.MAX_VALUE) { valueNumber = Integer.MAX_VALUE; From 443e7df9d40fff0df7d6c4b3aaa9594e592776ff Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Tue, 20 May 2025 16:11:54 -0400 Subject: [PATCH 6/9] Fix BWC tests --- .../index/fielddata/IndexNumericFieldData.java | 3 ++- .../search/searchafter/SearchAfterBuilder.java | 8 +++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/fielddata/IndexNumericFieldData.java b/server/src/main/java/org/elasticsearch/index/fielddata/IndexNumericFieldData.java index 3e1633e5c0bce..72bdf6ce7d012 100644 --- a/server/src/main/java/org/elasticsearch/index/fielddata/IndexNumericFieldData.java +++ b/server/src/main/java/org/elasticsearch/index/fielddata/IndexNumericFieldData.java @@ -164,7 +164,8 @@ public SortField sortField( ); XFieldComparatorSource longSource = comparatorSource(NumericType.LONG, missingValue, sortMode, nested); rewrittenSortField.setMissingValue(longSource.missingObject(missingValue, reverse)); - rewrittenSortField.setOptimizeSortWithPoints(numericSortField.getOptimizeSortWithPoints()); + // we don't optimize sorting on int field for old indices + rewrittenSortField.setOptimizeSortWithPoints(false); return rewrittenSortField; } diff --git a/server/src/main/java/org/elasticsearch/search/searchafter/SearchAfterBuilder.java b/server/src/main/java/org/elasticsearch/search/searchafter/SearchAfterBuilder.java index 493708f9f0bdd..1ea702aa75e79 100644 --- a/server/src/main/java/org/elasticsearch/search/searchafter/SearchAfterBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/searchafter/SearchAfterBuilder.java @@ -153,7 +153,13 @@ static Object convertValueFromSortField(Object value, SortField sortField, DocVa private static Object convertValueFromSortType(String fieldName, SortField.Type sortType, Object value, DocValueFormat format) { try { switch (sortType) { - case DOC, INT: + case DOC: + if (value instanceof Number valueNumber) { + return (valueNumber).intValue(); + } + return Integer.parseInt(value.toString()); + + case INT: // As mixing INT and LONG sort in a single request is allowed, // we may get search_after values that are larger than Integer.MAX_VALUE // in this case convert them to Integer.MAX_VALUE From 4341c8d80422f51b946e1d06b435113acccc7d5f Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Wed, 21 May 2025 10:55:48 -0400 Subject: [PATCH 7/9] Update docs/changelog/127968.yaml --- docs/changelog/127968.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/changelog/127968.yaml b/docs/changelog/127968.yaml index 7e0124481653b..5cc867d0d2637 100644 --- a/docs/changelog/127968.yaml +++ b/docs/changelog/127968.yaml @@ -2,4 +2,5 @@ pr: 127968 summary: "Enable sort optimization on int, short and byte fields" area: Search type: enhancement -issues: [] +issues: + - 127965 From 8404d0f199be73ec3dc9514b75b3864cc450db23 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Mon, 26 May 2025 11:14:36 -0400 Subject: [PATCH 8/9] Add more tests --- .../upgrades/IndexSortUpgradeIT.java | 13 ++ .../search/95_sort_mixed_numeric_types.yml | 188 ++++++++++++++++++ .../elasticsearch/search/SearchFeatures.java | 8 +- 3 files changed, 208 insertions(+), 1 deletion(-) create mode 100644 rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/95_sort_mixed_numeric_types.yml diff --git a/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/upgrades/IndexSortUpgradeIT.java b/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/upgrades/IndexSortUpgradeIT.java index 5ea3194cbca49..4254d6398dce4 100644 --- a/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/upgrades/IndexSortUpgradeIT.java +++ b/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/upgrades/IndexSortUpgradeIT.java @@ -14,6 +14,9 @@ import org.elasticsearch.client.Request; import org.elasticsearch.common.settings.Settings; +import java.util.List; +import java.util.Map; + /** * Tests that index sorting works correctly after a rolling upgrade. */ @@ -23,6 +26,7 @@ public IndexSortUpgradeIT(@Name("upgradedNodes") int upgradedNodes) { super(upgradedNodes); } + @SuppressWarnings("unchecked") public void testIndexSortForNumericTypes() throws Exception { record IndexConfig(String indexName, String fieldName, String fieldType) {} var configs = new IndexConfig[] { @@ -82,6 +86,15 @@ record IndexConfig(String indexName, String fieldName, String fieldType) {} """.formatted(config.fieldName())); var searchResponse = client().performRequest(searchRequest); assertOK(searchResponse); + var responseBody = entityAsMap(searchResponse); + var hits = (List>) ((Map) responseBody.get("hits")).get("hits"); + int previousValue = ((Number) ((List) hits.get(0).get("sort")).get(0)).intValue(); + ; + for (int i = 1; i < hits.size(); i++) { + int currentValue = ((Number) ((List) hits.get(i).get("sort")).get(0)).intValue(); + assertTrue("Sort values are not in desc order ", previousValue >= currentValue); + previousValue = currentValue; + } } } } diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/95_sort_mixed_numeric_types.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/95_sort_mixed_numeric_types.yml new file mode 100644 index 0000000000000..ea468da410d28 --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/95_sort_mixed_numeric_types.yml @@ -0,0 +1,188 @@ + setup: + - do: + indices.create: + index: index_long + body: + mappings: + properties: + field1: + type: long + field2: + type: long + + - do: + indices.create: + index: index_int + body: + mappings: + properties: + field1: + type: integer + field2: + type: integer + + - do: + indices.create: + index: index_short + body: + mappings: + properties: + field1: + type: short + field2: + type: short + + - do: + indices.create: + index: index_byte + body: + mappings: + properties: + field1: + type: byte + field2: + type: byte + + - do: + bulk: + refresh: true + index: index_long + body: + - '{ "index" : { "_id" : "long1" } }' + - '{"field1" : 10}' + - '{ "index" : { "_id" : "long2" } }' + - '{"field1" : 20, "field2": 20}' + - '{ "index" : { "_id" : "long3" } }' + - '{"field1" : 30}' + - '{ "index" : { "_id" : "long4" } }' + - '{"field1" : 40, "field2": 40}' + - '{ "index" : { "_id" : "long5" } }' + - '{"field1" : 50}' + + - do: + bulk: + refresh: true + index: index_int + body: + - '{ "index" : { "_id" : "int1" } }' + - '{"field1" : 11, "field2": 11}' + - '{ "index" : { "_id" : "int2" } }' + - '{"field1" : 21}' + - '{ "index" : { "_id" : "int3" } }' + - '{"field1" : 31, "field2": 31}' + - '{ "index" : { "_id" : "int4" } }' + - '{"field1" : 41}' + - '{ "index" : { "_id" : "int5" } }' + - '{"field1" : 51, "field2": 51}' + + - do: + bulk: + refresh: true + index: index_short + body: + - '{ "index" : { "_id" : "short1" } }' + - '{"field1" : 12}' + - '{ "index" : { "_id" : "short2" } }' + - '{"field1" : 22, "field2": 22}' + - '{ "index" : { "_id" : "short3" } }' + - '{"field1" : 32}' + - '{ "index" : { "_id" : "short4" } }' + - '{"field1" : 42, "field2": 42}' + - '{ "index" : { "_id" : "short5" } }' + - '{"field1" : 52}' + + - do: + bulk: + refresh: true + index: index_byte + body: + - '{ "index" : { "_id" : "byte1" } }' + - '{"field1" : 13, "field2": 13}' + - '{ "index" : { "_id" : "byte2" } }' + - '{"field1" : 23}' + - '{ "index" : { "_id" : "byte3" } }' + - '{"field1" : 33, "field2": 33}' + - '{ "index" : { "_id" : "byte4" } }' + - '{"field1" : 43}' + - '{ "index" : { "_id" : "byte5" } }' + - '{"field1" : 53, "field2": 53}' + + +--- + "Simple sort": + - do: + search: + index: index_long,index_int,index_short,index_byte + body: + sort: [ { field1: { "order": "asc"} } ] + - match: { hits.hits.0.sort.0: 10 } + - match: { hits.hits.1.sort.0: 11 } + - match: { hits.hits.2.sort.0: 12 } + - match: { hits.hits.3.sort.0: 13 } + - match: { hits.hits.4.sort.0: 20 } + - match: { hits.hits.5.sort.0: 21 } + - match: { hits.hits.6.sort.0: 22 } + - match: { hits.hits.7.sort.0: 23 } + - match: { hits.hits.8.sort.0: 30 } + - match: { hits.hits.9.sort.0: 31 } + + - do: + search: + index: index_long,index_int,index_short,index_byte + body: + sort: [ { field1: { "order": "asc"} } ] + search_after: [31] + - match: { hits.hits.0.sort.0: 32 } + - match: { hits.hits.1.sort.0: 33 } + - match: { hits.hits.2.sort.0: 40 } + - match: { hits.hits.3.sort.0: 41 } + - match: { hits.hits.4.sort.0: 42 } + - match: { hits.hits.5.sort.0: 43 } + - match: { hits.hits.6.sort.0: 50 } + - match: { hits.hits.7.sort.0: 51 } + - match: { hits.hits.8.sort.0: 52 } + - match: { hits.hits.9.sort.0: 53 } + +--- + "Sort missing values sort last": + - requires: + cluster_features: [ "search.sort.int_sort_for_int_short_byte_fields" ] + reason: "Integer Sort is used on integer, short, byte field types" + - do: + search: + index: index_long,index_int,index_short,index_byte + body: + sort: [ { field2: { "order": "asc" } } ] + + - match: { hits.hits.0.sort.0: 11 } + - match: { hits.hits.1.sort.0: 13 } + - match: { hits.hits.2.sort.0: 20 } + - match: { hits.hits.3.sort.0: 22 } + - match: { hits.hits.4.sort.0: 31 } + - match: { hits.hits.5.sort.0: 33 } + - match: { hits.hits.6.sort.0: 40 } + - match: { hits.hits.7.sort.0: 42 } + - match: { hits.hits.8.sort.0: 51 } + - match: { hits.hits.9.sort.0: 53 } + + - do: + search: + index: index_long,index_int,index_short,index_byte + body: + sort: [ { field2: { "order": "asc" } } ] + search_after: [ 53 ] + + # Then all documents with missing field2 + # missing values on fields with integer type return Integer.MAX_VALUE + # missing values on fields with long type return Long.MAX_VALUE + - match: { hits.hits.0.sort.0: 2147483647 } + - match: { hits.hits.1.sort.0: 2147483647 } + - match: { hits.hits.2.sort.0: 2147483647 } + - match: { hits.hits.3.sort.0: 2147483647 } + - match: { hits.hits.4.sort.0: 2147483647 } + - match: { hits.hits.5.sort.0: 2147483647 } + - match: { hits.hits.6.sort.0: 2147483647 } + - match: { hits.hits.7.sort.0: 9223372036854775807 } + - match: { hits.hits.8.sort.0: 9223372036854775807 } + - match: { hits.hits.9.sort.0: 9223372036854775807 } + diff --git a/server/src/main/java/org/elasticsearch/search/SearchFeatures.java b/server/src/main/java/org/elasticsearch/search/SearchFeatures.java index 6c835bdf8badc..c7b1d16bccaa1 100644 --- a/server/src/main/java/org/elasticsearch/search/SearchFeatures.java +++ b/server/src/main/java/org/elasticsearch/search/SearchFeatures.java @@ -29,9 +29,15 @@ public Set getFeatures() { "search.completion_field.duplicate.support" ); public static final NodeFeature RESCORER_MISSING_FIELD_BAD_REQUEST = new NodeFeature("search.rescorer.missing.field.bad.request"); + public static final NodeFeature INT_SORT_FOR_INT_SHORT_BYTE_FIELDS = new NodeFeature("search.sort.int_sort_for_int_short_byte_fields"); @Override public Set getTestFeatures() { - return Set.of(RETRIEVER_RESCORER_ENABLED, COMPLETION_FIELD_SUPPORTS_DUPLICATE_SUGGESTIONS, RESCORER_MISSING_FIELD_BAD_REQUEST); + return Set.of( + RETRIEVER_RESCORER_ENABLED, + COMPLETION_FIELD_SUPPORTS_DUPLICATE_SUGGESTIONS, + RESCORER_MISSING_FIELD_BAD_REQUEST, + INT_SORT_FOR_INT_SHORT_BYTE_FIELDS + ); } } From b8397f5d410ffd3e4b93f900fce4e8216032c335 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Mon, 2 Jun 2025 13:28:04 -0400 Subject: [PATCH 9/9] Add TODO for bucketSort --- .../fielddata/fieldcomparator/IntValuesComparatorSource.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/IntValuesComparatorSource.java b/server/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/IntValuesComparatorSource.java index f6e29d0f30018..7c2b3888f7a47 100644 --- a/server/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/IntValuesComparatorSource.java +++ b/server/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/IntValuesComparatorSource.java @@ -62,4 +62,6 @@ protected NumericDocValues getNumericDocValues(LeafReaderContext context, String }; } + // TODO: add newBucketedSort based on integer values + }