Skip to content

Commit 2dca948

Browse files
authored
Merge branch 'main' into active_shards_percent_issue
2 parents 7ba0bac + ca5ca98 commit 2dca948

File tree

236 files changed

+3227
-1522
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

236 files changed

+3227
-1522
lines changed

benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,9 @@
4141
import org.elasticsearch.compute.data.LongBlock;
4242
import org.elasticsearch.compute.data.LongVector;
4343
import org.elasticsearch.compute.data.Page;
44+
import org.elasticsearch.compute.lucene.AlwaysReferencedIndexedByShardId;
45+
import org.elasticsearch.compute.lucene.IndexedByShardIdFromSingleton;
4446
import org.elasticsearch.compute.lucene.LuceneSourceOperator;
45-
import org.elasticsearch.compute.lucene.ShardRefCounted;
4647
import org.elasticsearch.compute.lucene.read.ValuesSourceReaderOperator;
4748
import org.elasticsearch.compute.lucene.read.ValuesSourceReaderOperatorStatus;
4849
import org.elasticsearch.compute.operator.topn.TopNOperator;
@@ -368,7 +369,7 @@ public void benchmark() {
368369
blockFactory,
369370
ByteSizeValue.ofMb(1).getBytes(),
370371
fields(name),
371-
List.of(new ValuesSourceReaderOperator.ShardContext(reader, () -> {
372+
new IndexedByShardIdFromSingleton<>(new ValuesSourceReaderOperator.ShardContext(reader, () -> {
372373
throw new UnsupportedOperationException("can't load _source here");
373374
}, EsqlPlugin.STORED_FIELDS_SEQUENTIAL_PROPORTION.getDefault(Settings.EMPTY))),
374375
0
@@ -538,7 +539,7 @@ private void setupPages() {
538539
pages.add(
539540
new Page(
540541
new DocVector(
541-
ShardRefCounted.ALWAYS_REFERENCED,
542+
AlwaysReferencedIndexedByShardId.INSTANCE,
542543
blockFactory.newConstantIntBlockWith(0, end - begin).asVector(),
543544
blockFactory.newConstantIntBlockWith(ctx.ord, end - begin).asVector(),
544545
docs.build(),
@@ -575,8 +576,7 @@ record ItrAndOrd(PrimitiveIterator.OfInt itr, int ord) {}
575576
pages.add(
576577
new Page(
577578
new DocVector(
578-
579-
ShardRefCounted.ALWAYS_REFERENCED,
579+
AlwaysReferencedIndexedByShardId.INSTANCE,
580580
blockFactory.newConstantIntVector(0, size),
581581
leafs.build(),
582582
docs.build(),
@@ -594,7 +594,7 @@ record ItrAndOrd(PrimitiveIterator.OfInt itr, int ord) {}
594594
pages.add(
595595
new Page(
596596
new DocVector(
597-
ShardRefCounted.ALWAYS_REFERENCED,
597+
AlwaysReferencedIndexedByShardId.INSTANCE,
598598
blockFactory.newConstantIntBlockWith(0, size).asVector(),
599599
leafs.build().asBlock().asVector(),
600600
docs.build(),
@@ -621,8 +621,7 @@ record ItrAndOrd(PrimitiveIterator.OfInt itr, int ord) {}
621621
pages.add(
622622
new Page(
623623
new DocVector(
624-
625-
ShardRefCounted.ALWAYS_REFERENCED,
624+
AlwaysReferencedIndexedByShardId.INSTANCE,
626625
blockFactory.newConstantIntVector(0, 1),
627626
blockFactory.newConstantIntVector(next.ord, 1),
628627
blockFactory.newConstantIntVector(next.itr.nextInt(), 1),

benchmarks/src/main/java/org/elasticsearch/benchmark/script/ScriptScoreBenchmark.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
import org.apache.lucene.index.IndexWriter;
1616
import org.apache.lucene.index.IndexWriterConfig;
1717
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
18-
import org.apache.lucene.index.SortedNumericDocValues;
1918
import org.apache.lucene.search.IndexSearcher;
2019
import org.apache.lucene.search.MatchAllDocsQuery;
2120
import org.apache.lucene.search.Query;
@@ -29,6 +28,7 @@
2928
import org.elasticsearch.index.fielddata.FieldDataContext;
3029
import org.elasticsearch.index.fielddata.IndexFieldDataCache;
3130
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
31+
import org.elasticsearch.index.fielddata.SortedNumericLongValues;
3232
import org.elasticsearch.index.mapper.IndexType;
3333
import org.elasticsearch.index.mapper.MappedFieldType;
3434
import org.elasticsearch.index.mapper.MappingLookup;
@@ -179,14 +179,14 @@ private ScoreScript.Factory bareMetalScript() {
179179
return new ScoreScript.LeafFactory() {
180180
@Override
181181
public ScoreScript newInstance(DocReader docReader) throws IOException {
182-
SortedNumericDocValues values = ifd.load(((DocValuesDocReader) docReader).getLeafReaderContext()).getLongValues();
182+
SortedNumericLongValues values = ifd.load(((DocValuesDocReader) docReader).getLeafReaderContext()).getLongValues();
183183
return new ScoreScript(params, null, docReader) {
184184
private int docId;
185185

186186
@Override
187187
public double execute(ExplanationHolder explanation) {
188188
try {
189-
values.advance(docId);
189+
values.advanceExact(docId);
190190
if (values.docValueCount() != 1) {
191191
throw new IllegalArgumentException("script only works when there is exactly one value");
192192
}

docs/changelog/132757.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 132757
2+
summary: Late materialization after TopN (Node level)
3+
area: ES|QL
4+
type: feature
5+
issues: []

docs/changelog/136265.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 136265
2+
summary: Adding `match_only_text` subfield to `*.display_name` fields in `ecs@mappings` to be compliant with the latest additions in ECS
3+
area: Data streams
4+
type: feature
5+
issues: []

docs/reference/elasticsearch/rest-apis/retrievers/retrievers-examples.md

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -440,7 +440,7 @@ GET /retrievers_example/_search
440440
"query": "artificial intelligence"
441441
}
442442
}
443-
}
443+
}
444444
```
445445

446446
This returns the following response based on the final rrf score for each result.
@@ -497,7 +497,7 @@ GET /retrievers_example/_search
497497
"fields": ["text", "text_semantic"]
498498
}
499499
}
500-
}
500+
}
501501
```
502502

503503
::::{note}
@@ -570,7 +570,7 @@ GET /retrievers_example/_search
570570
"normalizer": "minmax"
571571
}
572572
}
573-
}
573+
}
574574
```
575575

576576
This returns the following response based on the normalized score for each result:
@@ -1503,6 +1503,7 @@ PUT _inference/rerank/my-rerank-model
15031503
```
15041504

15051505
Let’s start by reranking the results of the `rrf` retriever in our previous example.
1506+
We'll also apply a `chunk_rescorer` to ensure that we only consider the best scoring chunks when sending information to the reranker.
15061507

15071508
```console
15081509
GET retrievers_example/_search
@@ -1541,7 +1542,15 @@ GET retrievers_example/_search
15411542
},
15421543
"field": "text",
15431544
"inference_id": "my-rerank-model",
1544-
"inference_text": "What are the state of the art applications of AI in information retrieval?"
1545+
"inference_text": "What are the state of the art applications of AI in information retrieval?",
1546+
"chunk_rescorer": {
1547+
"size": 1,
1548+
"chunking_settings": {
1549+
"strategy": "sentence",
1550+
"max_chunk_size": 300,
1551+
"sentence_overlap": 0
1552+
}
1553+
}
15451554
}
15461555
},
15471556
"_source": false

docs/reference/elasticsearch/rest-apis/retrievers/text-similarity-reranker-retriever.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,25 @@ score = ln(score), if score < 0
8686

8787
Applies the specified [boolean query filter](/reference/query-languages/query-dsl/query-dsl-bool-query.md) to the child `retriever`. If the child retriever already specifies any filters, then this top-level filter is applied in conjunction with the filter defined in the child retriever.
8888

89+
`chunk_rescorer` {applies_to}`stack: beta 9.2`
90+
: (Optional, `object`)
91+
92+
Chunks and scores documents based on configured chunking settings, and only sends the best scoring chunks to the reranking model as input. This helps improve relevance when reranking long documents that would otherwise be truncated by the reranking model's token limit.
93+
94+
Parameters for `chunk_rescorer`:
95+
96+
`size`
97+
: (Optional, `int`)
98+
99+
The number of chunks to pass to the reranker. Defaults to `1`.
100+
101+
`chunking_settings`
102+
: (Optional, `object`)
103+
104+
Settings for chunking text into smaller passages for scoring and reranking. Defaults to the optimal chunking settings for [Elastic Rerank](docs-content://explore-analyze/machine-learning/nlp/ml-nlp-rerank.md). Refer to the [Inference API documentation](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put#operation-inference-put-body-application-json-chunking_settings) for valid values for `chunking_settings`.
105+
:::{warning}
106+
If you configure chunks larger than the reranker's token limit, the results may be truncated. This can degrade relevance significantly.
107+
:::
89108

90109

91110
## Example: Elastic Rerank [text-similarity-reranker-retriever-example-elastic-rerank]

modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/histogram/AutoDateHistogramAggregator.java

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,7 @@
88
*/
99
package org.elasticsearch.aggregations.bucket.histogram;
1010

11-
import org.apache.lucene.index.DocValues;
12-
import org.apache.lucene.index.NumericDocValues;
13-
import org.apache.lucene.index.SortedNumericDocValues;
11+
import org.apache.lucene.search.LongValues;
1412
import org.apache.lucene.search.ScoreMode;
1513
import org.apache.lucene.util.CollectionUtil;
1614
import org.elasticsearch.aggregations.bucket.histogram.AutoDateHistogramAggregationBuilder.RoundingInfo;
@@ -19,6 +17,7 @@
1917
import org.elasticsearch.common.util.IntArray;
2018
import org.elasticsearch.common.util.LongArray;
2119
import org.elasticsearch.core.Releasables;
20+
import org.elasticsearch.index.fielddata.SortedNumericLongValues;
2221
import org.elasticsearch.search.DocValueFormat;
2322
import org.elasticsearch.search.aggregations.AggregationExecutionContext;
2423
import org.elasticsearch.search.aggregations.Aggregator;
@@ -125,17 +124,17 @@ public final DeferringBucketCollector buildDeferringCollector() {
125124
return deferringCollector;
126125
}
127126

128-
protected abstract LeafBucketCollector getLeafCollector(SortedNumericDocValues values, LeafBucketCollector sub) throws IOException;
127+
protected abstract LeafBucketCollector getLeafCollector(SortedNumericLongValues values, LeafBucketCollector sub) throws IOException;
129128

130-
protected abstract LeafBucketCollector getLeafCollector(NumericDocValues values, LeafBucketCollector sub) throws IOException;
129+
protected abstract LeafBucketCollector getLeafCollector(LongValues values, LeafBucketCollector sub) throws IOException;
131130

132131
@Override
133132
public final LeafBucketCollector getLeafCollector(AggregationExecutionContext aggCtx, LeafBucketCollector sub) throws IOException {
134133
if (valuesSource == null) {
135134
return LeafBucketCollector.NO_OP_COLLECTOR;
136135
}
137-
final SortedNumericDocValues values = valuesSource.longValues(aggCtx.getLeafReaderContext());
138-
final NumericDocValues singleton = DocValues.unwrapSingleton(values);
136+
final SortedNumericLongValues values = valuesSource.longValues(aggCtx.getLeafReaderContext());
137+
final LongValues singleton = SortedNumericLongValues.unwrapSingleton(values);
139138
return singleton != null ? getLeafCollector(singleton, sub) : getLeafCollector(values, sub);
140139
}
141140

@@ -239,7 +238,7 @@ private static class FromSingle extends AutoDateHistogramAggregator {
239238
}
240239

241240
@Override
242-
protected LeafBucketCollector getLeafCollector(SortedNumericDocValues values, LeafBucketCollector sub) {
241+
protected LeafBucketCollector getLeafCollector(SortedNumericLongValues values, LeafBucketCollector sub) {
243242
return new LeafBucketCollectorBase(sub, values) {
244243
@Override
245244
public void collect(int doc, long owningBucketOrd) throws IOException {
@@ -265,7 +264,7 @@ public void collect(int doc, long owningBucketOrd) throws IOException {
265264
}
266265

267266
@Override
268-
protected LeafBucketCollector getLeafCollector(NumericDocValues values, LeafBucketCollector sub) {
267+
protected LeafBucketCollector getLeafCollector(LongValues values, LeafBucketCollector sub) {
269268
return new LeafBucketCollectorBase(sub, values) {
270269
@Override
271270
public void collect(int doc, long owningBucketOrd) throws IOException {
@@ -461,7 +460,7 @@ private static class FromMany extends AutoDateHistogramAggregator {
461460
}
462461

463462
@Override
464-
protected LeafBucketCollector getLeafCollector(SortedNumericDocValues values, LeafBucketCollector sub) {
463+
protected LeafBucketCollector getLeafCollector(SortedNumericLongValues values, LeafBucketCollector sub) {
465464
return new LeafBucketCollectorBase(sub, values) {
466465
@Override
467466
public void collect(int doc, long owningBucketOrd) throws IOException {
@@ -487,7 +486,7 @@ public void collect(int doc, long owningBucketOrd) throws IOException {
487486
}
488487

489488
@Override
490-
protected LeafBucketCollector getLeafCollector(NumericDocValues values, LeafBucketCollector sub) {
489+
protected LeafBucketCollector getLeafCollector(LongValues values, LeafBucketCollector sub) {
491490
return new LeafBucketCollectorBase(sub, values) {
492491
@Override
493492
public void collect(int doc, long owningBucketOrd) throws IOException {

modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/timeseries/TimeSeriesAggregator.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,12 @@
99

1010
package org.elasticsearch.aggregations.bucket.timeseries;
1111

12-
import org.apache.lucene.index.SortedNumericDocValues;
1312
import org.apache.lucene.util.BytesRef;
1413
import org.elasticsearch.common.util.LongArray;
1514
import org.elasticsearch.common.util.ObjectArray;
1615
import org.elasticsearch.core.Releasables;
1716
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
17+
import org.elasticsearch.index.fielddata.SortedNumericLongValues;
1818
import org.elasticsearch.index.mapper.RoutingPathFields;
1919
import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper;
2020
import org.elasticsearch.search.aggregations.AggregationExecutionContext;
@@ -121,7 +121,7 @@ protected LeafBucketCollector getLeafCollector(AggregationExecutionContext aggCt
121121
for (var entry : dimensionValueSources.entrySet()) {
122122
String fieldName = entry.getKey();
123123
if (entry.getValue() instanceof ValuesSource.Numeric numericVS) {
124-
SortedNumericDocValues docValues = numericVS.longValues(aggCtx.getLeafReaderContext());
124+
SortedNumericLongValues docValues = numericVS.longValues(aggCtx.getLeafReaderContext());
125125
dimensionConsumers.put(entry.getKey(), (docId, tsidBuilder) -> {
126126
if (docValues.advanceExact(docId)) {
127127
assert docValues.docValueCount() == 1 : "Dimension field cannot be a multi-valued field";

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldMapper.java

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,8 @@
99

1010
package org.elasticsearch.index.mapper.extras;
1111

12-
import org.apache.lucene.index.DocValues;
1312
import org.apache.lucene.index.LeafReaderContext;
14-
import org.apache.lucene.index.NumericDocValues;
15-
import org.apache.lucene.index.SortedNumericDocValues;
13+
import org.apache.lucene.search.LongValues;
1614
import org.apache.lucene.search.Query;
1715
import org.elasticsearch.common.Explicit;
1816
import org.elasticsearch.common.settings.Setting;
@@ -28,6 +26,7 @@
2826
import org.elasticsearch.index.fielddata.LeafNumericFieldData;
2927
import org.elasticsearch.index.fielddata.NumericDoubleValues;
3028
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
29+
import org.elasticsearch.index.fielddata.SortedNumericLongValues;
3130
import org.elasticsearch.index.fielddata.SourceValueFetcherSortedDoubleIndexFieldData;
3231
import org.elasticsearch.index.fielddata.plain.LeafDoubleFieldData;
3332
import org.elasticsearch.index.fielddata.plain.SortedNumericIndexFieldData;
@@ -831,8 +830,8 @@ public void close() {
831830

832831
@Override
833832
public SortedNumericDoubleValues getDoubleValues() {
834-
final SortedNumericDocValues values = scaledFieldData.getLongValues();
835-
final NumericDocValues singleValues = DocValues.unwrapSingleton(values);
833+
final SortedNumericLongValues values = scaledFieldData.getLongValues();
834+
final LongValues singleValues = SortedNumericLongValues.unwrapSingleton(values);
836835
if (singleValues != null) {
837836
return FieldData.singleton(new NumericDoubleValues() {
838837
@Override

muted-tests.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -591,9 +591,6 @@ tests:
591591
- class: org.elasticsearch.xpack.esql.qa.single_node.GenerativeForkIT
592592
method: test {csv-spec:union_types.MultiIndexTsLongStatsDrop}
593593
issue: https://github.com/elastic/elasticsearch/issues/136113
594-
- class: org.elasticsearch.xpack.esql.ccq.MultiClusterSpecIT
595-
method: test {csv-spec:lookup-join-expression.LookupMultiColTwoExprAndNoMatch}
596-
issue: https://github.com/elastic/elasticsearch/issues/136121
597594
- class: org.elasticsearch.xpack.esql.qa.single_node.GenerativeForkIT
598595
method: test {csv-spec:bucket.BucketByWeekInString}
599596
issue: https://github.com/elastic/elasticsearch/issues/136136

0 commit comments

Comments
 (0)