Skip to content

Commit a47b90f

Browse files
committed
Merge branch 'main' of github.com:ankit--sethi/elasticsearch into bugfix/handle-fork-to-transport-worker
2 parents 874a787 + e411935 commit a47b90f

File tree

405 files changed

+6024
-2032
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

405 files changed

+6024
-2032
lines changed

benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/QueryPlanningBenchmark.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ public void setup() {
119119
}
120120

121121
private LogicalPlan plan(EsqlParser parser, Analyzer analyzer, LogicalPlanOptimizer optimizer, String query) {
122-
var parsed = parser.createStatement(query, new QueryParams(), telemetry, config);
122+
var parsed = parser.createStatement(query, new QueryParams(), telemetry);
123123
var analyzed = analyzer.analyze(parsed);
124124
var optimized = optimizer.optimize(analyzed);
125125
return optimized;

benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,9 @@
4141
import org.elasticsearch.compute.data.LongBlock;
4242
import org.elasticsearch.compute.data.LongVector;
4343
import org.elasticsearch.compute.data.Page;
44+
import org.elasticsearch.compute.lucene.AlwaysReferencedIndexedByShardId;
45+
import org.elasticsearch.compute.lucene.IndexedByShardIdFromSingleton;
4446
import org.elasticsearch.compute.lucene.LuceneSourceOperator;
45-
import org.elasticsearch.compute.lucene.ShardRefCounted;
4647
import org.elasticsearch.compute.lucene.read.ValuesSourceReaderOperator;
4748
import org.elasticsearch.compute.lucene.read.ValuesSourceReaderOperatorStatus;
4849
import org.elasticsearch.compute.operator.topn.TopNOperator;
@@ -368,7 +369,7 @@ public void benchmark() {
368369
blockFactory,
369370
ByteSizeValue.ofMb(1).getBytes(),
370371
fields(name),
371-
List.of(new ValuesSourceReaderOperator.ShardContext(reader, () -> {
372+
new IndexedByShardIdFromSingleton<>(new ValuesSourceReaderOperator.ShardContext(reader, () -> {
372373
throw new UnsupportedOperationException("can't load _source here");
373374
}, EsqlPlugin.STORED_FIELDS_SEQUENTIAL_PROPORTION.getDefault(Settings.EMPTY))),
374375
0
@@ -538,7 +539,7 @@ private void setupPages() {
538539
pages.add(
539540
new Page(
540541
new DocVector(
541-
ShardRefCounted.ALWAYS_REFERENCED,
542+
AlwaysReferencedIndexedByShardId.INSTANCE,
542543
blockFactory.newConstantIntBlockWith(0, end - begin).asVector(),
543544
blockFactory.newConstantIntBlockWith(ctx.ord, end - begin).asVector(),
544545
docs.build(),
@@ -575,8 +576,7 @@ record ItrAndOrd(PrimitiveIterator.OfInt itr, int ord) {}
575576
pages.add(
576577
new Page(
577578
new DocVector(
578-
579-
ShardRefCounted.ALWAYS_REFERENCED,
579+
AlwaysReferencedIndexedByShardId.INSTANCE,
580580
blockFactory.newConstantIntVector(0, size),
581581
leafs.build(),
582582
docs.build(),
@@ -594,7 +594,7 @@ record ItrAndOrd(PrimitiveIterator.OfInt itr, int ord) {}
594594
pages.add(
595595
new Page(
596596
new DocVector(
597-
ShardRefCounted.ALWAYS_REFERENCED,
597+
AlwaysReferencedIndexedByShardId.INSTANCE,
598598
blockFactory.newConstantIntBlockWith(0, size).asVector(),
599599
leafs.build().asBlock().asVector(),
600600
docs.build(),
@@ -621,8 +621,7 @@ record ItrAndOrd(PrimitiveIterator.OfInt itr, int ord) {}
621621
pages.add(
622622
new Page(
623623
new DocVector(
624-
625-
ShardRefCounted.ALWAYS_REFERENCED,
624+
AlwaysReferencedIndexedByShardId.INSTANCE,
626625
blockFactory.newConstantIntVector(0, 1),
627626
blockFactory.newConstantIntVector(next.ord, 1),
628627
blockFactory.newConstantIntVector(next.itr.nextInt(), 1),

benchmarks/src/main/java/org/elasticsearch/benchmark/script/ScriptScoreBenchmark.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
import org.apache.lucene.index.IndexWriter;
1616
import org.apache.lucene.index.IndexWriterConfig;
1717
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
18-
import org.apache.lucene.index.SortedNumericDocValues;
1918
import org.apache.lucene.search.IndexSearcher;
2019
import org.apache.lucene.search.MatchAllDocsQuery;
2120
import org.apache.lucene.search.Query;
@@ -29,6 +28,7 @@
2928
import org.elasticsearch.index.fielddata.FieldDataContext;
3029
import org.elasticsearch.index.fielddata.IndexFieldDataCache;
3130
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
31+
import org.elasticsearch.index.fielddata.SortedNumericLongValues;
3232
import org.elasticsearch.index.mapper.IndexType;
3333
import org.elasticsearch.index.mapper.MappedFieldType;
3434
import org.elasticsearch.index.mapper.MappingLookup;
@@ -179,14 +179,14 @@ private ScoreScript.Factory bareMetalScript() {
179179
return new ScoreScript.LeafFactory() {
180180
@Override
181181
public ScoreScript newInstance(DocReader docReader) throws IOException {
182-
SortedNumericDocValues values = ifd.load(((DocValuesDocReader) docReader).getLeafReaderContext()).getLongValues();
182+
SortedNumericLongValues values = ifd.load(((DocValuesDocReader) docReader).getLeafReaderContext()).getLongValues();
183183
return new ScoreScript(params, null, docReader) {
184184
private int docId;
185185

186186
@Override
187187
public double execute(ExplanationHolder explanation) {
188188
try {
189-
values.advance(docId);
189+
values.advanceExact(docId);
190190
if (values.docValueCount() != 1) {
191191
throw new IllegalArgumentException("script only works when there is exactly one value");
192192
}

docs/changelog/132757.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 132757
2+
summary: Late materialization after TopN (Node level)
3+
area: ES|QL
4+
type: feature
5+
issues: []

docs/changelog/136265.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 136265
2+
summary: Adding `match_only_text` subfield to `*.display_name` fields in `ecs@mappings` to be compliant with the latest additions in ECS
3+
area: Data streams
4+
type: feature
5+
issues: []

docs/reference/elasticsearch/mapping-reference/dense-vector.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,10 @@ To retrieve vector values explicitly, you can use:
133133
}
134134
```
135135

136+
:::{tip}
137+
For more context about the decision to exclude vectors from `_source` by default, read the [blog post](https://www.elastic.co/search-labs/blog/elasticsearch-exclude-vectors-from-source).
138+
:::
139+
136140
### Storage behavior and `_source`
137141

138142
By default, `dense_vector` fields are **not stored in `_source`** on disk. This is also controlled by the index setting `index.mapping.exclude_source_vectors`.

docs/reference/elasticsearch/mapping-reference/semantic-text.md

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -658,6 +658,27 @@ POST test-index/_search
658658
This will return verbose chunked embeddings content that is used to perform
659659
semantic search for `semantic_text` fields.
660660

661+
## Cross-cluster search (CCS) [ccs]
662+
```{applies_to}
663+
stack: ga 9.2
664+
serverless: unavailable
665+
```
666+
667+
`semantic_text` supports [Cross-Cluster Search (CCS)](docs-content://solutions/search/cross-cluster-search.md) through the [`_search` endpoint](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-search)
668+
when [`ccs_minimize_roundtrips`](docs-content://solutions/search/cross-cluster-search.md#ccs-network-delays) is set to `true`.
669+
This is the default value for `ccs_minimize_roundtrips`, so most CCS queries should work automatically:
670+
671+
```console
672+
POST local-index,remote-cluster:remote-index/_search
673+
{
674+
"query": {
675+
"match": {
676+
"my_semantic_field": "Which country is Paris in?"
677+
}
678+
}
679+
}
680+
```
681+
661682
## Limitations [limitations]
662683

663684
`semantic_text` field types have the following limitations:
@@ -666,5 +687,6 @@ semantic search for `semantic_text` fields.
666687
of [nested fields](/reference/elasticsearch/mapping-reference/nested.md).
667688
* `semantic_text` fields can’t currently be set as part
668689
of [dynamic templates](docs-content://manage-data/data-store/mapping/dynamic-templates.md).
669-
* `semantic_text` fields are not supported with Cross-Cluster Search (CCS) or
670-
Cross-Cluster Replication (CCR).
690+
* `semantic_text` fields do not support [Cross-Cluster Search (CCS)](docs-content://solutions/search/cross-cluster-search.md) when [`ccs_minimize_roundtrips`](docs-content://solutions/search/cross-cluster-search.md#ccs-network-delays) is set to `false`.
691+
* `semantic_text` fields do not support [Cross-Cluster Search (CCS)](docs-content://solutions/search/cross-cluster-search.md) in [ES|QL](/reference/query-languages/esql.md).
692+
* `semantic_text` fields do not support [Cross-Cluster Replication (CCR)](docs-content://deploy-manage/tools/cross-cluster-replication.md).

docs/reference/elasticsearch/mapping-reference/sparse-vector.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,10 @@ POST my-index-2/_search
126126
}
127127
```
128128

129+
:::{tip}
130+
For more context about the decision to exclude vectors from `_source` by default, read the [blog post](https://www.elastic.co/search-labs/blog/elasticsearch-exclude-vectors-from-source).
131+
:::
132+
129133
### Storage behavior and `_source`
130134

131135
By default, `sparse_vector` fields are not stored in `_source` on disk. This is also controlled by the index setting `index.mapping.exclude_source_vectors`.

docs/reference/elasticsearch/rest-apis/retrievers/retrievers-examples.md

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -440,7 +440,7 @@ GET /retrievers_example/_search
440440
"query": "artificial intelligence"
441441
}
442442
}
443-
}
443+
}
444444
```
445445

446446
This returns the following response based on the final rrf score for each result.
@@ -497,7 +497,7 @@ GET /retrievers_example/_search
497497
"fields": ["text", "text_semantic"]
498498
}
499499
}
500-
}
500+
}
501501
```
502502

503503
::::{note}
@@ -570,7 +570,7 @@ GET /retrievers_example/_search
570570
"normalizer": "minmax"
571571
}
572572
}
573-
}
573+
}
574574
```
575575

576576
This returns the following response based on the normalized score for each result:
@@ -1503,6 +1503,7 @@ PUT _inference/rerank/my-rerank-model
15031503
```
15041504

15051505
Let’s start by reranking the results of the `rrf` retriever in our previous example.
1506+
We'll also apply a `chunk_rescorer` to ensure that we only consider the best scoring chunks when sending information to the reranker.
15061507

15071508
```console
15081509
GET retrievers_example/_search
@@ -1541,7 +1542,15 @@ GET retrievers_example/_search
15411542
},
15421543
"field": "text",
15431544
"inference_id": "my-rerank-model",
1544-
"inference_text": "What are the state of the art applications of AI in information retrieval?"
1545+
"inference_text": "What are the state of the art applications of AI in information retrieval?",
1546+
"chunk_rescorer": {
1547+
"size": 1,
1548+
"chunking_settings": {
1549+
"strategy": "sentence",
1550+
"max_chunk_size": 300,
1551+
"sentence_overlap": 0
1552+
}
1553+
}
15451554
}
15461555
},
15471556
"_source": false

docs/reference/elasticsearch/rest-apis/retrievers/text-similarity-reranker-retriever.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,25 @@ score = ln(score), if score < 0
8686

8787
Applies the specified [boolean query filter](/reference/query-languages/query-dsl/query-dsl-bool-query.md) to the child `retriever`. If the child retriever already specifies any filters, then this top-level filter is applied in conjunction with the filter defined in the child retriever.
8888

89+
`chunk_rescorer` {applies_to}`stack: beta 9.2`
90+
: (Optional, `object`)
91+
92+
Chunks and scores documents based on configured chunking settings, and only sends the best scoring chunks to the reranking model as input. This helps improve relevance when reranking long documents that would otherwise be truncated by the reranking model's token limit.
93+
94+
Parameters for `chunk_rescorer`:
95+
96+
`size`
97+
: (Optional, `int`)
98+
99+
The number of chunks to pass to the reranker. Defaults to `1`.
100+
101+
`chunking_settings`
102+
: (Optional, `object`)
103+
104+
Settings for chunking text into smaller passages for scoring and reranking. Defaults to the optimal chunking settings for [Elastic Rerank](docs-content://explore-analyze/machine-learning/nlp/ml-nlp-rerank.md). Refer to the [Inference API documentation](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put#operation-inference-put-body-application-json-chunking_settings) for valid values for `chunking_settings`.
105+
:::{warning}
106+
If you configure chunks larger than the reranker's token limit, the results may be truncated. This can degrade relevance significantly.
107+
:::
89108

90109

91110
## Example: Elastic Rerank [text-similarity-reranker-retriever-example-elastic-rerank]

0 commit comments

Comments
 (0)