Skip to content

Commit 93487b2

Browse files
authored
Merge branch 'main' into reshard_disruption_test
2 parents 2947315 + 79e2e04 commit 93487b2

File tree

21 files changed

+842
-231
lines changed

21 files changed

+842
-231
lines changed

docs/changelog/131056.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 131056
2+
summary: Add existing shards allocator settings to failure store allowed list
3+
area: Data streams
4+
type: bug
5+
issues: []

docs/changelog/131081.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 131081
2+
summary: Fix knn search error when dimensions are not set
3+
area: Vector Search
4+
type: bug
5+
issues:
6+
- 129550

docs/changelog/131111.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 131111
2+
summary: Don't allow field caps to use semantic queries as index filters
3+
area: Search
4+
type: bug
5+
issues: []

docs/reference/query-languages/query-dsl/query-dsl-bool-query.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ A query that matches documents matching boolean combinations of other queries. T
1313
| --- | --- |
1414
| `must` | The clause (query) must appear in matching documents and will contribute to the score. Each query defined under a `must` acts as a logical "AND", returning only documents that match *all* the specified queries. |
1515
| `should` | The clause (query) should appear in the matching document. Each query defined under a `should` acts as a logical "OR", returning documents that match *any* of the specified queries. |
16-
| `filter` | The clause (query) must appear in matching documents. However unlike`must` the score of the query will be ignored. Filter clauses are executedin [filter context](/reference/query-languages/query-dsl/query-filter-context.md), meaning that scoring is ignoredand clauses are considered for caching. Each query defined under a `filter` acts as a logical "AND", returning only documents that match *all* the specified queries. |
17-
| `must_not` | The clause (query) must not appear in the matchingdocuments. Clauses are executed in [filter context](/reference/query-languages/query-dsl/query-filter-context.md) meaningthat scoring is ignored and clauses are considered for caching. Because scoring isignored, a score of `0` for all documents is returned. Each query defined under a `must_not` acts as a logical "NOT", returning only documents that do not match any of the specified queries. |
16+
| `filter` | The clause (query) must appear in matching documents. However, unlike `must`, the score of the query will be ignored. Filter clauses are executed in [filter context](/reference/query-languages/query-dsl/query-filter-context.md), meaning that scoring is ignored and clauses are considered for caching. Each query defined under a `filter` acts as a logical "AND", returning only documents that match *all* the specified queries. |
17+
| `must_not` | The clause (query) must not appear in the matching documents. Clauses are executed in [filter context](/reference/query-languages/query-dsl/query-filter-context.md), meaning that scoring is ignored and clauses are considered for caching. Because scoring is ignored, a score of `0` for all documents is returned. Each query defined under a `must_not` acts as a logical "NOT", returning only documents that do not match any of the specified queries. |
1818

1919
The `must` and `should` clauses function as logical AND, OR operators, contributing to the scoring of results. However, these results are not cached, which means repeated queries won't benefit from faster retrieval. In contrast, the `filter` and `must_not` clauses are used to include or exclude results without impacting the score, unless used within a `constant_score` query.
2020

docs/reference/query-languages/query-dsl/query-dsl-knn-query.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,6 @@ POST my-image-index/_search
165165

166166
A knn query can be used as part of a hybrid search, where the knn query is combined with other lexical queries. For example, the query below finds documents with `title` matching `mountain lake`, and combines them with the top 10 documents that have the closest image vectors to the `query_vector`. The combined documents are then scored and the 3 top-scored documents are returned.
167167

168-
+
169168

170169
```console
171170
POST my-image-index/_search

muted-tests.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -176,9 +176,6 @@ tests:
176176
- class: org.elasticsearch.test.rest.ClientYamlTestSuiteIT
177177
method: test {yaml=snapshot.delete/10_basic/Delete a snapshot asynchronously}
178178
issue: https://github.com/elastic/elasticsearch/issues/122102
179-
- class: org.elasticsearch.smoketest.SmokeTestMonitoringWithSecurityIT
180-
method: testHTTPExporterWithSSL
181-
issue: https://github.com/elastic/elasticsearch/issues/122220
182179
- class: org.elasticsearch.blocks.SimpleBlocksIT
183180
method: testConcurrentAddBlock
184181
issue: https://github.com/elastic/elasticsearch/issues/122324

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/40_knn_search.yml

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -670,3 +670,36 @@ setup:
670670
properties:
671671
embedding:
672672
type: dense_vector
673+
674+
675+
---
676+
"Searching with no data dimensions specified":
677+
- requires:
678+
cluster_features: "search.vectors.no_dimensions_bugfix"
679+
reason: "Search with no dimensions bugfix"
680+
681+
- do:
682+
indices.create:
683+
index: empty-test
684+
body:
685+
mappings:
686+
properties:
687+
vector:
688+
type: dense_vector
689+
index: true
690+
691+
- do:
692+
search:
693+
index: empty-test
694+
body:
695+
fields: [ "name" ]
696+
knn:
697+
field: vector
698+
query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ]
699+
k: 3
700+
num_candidates: 3
701+
rescore_vector:
702+
oversample: 1.5
703+
similarity: 0.1
704+
705+
- match: { hits.total.value: 0 }

server/src/main/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesRequest.java

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,13 @@
1818
import org.elasticsearch.common.Strings;
1919
import org.elasticsearch.common.io.stream.StreamInput;
2020
import org.elasticsearch.common.io.stream.StreamOutput;
21+
import org.elasticsearch.index.query.BoolQueryBuilder;
22+
import org.elasticsearch.index.query.BoostingQueryBuilder;
23+
import org.elasticsearch.index.query.ConstantScoreQueryBuilder;
24+
import org.elasticsearch.index.query.DisMaxQueryBuilder;
25+
import org.elasticsearch.index.query.NestedQueryBuilder;
2126
import org.elasticsearch.index.query.QueryBuilder;
27+
import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder;
2228
import org.elasticsearch.tasks.CancellableTask;
2329
import org.elasticsearch.tasks.Task;
2430
import org.elasticsearch.tasks.TaskId;
@@ -268,9 +274,53 @@ public ActionRequestValidationException validate() {
268274
if (fields == null || fields.length == 0) {
269275
validationException = ValidateActions.addValidationError("no fields specified", validationException);
270276
}
277+
278+
// Band-aid fix for https://github.com/elastic/elasticsearch/issues/116106.
279+
// Semantic queries are high-recall queries, making them poor filters and effectively the same as an exists query when used in that
280+
// context.
281+
if (containsSemanticQuery(indexFilter)) {
282+
validationException = ValidateActions.addValidationError(
283+
"index filter cannot contain semantic queries. Use an exists query instead.",
284+
validationException
285+
);
286+
}
287+
271288
return validationException;
272289
}
273290

291+
/**
292+
* Recursively checks if a query builder contains any semantic queries
293+
*/
294+
private static boolean containsSemanticQuery(QueryBuilder queryBuilder) {
295+
boolean containsSemanticQuery = false;
296+
297+
if (queryBuilder == null) {
298+
return containsSemanticQuery;
299+
}
300+
301+
if ("semantic".equals(queryBuilder.getWriteableName())) {
302+
containsSemanticQuery = true;
303+
} else if (queryBuilder instanceof BoolQueryBuilder boolQuery) {
304+
containsSemanticQuery = boolQuery.must().stream().anyMatch(FieldCapabilitiesRequest::containsSemanticQuery)
305+
|| boolQuery.mustNot().stream().anyMatch(FieldCapabilitiesRequest::containsSemanticQuery)
306+
|| boolQuery.should().stream().anyMatch(FieldCapabilitiesRequest::containsSemanticQuery)
307+
|| boolQuery.filter().stream().anyMatch(FieldCapabilitiesRequest::containsSemanticQuery);
308+
} else if (queryBuilder instanceof DisMaxQueryBuilder disMaxQuery) {
309+
containsSemanticQuery = disMaxQuery.innerQueries().stream().anyMatch(FieldCapabilitiesRequest::containsSemanticQuery);
310+
} else if (queryBuilder instanceof NestedQueryBuilder nestedQuery) {
311+
containsSemanticQuery = containsSemanticQuery(nestedQuery.query());
312+
} else if (queryBuilder instanceof BoostingQueryBuilder boostingQuery) {
313+
containsSemanticQuery = containsSemanticQuery(boostingQuery.positiveQuery())
314+
|| containsSemanticQuery(boostingQuery.negativeQuery());
315+
} else if (queryBuilder instanceof ConstantScoreQueryBuilder constantScoreQuery) {
316+
containsSemanticQuery = containsSemanticQuery(constantScoreQuery.innerQuery());
317+
} else if (queryBuilder instanceof FunctionScoreQueryBuilder functionScoreQuery) {
318+
containsSemanticQuery = containsSemanticQuery(functionScoreQuery.query());
319+
}
320+
321+
return containsSemanticQuery;
322+
}
323+
274324
@Override
275325
public boolean equals(Object o) {
276326
if (this == o) return true;

server/src/main/java/org/elasticsearch/cluster/metadata/DataStreamFailureStoreDefinition.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
package org.elasticsearch.cluster.metadata;
1111

1212
import org.elasticsearch.cluster.routing.allocation.DataTier;
13+
import org.elasticsearch.cluster.routing.allocation.ExistingShardsAllocator;
1314
import org.elasticsearch.common.compress.CompressedXContent;
1415
import org.elasticsearch.common.settings.Setting;
1516
import org.elasticsearch.common.settings.Settings;
@@ -40,7 +41,9 @@ public class DataStreamFailureStoreDefinition {
4041
IndexMetadata.SETTING_NUMBER_OF_SHARDS,
4142
IndexMetadata.SETTING_NUMBER_OF_REPLICAS,
4243
IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS,
43-
IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey()
44+
IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(),
45+
// Different recovery implementations may be provided on the index which need to be preserved.
46+
ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_SETTING.getKey()
4447
);
4548
public static final Set<String> SUPPORTED_USER_SETTINGS_PREFIXES = Set.of(
4649
IndexMetadata.INDEX_ROUTING_REQUIRE_GROUP_PREFIX + ".",

server/src/main/java/org/elasticsearch/common/util/concurrent/EsExecutors.java

Lines changed: 34 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,7 @@ public void rejectedExecution(Runnable task, ThreadPoolExecutor executor) {
577577
}
578578

579579
public static class TaskTrackingConfig {
580+
// This is an arbitrarily chosen starting value for alpha.
580581
public static final double DEFAULT_EXECUTION_TIME_EWMA_ALPHA_FOR_TEST = 0.3;
581582

582583
private final boolean trackExecutionTime;
@@ -597,17 +598,6 @@ public static class TaskTrackingConfig {
597598
DEFAULT_EXECUTION_TIME_EWMA_ALPHA_FOR_TEST
598599
);
599600

600-
public TaskTrackingConfig(boolean trackOngoingTasks, double executionTimeEWMAAlpha) {
601-
this(true, trackOngoingTasks, false, executionTimeEWMAAlpha);
602-
}
603-
604-
/**
605-
* Execution tracking enabled constructor, with extra options to enable further specialized tracking.
606-
*/
607-
public TaskTrackingConfig(boolean trackOngoingTasks, boolean trackMaxQueueLatency, double executionTimeEwmaAlpha) {
608-
this(true, trackOngoingTasks, trackMaxQueueLatency, executionTimeEwmaAlpha);
609-
}
610-
611601
/**
612602
* @param trackExecutionTime Whether to track execution stats
613603
* @param trackOngoingTasks Whether to track ongoing task execution time, not just finished tasks
@@ -641,6 +631,39 @@ public boolean trackMaxQueueLatency() {
641631
public double getExecutionTimeEwmaAlpha() {
642632
return executionTimeEwmaAlpha;
643633
}
634+
635+
public static Builder builder() {
636+
return new Builder();
637+
}
638+
639+
public static class Builder {
640+
private boolean trackExecutionTime = false;
641+
private boolean trackOngoingTasks = false;
642+
private boolean trackMaxQueueLatency = false;
643+
private double ewmaAlpha = DEFAULT_EXECUTION_TIME_EWMA_ALPHA_FOR_TEST;
644+
645+
public Builder() {}
646+
647+
public Builder trackExecutionTime(double alpha) {
648+
trackExecutionTime = true;
649+
ewmaAlpha = alpha;
650+
return this;
651+
}
652+
653+
public Builder trackOngoingTasks() {
654+
trackOngoingTasks = true;
655+
return this;
656+
}
657+
658+
public Builder trackMaxQueueLatency() {
659+
trackMaxQueueLatency = true;
660+
return this;
661+
}
662+
663+
public TaskTrackingConfig build() {
664+
return new TaskTrackingConfig(trackExecutionTime, trackOngoingTasks, trackMaxQueueLatency, ewmaAlpha);
665+
}
666+
}
644667
}
645668

646669
}

0 commit comments

Comments
 (0)