Skip to content

Commit 4003de4

Browse files
authored
Merge branch 'main' into native_vector_ops
2 parents e1a4d77 + bd220a5 commit 4003de4

File tree

16 files changed

+170
-75
lines changed

16 files changed

+170
-75
lines changed

docs/changelog/130531.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 130531
2+
summary: Adding check for `isIndexed` in text fields when generating field exists
3+
queries to avoid ISE when field is stored but not indexed or with `doc_values`
4+
area: Analysis
5+
type: bug
6+
issues: []

muted-tests.yml

Lines changed: 9 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -290,9 +290,6 @@ tests:
290290
- class: org.elasticsearch.packaging.test.DockerTests
291291
method: test023InstallPluginUsingConfigFile
292292
issue: https://github.com/elastic/elasticsearch/issues/126145
293-
- class: org.elasticsearch.search.SearchWithRejectionsIT
294-
method: testOpenContextsAfterRejections
295-
issue: https://github.com/elastic/elasticsearch/issues/126340
296293
- class: org.elasticsearch.smoketest.MlWithSecurityIT
297294
method: test {yaml=ml/start_data_frame_analytics/Test start classification analysis when the dependent variable cardinality is too low}
298295
issue: https://github.com/elastic/elasticsearch/issues/123200
@@ -419,9 +416,6 @@ tests:
419416
- class: org.elasticsearch.packaging.test.TemporaryDirectoryConfigTests
420417
method: test21AcceptsCustomPathInDocker
421418
issue: https://github.com/elastic/elasticsearch/issues/128114
422-
- class: org.elasticsearch.xpack.ml.integration.InferenceIngestIT
423-
method: testPipelineIngestWithModelAliases
424-
issue: https://github.com/elastic/elasticsearch/issues/128417
425419
- class: org.elasticsearch.xpack.search.CrossClusterAsyncSearchIT
426420
method: testCCSClusterDetailsWhereAllShardsSkippedInCanMatch
427421
issue: https://github.com/elastic/elasticsearch/issues/128418
@@ -449,9 +443,6 @@ tests:
449443
- class: org.elasticsearch.packaging.test.DockerTests
450444
method: test150MachineDependentHeap
451445
issue: https://github.com/elastic/elasticsearch/issues/128120
452-
- class: org.elasticsearch.xpack.inference.InferenceGetServicesIT
453-
method: testGetServicesWithCompletionTaskType
454-
issue: https://github.com/elastic/elasticsearch/issues/128952
455446
- class: org.elasticsearch.packaging.test.DockerTests
456447
method: test073RunEsAsDifferentUserAndGroupWithoutBindMounting
457448
issue: https://github.com/elastic/elasticsearch/issues/128996
@@ -528,12 +519,6 @@ tests:
528519
method: "builds distribution from branches via archives extractedAssemble [bwcDistVersion: 8.2.1, bwcProject: bugfix, expectedAssembleTaskName:
529520
extractedAssemble, #2]"
530521
issue: https://github.com/elastic/elasticsearch/issues/119871
531-
- class: org.elasticsearch.xpack.inference.qa.mixed.CohereServiceMixedIT
532-
method: testRerank
533-
issue: https://github.com/elastic/elasticsearch/issues/130009
534-
- class: org.elasticsearch.xpack.inference.qa.mixed.CohereServiceMixedIT
535-
method: testCohereEmbeddings
536-
issue: https://github.com/elastic/elasticsearch/issues/130010
537522
- class: geoip.GeoIpMultiProjectIT
538523
issue: https://github.com/elastic/elasticsearch/issues/130073
539524
- class: org.elasticsearch.xpack.esql.action.EnrichIT
@@ -545,9 +530,6 @@ tests:
545530
- class: org.elasticsearch.compute.aggregation.TopIntAggregatorFunctionTests
546531
method: testManyInitialManyPartialFinalRunnerThrowing
547532
issue: https://github.com/elastic/elasticsearch/issues/130145
548-
- class: org.elasticsearch.index.codec.vectors.cluster.KMeansLocalTests
549-
method: testKMeansNeighbors
550-
issue: https://github.com/elastic/elasticsearch/issues/130258
551533
- class: org.elasticsearch.xpack.test.rest.XPackRestIT
552534
method: test {p0=esql/10_basic/basic with documents_found}
553535
issue: https://github.com/elastic/elasticsearch/issues/130256
@@ -575,30 +557,18 @@ tests:
575557
- class: org.elasticsearch.test.rest.yaml.RcsCcsCommonYamlTestSuiteIT
576558
method: test {p0=msearch/20_typed_keys/Multisearch test with typed_keys parameter for sampler and significant terms}
577559
issue: https://github.com/elastic/elasticsearch/issues/130472
578-
- class: org.elasticsearch.index.codec.vectors.cluster.HierarchicalKMeansTests
579-
method: testHKmeans
580-
issue: https://github.com/elastic/elasticsearch/issues/130497
581560
- class: org.elasticsearch.xpack.esql.action.EsqlActionBreakerIT
582561
method: testProjectWhere
583562
issue: https://github.com/elastic/elasticsearch/issues/130504
584563
- class: org.elasticsearch.xpack.esql.action.EsqlActionBreakerIT
585564
method: testTopNPushedToLucene
586565
issue: https://github.com/elastic/elasticsearch/issues/130505
587-
- class: org.elasticsearch.xpack.logsdb.LogsdbTestSuiteIT
588-
method: test {yaml=/52_esql_insist_operator_synthetic_source/FROM with INSIST_🐔and LIMIT 1}
589-
issue: https://github.com/elastic/elasticsearch/issues/130506
590-
- class: org.elasticsearch.xpack.logsdb.LogsdbTestSuiteIT
591-
method: test {yaml=/52_esql_insist_operator_synthetic_source/FROM with INSIST_🐔}
592-
issue: https://github.com/elastic/elasticsearch/issues/130507
593566
- class: org.elasticsearch.common.ssl.DefaultJdkTrustConfigTests
594567
method: testGetSystemTrustStoreWithNoSystemProperties
595568
issue: https://github.com/elastic/elasticsearch/issues/130517
596569
- class: org.elasticsearch.common.ssl.DefaultJdkTrustConfigTests
597570
method: testGetNonPKCS11TrustStoreWithPasswordSet
598571
issue: https://github.com/elastic/elasticsearch/issues/130519
599-
- class: org.elasticsearch.repositories.blobstore.BlobStoreCorruptionIT
600-
method: testCorruptionDetection
601-
issue: https://github.com/elastic/elasticsearch/issues/130536
602572
- class: org.elasticsearch.multiproject.test.CoreWithMultipleProjectsClientYamlTestSuiteIT
603573
method: test {yaml=indices.resolve_index/10_basic_resolve_index/Resolve index with hidden and closed indices}
604574
issue: https://github.com/elastic/elasticsearch/issues/130568
@@ -632,6 +602,15 @@ tests:
632602
- class: org.elasticsearch.xpack.monitoring.exporter.http.HttpExporterTests
633603
method: testCreateRestClient
634604
issue: https://github.com/elastic/elasticsearch/issues/130600
605+
- class: org.elasticsearch.xpack.esql.qa.single_node.GenerativeIT
606+
method: test
607+
issue: https://github.com/elastic/elasticsearch/issues/130067
608+
- class: org.elasticsearch.index.codec.vectors.cluster.HierarchicalKMeansTests
609+
method: testHKmeans
610+
issue: https://github.com/elastic/elasticsearch/issues/130497
611+
- class: org.elasticsearch.backwards.MixedClusterClientYamlTestSuiteIT
612+
method: test {p0=search.vectors/40_knn_search/Dimensions are dynamically set}
613+
issue: https://github.com/elastic/elasticsearch/issues/130626
635614

636615
# Examples:
637616
#

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/160_exists_query.yml

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ setup:
4545
type: keyword
4646
text:
4747
type: text
48+
text_stored_not_indexed:
49+
type: text
50+
store: true
51+
index: false
4852

4953
- do:
5054
headers:
@@ -70,6 +74,7 @@ setup:
7074
inner1: "foo"
7175
inner2: "bar"
7276
text: "foo bar"
77+
text_stored_not_indexed: "foo bar"
7378

7479
- do:
7580
headers:
@@ -94,6 +99,7 @@ setup:
9499
object:
95100
inner1: "foo"
96101
text: "foo bar"
102+
text_stored_not_indexed: "foo bar"
97103

98104
- do:
99105
headers:
@@ -119,6 +125,7 @@ setup:
119125
object:
120126
inner2: "bar"
121127
text: "foo bar"
128+
text_stored_not_indexed: "foo bar"
122129

123130
- do:
124131
index:
@@ -184,6 +191,12 @@ setup:
184191
doc_values: false
185192
text:
186193
type: text
194+
keyword_stored_norms_not_indexed:
195+
type: keyword
196+
doc_values: false
197+
index: false
198+
store: true
199+
norms: true
187200

188201
- do:
189202
headers:
@@ -209,6 +222,7 @@ setup:
209222
inner1: "foo"
210223
inner2: "bar"
211224
text: "foo bar"
225+
keyword_stored_norms_not_indexed: "foo bar"
212226

213227
- do:
214228
headers:
@@ -233,6 +247,7 @@ setup:
233247
object:
234248
inner1: "foo"
235249
text: "foo bar"
250+
keyword_stored_norms_not_indexed: "foo bar"
236251

237252
- do:
238253
headers:
@@ -258,6 +273,7 @@ setup:
258273
object:
259274
inner2: "bar"
260275
text: "foo bar"
276+
keyword_stored_norms_not_indexed: "foo bar"
261277

262278
- do:
263279
index:
@@ -1268,3 +1284,48 @@ setup:
12681284
field: text
12691285

12701286
- match: {hits.total: 1}
1287+
1288+
---
1289+
"Test exists query on text field with no dv, that is stored but not indexed":
1290+
- requires:
1291+
capabilities:
1292+
- method: POST
1293+
path: /_search
1294+
capabilities: [ field_exists_query_for_text_fields_no_index_or_dv ]
1295+
test_runner_features: capabilities
1296+
reason: "Before the fix, this query would throw an ISE because the field is not indexed and has no doc values."
1297+
1298+
- do:
1299+
search:
1300+
rest_total_hits_as_int: true
1301+
index: test
1302+
body:
1303+
query:
1304+
exists:
1305+
field: text_stored_not_indexed
1306+
1307+
# this should not throw, but rather return 0 hits, as the field is neither indexed nor has doc values
1308+
- match: {hits.total: 0}
1309+
1310+
1311+
---
1312+
"Test exists query on keyword field with no dv, that is stored, with norms, but not indexed":
1313+
- requires:
1314+
capabilities:
1315+
- method: POST
1316+
path: /_search
1317+
capabilities: [ field_exists_query_for_text_fields_no_index_or_dv ]
1318+
test_runner_features: capabilities
1319+
reason: "Before the fix, this query would throw an ISE because the field is not indexed and has no doc values."
1320+
1321+
- do:
1322+
search:
1323+
rest_total_hits_as_int: true
1324+
index: test-no-dv
1325+
body:
1326+
query:
1327+
exists:
1328+
field: keyword_stored_norms_not_indexed
1329+
1330+
# this should not throw, but rather return 0 hits, as the field is neither indexed nor has doc values
1331+
- match: {hits.total: 0}

server/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/BlobStoreCorruptionIT.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ public void testCorruptionDetection() throws Exception {
9090
"fallback message",
9191
"org.elasticsearch.repositories.blobstore.BlobStoreRepository",
9292
Level.ERROR,
93-
"index [*] shard generation [*] in ["
93+
"index [*] shard generation [*] in [default/"
9494
+ repositoryName
9595
+ "][*] not found - falling back to reading all shard snapshots"
9696
)
@@ -100,7 +100,9 @@ public void testCorruptionDetection() throws Exception {
100100
"shard blobs list",
101101
"org.elasticsearch.repositories.blobstore.BlobStoreRepository",
102102
Level.ERROR,
103-
"read shard snapshots [*] due to missing shard generation [*] for index [*] in [" + repositoryName + "][*]"
103+
"read shard snapshots [*] due to missing shard generation [*] for index [*] in [default/"
104+
+ repositoryName
105+
+ "][*]"
104106
)
105107
);
106108
client().admin()

server/src/internalClusterTest/java/org/elasticsearch/search/SearchWithRejectionsIT.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ public void testOpenContextsAfterRejections() throws Exception {
5858
}
5959
assertBusy(
6060
() -> assertThat(indicesAdmin().prepareStats().get().getTotal().getSearch().getOpenContexts(), equalTo(0L)),
61-
1,
61+
2,
6262
TimeUnit.SECONDS
6363
);
6464
}

server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/HierarchicalKMeans.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,8 @@ public KMeansResult cluster(FloatVectorValues vectors, int targetSize) throws IO
6868
KMeansIntermediate kMeansIntermediate = clusterAndSplit(vectors, targetSize);
6969
if (kMeansIntermediate.centroids().length > 1 && kMeansIntermediate.centroids().length < vectors.size()) {
7070
int localSampleSize = Math.min(kMeansIntermediate.centroids().length * samplesPerCluster / 2, vectors.size());
71-
KMeansLocal kMeansLocal = new KMeansLocal(localSampleSize, maxIterations, clustersPerNeighborhood, DEFAULT_SOAR_LAMBDA);
72-
kMeansLocal.cluster(vectors, kMeansIntermediate, true);
71+
KMeansLocal kMeansLocal = new KMeansLocal(localSampleSize, maxIterations);
72+
kMeansLocal.cluster(vectors, kMeansIntermediate, clustersPerNeighborhood, DEFAULT_SOAR_LAMBDA);
7373
}
7474

7575
return kMeansIntermediate;

server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/KMeansLocal.java

Lines changed: 30 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -35,18 +35,10 @@ class KMeansLocal {
3535

3636
final int sampleSize;
3737
final int maxIterations;
38-
final int clustersPerNeighborhood;
39-
final float soarLambda;
4038

41-
KMeansLocal(int sampleSize, int maxIterations, int clustersPerNeighborhood, float soarLambda) {
39+
KMeansLocal(int sampleSize, int maxIterations) {
4240
this.sampleSize = sampleSize;
4341
this.maxIterations = maxIterations;
44-
this.clustersPerNeighborhood = clustersPerNeighborhood;
45-
this.soarLambda = soarLambda;
46-
}
47-
48-
KMeansLocal(int sampleSize, int maxIterations) {
49-
this(sampleSize, maxIterations, -1, -1f);
5042
}
5143

5244
/**
@@ -198,8 +190,13 @@ private void computeNeighborhoods(float[][] centers, List<NeighborHood> neighbor
198190
}
199191
}
200192

201-
private int[] assignSpilled(FloatVectorValues vectors, List<NeighborHood> neighborhoods, float[][] centroids, int[] assignments)
202-
throws IOException {
193+
private int[] assignSpilled(
194+
FloatVectorValues vectors,
195+
List<NeighborHood> neighborhoods,
196+
float[][] centroids,
197+
int[] assignments,
198+
float soarLambda
199+
) throws IOException {
203200
// SOAR uses an adjusted distance for assigning spilled documents which is
204201
// given by:
205202
//
@@ -264,6 +261,10 @@ private int[] assignSpilled(FloatVectorValues vectors, List<NeighborHood> neighb
264261
return spilledAssignments;
265262
}
266263

264+
record NeighborHood(int[] neighbors, float maxIntraDistance) {
265+
static final NeighborHood EMPTY = new NeighborHood(new int[0], Float.POSITIVE_INFINITY);
266+
}
267+
267268
/**
268269
* cluster using a lloyd k-means algorithm that is not neighbor aware
269270
*
@@ -274,11 +275,7 @@ private int[] assignSpilled(FloatVectorValues vectors, List<NeighborHood> neighb
274275
* @throws IOException is thrown if vectors is inaccessible
275276
*/
276277
void cluster(FloatVectorValues vectors, KMeansIntermediate kMeansIntermediate) throws IOException {
277-
cluster(vectors, kMeansIntermediate, false);
278-
}
279-
280-
record NeighborHood(int[] neighbors, float maxIntraDistance) {
281-
static final NeighborHood EMPTY = new NeighborHood(new int[0], Float.POSITIVE_INFINITY);
278+
doCluster(vectors, kMeansIntermediate, -1, -1);
282279
}
283280

284281
/**
@@ -290,12 +287,23 @@ record NeighborHood(int[] neighbors, float maxIntraDistance) {
290287
* the prior assignments of the given vectors; care should be taken in
291288
* passing in a valid output object with a centroids array that is the size of centroids expected
292289
* and assignments that are the same size as the vectors. The SOAR assignments are overwritten by this operation.
293-
* @param neighborAware whether nearby neighboring centroids and their vectors should be used to update the centroid positions,
294-
* implies SOAR assignments
295-
* @throws IOException is thrown if vectors is inaccessible
290+
* @param clustersPerNeighborhood number of nearby neighboring centroids to be used to update the centroid positions.
291+
* @param soarLambda lambda used for SOAR assignments
292+
*
293+
* @throws IOException is thrown if vectors is inaccessible; an IllegalArgumentException is thrown if clustersPerNeighborhood is less than 2
296294
*/
297-
void cluster(FloatVectorValues vectors, KMeansIntermediate kMeansIntermediate, boolean neighborAware) throws IOException {
295+
void cluster(FloatVectorValues vectors, KMeansIntermediate kMeansIntermediate, int clustersPerNeighborhood, float soarLambda)
296+
throws IOException {
297+
if (clustersPerNeighborhood < 2) {
298+
throw new IllegalArgumentException("clustersPerNeighborhood must be at least 2, got [" + clustersPerNeighborhood + "]");
299+
}
300+
doCluster(vectors, kMeansIntermediate, clustersPerNeighborhood, soarLambda);
301+
}
302+
303+
private void doCluster(FloatVectorValues vectors, KMeansIntermediate kMeansIntermediate, int clustersPerNeighborhood, float soarLambda)
304+
throws IOException {
298305
float[][] centroids = kMeansIntermediate.centroids();
306+
boolean neighborAware = clustersPerNeighborhood != -1 && centroids.length > 1;
299307

300308
List<NeighborHood> neighborhoods = null;
301309
// if there are very few centroids, don't bother with neighborhoods or neighbor aware clustering
@@ -308,11 +316,11 @@ void cluster(FloatVectorValues vectors, KMeansIntermediate kMeansIntermediate, b
308316
computeNeighborhoods(centroids, neighborhoods, clustersPerNeighborhood);
309317
}
310318
cluster(vectors, kMeansIntermediate, neighborhoods);
311-
if (neighborAware && clustersPerNeighborhood > 0) {
319+
if (neighborAware) {
312320
int[] assignments = kMeansIntermediate.assignments();
313321
assert assignments != null;
314322
assert assignments.length == vectors.size();
315-
kMeansIntermediate.setSoarAssignments(assignSpilled(vectors, neighborhoods, centroids, assignments));
323+
kMeansIntermediate.setSoarAssignments(assignSpilled(vectors, neighborhoods, centroids, assignments, soarLambda));
316324
}
317325
}
318326

server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,7 @@ public Query regexpQuery(
371371
}
372372

373373
public Query existsQuery(SearchExecutionContext context) {
374-
if (hasDocValues() || getTextSearchInfo().hasNorms()) {
374+
if (hasDocValues() || (isIndexed() && getTextSearchInfo().hasNorms())) {
375375
return new FieldExistsQuery(name());
376376
} else {
377377
return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name()));

server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ private SearchCapabilities() {}
5252
private static final String SIGNIFICANT_TERMS_ON_NESTED_FIELDS = "significant_terms_on_nested_fields";
5353
private static final String EXCLUDE_VECTORS_PARAM = "exclude_vectors_param";
5454
private static final String DENSE_VECTOR_UPDATABLE_BBQ = "dense_vector_updatable_bbq";
55+
private static final String FIELD_EXISTS_QUERY_FOR_TEXT_FIELDS_NO_INDEX_OR_DV = "field_exists_query_for_text_fields_no_index_or_dv";
5556

5657
public static final Set<String> CAPABILITIES;
5758
static {
@@ -75,6 +76,7 @@ private SearchCapabilities() {}
7576
capabilities.add(SIGNIFICANT_TERMS_ON_NESTED_FIELDS);
7677
capabilities.add(EXCLUDE_VECTORS_PARAM);
7778
capabilities.add(DENSE_VECTOR_UPDATABLE_BBQ);
79+
capabilities.add(FIELD_EXISTS_QUERY_FOR_TEXT_FIELDS_NO_INDEX_OR_DV);
7880
CAPABILITIES = Set.copyOf(capabilities);
7981
}
8082
}

0 commit comments

Comments
 (0)