Skip to content

Commit 4bdec28

Browse files
committed
Merge branch 'main' of github.com:elastic/elasticsearch into knn_patience
2 parents 197bb58 + ed071cc commit 4bdec28

File tree

10 files changed

+95
-65
lines changed

10 files changed

+95
-65
lines changed

.buildkite/hooks/pre-command

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,14 @@ if [[ "${USE_LUCENE_SNAPSHOT_CREDS:-}" == "true" ]]; then
6464
unset data
6565
fi
6666

67+
if [[ "${USE_MAVEN_GPG:-}" == "true" ]]; then
68+
vault_path="kv/ci-shared/release-eng/team-release-secrets/es-delivery/gpg"
69+
ORG_GRADLE_PROJECT_signingKey=$(vault kv get --field="private_key" $vault_path)
70+
ORG_GRADLE_PROJECT_signingPassword=$(vault kv get --field="passphase" $vault_path)
71+
export ORG_GRADLE_PROJECT_signingKey
72+
export ORG_GRADLE_PROJECT_signingPassword
73+
fi
74+
6775
if [[ "${USE_DRA_CREDENTIALS:-}" == "true" ]]; then
6876
DRA_VAULT_ROLE_ID_SECRET=$(vault read -field=role-id secret/ci/elastic-elasticsearch/legacy-vault-credentials)
6977
export DRA_VAULT_ROLE_ID_SECRET

.buildkite/pipelines/dra-workflow.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ steps:
22
- command: .buildkite/scripts/dra-workflow.sh
33
env:
44
USE_DRA_CREDENTIALS: "true"
5+
USE_MAVEN_GPG: "true"
56
USE_PROD_DOCKER_CREDENTIALS: "true"
67
agents:
78
provider: gcp

.buildkite/scripts/run-pr-upgrade-tests.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ fi
1818

1919
# Identify the merge base of the current commit (branch) and the base branch of the pull request.
2020
# PR upgrade tests are run from the merge base to the current commit.
21-
BASE_COMMIT=$(git merge-base $BUILDKITE_PULL_REQUEST_BASE_BRANCH $BUILDKITE_COMMIT)
21+
git fetch origin $BUILDKITE_PULL_REQUEST_BASE_BRANCH
22+
BASE_COMMIT=$(git merge-base origin/$BUILDKITE_PULL_REQUEST_BASE_BRANCH $BUILDKITE_COMMIT)
2223

2324
VERSION=$(sed -n 's/^elasticsearch[[:space:]]*=[[:space:]]*\(.*\)/\1/p' build-tools-internal/version.properties)
2425

build-conventions/src/main/java/org/elasticsearch/gradle/internal/conventions/PublishPlugin.java

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,11 @@
1010
package org.elasticsearch.gradle.internal.conventions;
1111

1212
import groovy.util.Node;
13+
import nmcp.NmcpPlugin;
1314

1415
import com.github.jengelman.gradle.plugins.shadow.ShadowExtension;
1516
import com.github.jengelman.gradle.plugins.shadow.ShadowPlugin;
1617

17-
import nmcp.NmcpPlugin;
18-
1918
import org.elasticsearch.gradle.internal.conventions.info.GitInfo;
2019
import org.elasticsearch.gradle.internal.conventions.precommit.PomValidationPrecommitPlugin;
2120
import org.elasticsearch.gradle.internal.conventions.util.Util;
@@ -41,6 +40,8 @@
4140
import org.gradle.api.tasks.bundling.Jar;
4241
import org.gradle.initialization.layout.BuildLayout;
4342
import org.gradle.language.base.plugins.LifecycleBasePlugin;
43+
import org.gradle.plugins.signing.SigningExtension;
44+
import org.gradle.plugins.signing.SigningPlugin;
4445
import org.w3c.dom.Element;
4546

4647
import java.io.File;
@@ -69,6 +70,7 @@ public void apply(Project project) {
6970
project.getPluginManager().apply(PomValidationPrecommitPlugin.class);
7071
project.getPluginManager().apply(LicensingPlugin.class);
7172
project.getPluginManager().apply(NmcpPlugin.class);
73+
project.getPluginManager().apply(SigningPlugin.class);
7274
configureJavadocJar(project);
7375
configureSourcesJar(project);
7476
configurePomGeneration(project);
@@ -79,6 +81,13 @@ public void apply(Project project) {
7981
private void configurePublications(Project project) {
8082
var publishingExtension = project.getExtensions().getByType(PublishingExtension.class);
8183
var publication = publishingExtension.getPublications().create("elastic", MavenPublication.class);
84+
Provider<String> signingKey = project.getProviders().gradleProperty("signingKey");
85+
if (signingKey.isPresent()) {
86+
SigningExtension signing = project.getExtensions().getByType(SigningExtension.class);
87+
signing.useInMemoryPgpKeys(signingKey.get(), project.getProviders().gradleProperty("signingPassword").get());
88+
signing.sign(publication);
89+
}
90+
8291
project.afterEvaluate(project1 -> {
8392
if (project1.getPlugins().hasPlugin(ShadowPlugin.class)) {
8493
configureWithShadowPlugin(project1, publication);

docs/reference/query-languages/query-dsl/query-dsl-knn-query.md

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,39 @@ A sample query can look like below:
229229

230230
Note that nested `knn` only supports `score_mode=max`.
231231

232+
## Knn query on a semantic_text field [knn-query-with-semantic-text]
233+
234+
Elasticsearch supports knn queries over a [
235+
`semantic_text` field](/reference/elasticsearch/mapping-reference/semantic-text.md).
236+
237+
Here is an example using the `query_vector_builder`:
238+
239+
```json
240+
{
241+
"query": {
242+
"knn": {
243+
"field": "inference_field",
244+
"k": 10,
245+
"num_candidates": 100,
246+
"query_vector_builder": {
247+
"text_embedding": {
248+
"model_text": "test"
249+
}
250+
}
251+
}
252+
},
253+
"_source": {
254+
"exclude": "inference_field.inference.chunks"
255+
}
256+
}
257+
```
258+
259+
Note that for `semantic_text` fields, the `model_id` does not have to be
260+
provided as it can be inferred from the `semantic_text` field mapping.
261+
262+
Knn search using query vectors over `semantic_text` fields is also supported,
263+
with no change to the API.
264+
232265
## Knn query with aggregations [knn-query-aggregations]
233266

234267
`knn` query calculates aggregations on top `k` documents from each shard. Thus, the final results from aggregations contain `k * number_of_shards` documents. This is different from the [top level knn section](docs-content://solutions/search/vector/knn.md) where aggregations are calculated on the global top `k` nearest documents.

muted-tests.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -569,6 +569,11 @@ tests:
569569
- class: org.elasticsearch.compute.aggregation.TopIntAggregatorFunctionTests
570570
method: testManyInitialManyPartialFinalRunnerThrowing
571571
issue: https://github.com/elastic/elasticsearch/issues/130145
572+
- class: org.elasticsearch.xpack.logsdb.patternedtext.PatternedTextFieldMapperTests
573+
issue: https://github.com/elastic/elasticsearch/issues/130162
574+
- class: org.elasticsearch.ingest.geoip.direct.TransportPutDatabaseConfigurationActionTests
575+
method: testValidatePrerequisites
576+
issue: https://github.com/elastic/elasticsearch/issues/130178
572577

573578
# Examples:
574579
#

server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/HierarchicalKMeans.java

Lines changed: 5 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
package org.elasticsearch.index.codec.vectors.cluster;
1111

1212
import org.apache.lucene.index.FloatVectorValues;
13-
import org.apache.lucene.util.VectorUtil;
1413

1514
import java.io.IOException;
1615

@@ -21,7 +20,7 @@ public class HierarchicalKMeans {
2120

2221
static final int MAXK = 128;
2322
static final int MAX_ITERATIONS_DEFAULT = 6;
24-
static final int SAMPLES_PER_CLUSTER_DEFAULT = 256;
23+
static final int SAMPLES_PER_CLUSTER_DEFAULT = 64;
2524
static final float DEFAULT_SOAR_LAMBDA = 1.0f;
2625

2726
final int dimension;
@@ -67,8 +66,7 @@ public KMeansResult cluster(FloatVectorValues vectors, int targetSize) throws IO
6766
// partition the space
6867
KMeansIntermediate kMeansIntermediate = clusterAndSplit(vectors, targetSize);
6968
if (kMeansIntermediate.centroids().length > 1 && kMeansIntermediate.centroids().length < vectors.size()) {
70-
float f = Math.min((float) samplesPerCluster / targetSize, 1.0f);
71-
int localSampleSize = (int) (f * vectors.size());
69+
int localSampleSize = Math.min(kMeansIntermediate.centroids().length * samplesPerCluster, vectors.size());
7270
KMeansLocal kMeansLocal = new KMeansLocal(localSampleSize, maxIterations, clustersPerNeighborhood, DEFAULT_SOAR_LAMBDA);
7371
kMeansLocal.cluster(vectors, kMeansIntermediate, true);
7472
}
@@ -86,42 +84,16 @@ KMeansIntermediate clusterAndSplit(final FloatVectorValues vectors, final int ta
8684

8785
// TODO: instead of creating new sub-cluster assignments, reuse the parent array each time
8886
int[] assignments = new int[vectors.size()];
89-
9087
KMeansLocal kmeans = new KMeansLocal(m, maxIterations);
9188
float[][] centroids = KMeansLocal.pickInitialCentroids(vectors, k);
92-
KMeansIntermediate kMeansIntermediate = new KMeansIntermediate(centroids);
89+
KMeansIntermediate kMeansIntermediate = new KMeansIntermediate(centroids, assignments, vectors::ordToDoc);
9390
kmeans.cluster(vectors, kMeansIntermediate);
9491

9592
// TODO: consider adding cluster size counts to the kmeans algo
9693
// handle assignment here so we can track distance and cluster size
9794
int[] centroidVectorCount = new int[centroids.length];
98-
float[][] nextCentroids = new float[centroids.length][dimension];
99-
for (int i = 0; i < vectors.size(); i++) {
100-
float smallest = Float.MAX_VALUE;
101-
int centroidIdx = -1;
102-
float[] vector = vectors.vectorValue(i);
103-
for (int j = 0; j < centroids.length; j++) {
104-
float[] centroid = centroids[j];
105-
float d = VectorUtil.squareDistance(vector, centroid);
106-
if (d < smallest) {
107-
smallest = d;
108-
centroidIdx = j;
109-
}
110-
}
111-
centroidVectorCount[centroidIdx]++;
112-
for (int j = 0; j < dimension; j++) {
113-
nextCentroids[centroidIdx][j] += vector[j];
114-
}
115-
assignments[i] = centroidIdx;
116-
}
117-
118-
// update centroids based on assignments of all vectors
119-
for (int i = 0; i < centroids.length; i++) {
120-
if (centroidVectorCount[i] > 0) {
121-
for (int j = 0; j < dimension; j++) {
122-
centroids[i][j] = nextCentroids[i][j] / centroidVectorCount[i];
123-
}
124-
}
95+
for (int assigment : assignments) {
96+
centroidVectorCount[assigment]++;
12597
}
12698

12799
int effectiveK = 0;
@@ -131,8 +103,6 @@ KMeansIntermediate clusterAndSplit(final FloatVectorValues vectors, final int ta
131103
}
132104
}
133105

134-
kMeansIntermediate = new KMeansIntermediate(centroids, assignments, vectors::ordToDoc);
135-
136106
if (effectiveK == 1) {
137107
return kMeansIntermediate;
138108
}

server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/KMeansIntermediate.java

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,6 @@ private KMeansIntermediate(float[][] centroids, int[] assignments, IntToIntFunct
3131
this(new float[0][0], new int[0], i -> i, new int[0]);
3232
}
3333

34-
KMeansIntermediate(float[][] centroids) {
35-
this(centroids, new int[0], i -> i, new int[0]);
36-
}
37-
3834
KMeansIntermediate(float[][] centroids, int[] assignments) {
3935
this(centroids, assignments, i -> i, new int[0]);
4036
}

server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/KMeansLocal.java

Lines changed: 29 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -87,17 +87,17 @@ private boolean stepLloyd(
8787

8888
for (int i = 0; i < sampleSize; i++) {
8989
float[] vector = vectors.vectorValue(i);
90-
int[] neighborOffsets = null;
91-
int centroidIdx = -1;
90+
final int assignment = assignments[i];
91+
final int bestCentroidOffset;
9292
if (neighborhoods != null) {
93-
neighborOffsets = neighborhoods.get(assignments[i]);
94-
centroidIdx = assignments[i];
93+
bestCentroidOffset = getBestCentroidFromNeighbours(centroids, vector, assignment, neighborhoods.get(assignment));
94+
} else {
95+
bestCentroidOffset = getBestCentroid(centroids, vector);
9596
}
96-
int bestCentroidOffset = getBestCentroidOffset(centroids, vector, centroidIdx, neighborOffsets);
97-
if (assignments[i] != bestCentroidOffset) {
97+
if (assignment != bestCentroidOffset) {
98+
assignments[i] = bestCentroidOffset;
9899
changed = true;
99100
}
100-
assignments[i] = bestCentroidOffset;
101101
centroidCounts[bestCentroidOffset]++;
102102
for (int d = 0; d < dim; d++) {
103103
nextCentroids[bestCentroidOffset][d] += vector[d];
@@ -116,23 +116,28 @@ private boolean stepLloyd(
116116
return changed;
117117
}
118118

119-
int getBestCentroidOffset(float[][] centroids, float[] vector, int centroidIdx, int[] centroidOffsets) {
119+
int getBestCentroidFromNeighbours(float[][] centroids, float[] vector, int centroidIdx, int[] centroidOffsets) {
120120
int bestCentroidOffset = centroidIdx;
121-
float minDsq;
122-
if (centroidIdx > 0 && centroidIdx < centroids.length) {
123-
minDsq = VectorUtil.squareDistance(vector, centroids[centroidIdx]);
124-
} else {
125-
minDsq = Float.MAX_VALUE;
121+
assert centroidIdx >= 0 && centroidIdx < centroids.length;
122+
float minDsq = VectorUtil.squareDistance(vector, centroids[centroidIdx]);
123+
for (int offset : centroidOffsets) {
124+
float dsq = VectorUtil.squareDistance(vector, centroids[offset]);
125+
if (dsq < minDsq) {
126+
minDsq = dsq;
127+
bestCentroidOffset = offset;
128+
}
126129
}
130+
return bestCentroidOffset;
131+
}
127132

128-
int k = 0;
129-
for (int j = 0; j < centroids.length; j++) {
130-
if (centroidOffsets == null || j == centroidOffsets[k]) {
131-
float dsq = VectorUtil.squareDistance(vector, centroids[j]);
132-
if (dsq < minDsq) {
133-
minDsq = dsq;
134-
bestCentroidOffset = j;
135-
}
133+
int getBestCentroid(float[][] centroids, float[] vector) {
134+
int bestCentroidOffset = 0;
135+
float minDsq = Float.MAX_VALUE;
136+
for (int i = 0; i < centroids.length; i++) {
137+
float dsq = VectorUtil.squareDistance(vector, centroids[i]);
138+
if (dsq < minDsq) {
139+
minDsq = dsq;
140+
bestCentroidOffset = i;
136141
}
137142
}
138143
return bestCentroidOffset;
@@ -271,7 +276,8 @@ void cluster(FloatVectorValues vectors, KMeansIntermediate kMeansIntermediate, L
271276
return;
272277
}
273278

274-
int[] assignments = new int[n];
279+
int[] assignments = kMeansIntermediate.assignments();
280+
assert assignments.length == n;
275281
float[][] nextCentroids = new float[centroids.length][vectors.dimension()];
276282
for (int i = 0; i < maxIterations; i++) {
277283
if (stepLloyd(vectors, centroids, nextCentroids, assignments, sampleSize, neighborhoods) == false) {
@@ -291,7 +297,7 @@ void cluster(FloatVectorValues vectors, KMeansIntermediate kMeansIntermediate, L
291297
* @param maxIterations the max iterations to shift centroids
292298
*/
293299
public static void cluster(FloatVectorValues vectors, float[][] centroids, int sampleSize, int maxIterations) throws IOException {
294-
KMeansIntermediate kMeansIntermediate = new KMeansIntermediate(centroids);
300+
KMeansIntermediate kMeansIntermediate = new KMeansIntermediate(centroids, new int[vectors.size()], vectors::ordToDoc);
295301
KMeansLocal kMeans = new KMeansLocal(sampleSize, maxIterations);
296302
kMeans.cluster(vectors, kMeansIntermediate);
297303
}

x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionIT.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1680,6 +1680,7 @@ public void testQueryOnEmptyDataIndex() {
16801680
}
16811681

16821682
public void testGroupingStatsOnMissingFields() {
1683+
assumeTrue("Pragmas are only allowed in snapshots", Build.current().isSnapshot());
16831684
assertAcked(client().admin().indices().prepareCreate("missing_field_index").setMapping("data", "type=long"));
16841685
long oneValue = between(1, 1000);
16851686
indexDoc("missing_field_index", "1", "data", oneValue);

0 commit comments

Comments
 (0)