Skip to content

Commit e3c2985

Browse files
committed
Merge remote-tracking branch 'upstream/main' into index-templates-tracking
* upstream/main: (26 commits) [Fleet] add privileges to `kibana_system` to read integrations data (elastic#132400) Add `TestEntitlementsRule` with support for dynamic entitled node paths for testing (elastic#132077) Reduce logging frequency for GCS per project clients (elastic#132429) Skip update/100_synthetic_source tests in yamlRestCompatTests (elastic#132296) Correct exception for missing nested path (elastic#132408) Fixing esql release tests elastic#132369 (elastic#132406) Adjust date docvalue formatting to return 4xx instead of 5xx (elastic#132414) Handle nested fields with the termvectors REST API in artificial docs (elastic#92568) Only collect bulk scored vectors when exceeding min competitive (elastic#132293) Fix release tests diskbbq update (elastic#132405) ESQL: Fix skipping of generative tests (elastic#132390) Short circuit failure handling in OIDC flow (elastic#130618) Small optimization in OptimizedScalarQuantizer by using mul instead of div (elastic#132397) Aggs: Add validation to Bucket script pipeline agg (elastic#132320) ESQL: Multiple parameters in ungrouped aggs (elastic#132375) ESQL: Explain test operators (elastic#132374) EQL: Deal with internally created IN in a different way for EQL (elastic#132167) Speed up hierarchical k-means by computing distances in bulk (elastic#132384) Reduce the number of fields per document (elastic#132322) Assert current thread in ESQL (elastic#132324) ...
2 parents 08aec70 + 364c70e commit e3c2985

File tree

207 files changed

+6468
-3499
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

207 files changed

+6468
-3499
lines changed
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
package org.elasticsearch.benchmark.vector;
10+
11+
import org.apache.lucene.util.VectorUtil;
12+
import org.apache.lucene.util.quantization.OptimizedScalarQuantizer;
13+
import org.elasticsearch.common.logging.LogConfigurator;
14+
import org.elasticsearch.simdvec.ESVectorUtil;
15+
import org.openjdk.jmh.annotations.Benchmark;
16+
import org.openjdk.jmh.annotations.BenchmarkMode;
17+
import org.openjdk.jmh.annotations.Fork;
18+
import org.openjdk.jmh.annotations.Measurement;
19+
import org.openjdk.jmh.annotations.Mode;
20+
import org.openjdk.jmh.annotations.OutputTimeUnit;
21+
import org.openjdk.jmh.annotations.Param;
22+
import org.openjdk.jmh.annotations.Scope;
23+
import org.openjdk.jmh.annotations.Setup;
24+
import org.openjdk.jmh.annotations.State;
25+
import org.openjdk.jmh.annotations.Warmup;
26+
import org.openjdk.jmh.infra.Blackhole;
27+
28+
import java.io.IOException;
29+
import java.util.Random;
30+
import java.util.concurrent.TimeUnit;
31+
32+
@BenchmarkMode(Mode.Throughput)
33+
@OutputTimeUnit(TimeUnit.MILLISECONDS)
34+
@State(Scope.Benchmark)
35+
// first iteration is complete garbage, so make sure we really warmup
36+
@Warmup(iterations = 4, time = 1)
37+
// real iterations. not useful to spend tons of time here, better to fork more
38+
@Measurement(iterations = 5, time = 1)
39+
// engage some noise reduction
40+
@Fork(value = 1)
41+
public class DistanceBulkBenchmark {
42+
43+
static {
44+
LogConfigurator.configureESLogging(); // native access requires logging to be initialized
45+
}
46+
47+
@Param({ "384", "782", "1024" })
48+
int dims;
49+
50+
int length;
51+
52+
int numVectors = 4 * 100;
53+
int numQueries = 10;
54+
55+
float[][] vectors;
56+
float[][] queries;
57+
float[] distances = new float[4];
58+
59+
@Setup
60+
public void setup() throws IOException {
61+
Random random = new Random(123);
62+
63+
this.length = OptimizedScalarQuantizer.discretize(dims, 64) / 8;
64+
65+
vectors = new float[numVectors][dims];
66+
for (float[] vector : vectors) {
67+
for (int i = 0; i < dims; i++) {
68+
vector[i] = random.nextFloat();
69+
}
70+
}
71+
72+
queries = new float[numQueries][dims];
73+
for (float[] query : queries) {
74+
for (int i = 0; i < dims; i++) {
75+
query[i] = random.nextFloat();
76+
}
77+
}
78+
}
79+
80+
@Benchmark
81+
@Fork(jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
82+
public void squareDistance(Blackhole bh) {
83+
for (int j = 0; j < numQueries; j++) {
84+
float[] query = queries[j];
85+
for (int i = 0; i < numVectors; i++) {
86+
float[] vector = vectors[i];
87+
float distance = VectorUtil.squareDistance(query, vector);
88+
bh.consume(distance);
89+
}
90+
}
91+
}
92+
93+
@Benchmark
94+
@Fork(jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
95+
public void soarDistance(Blackhole bh) {
96+
for (int j = 0; j < numQueries; j++) {
97+
float[] query = queries[j];
98+
for (int i = 0; i < numVectors; i++) {
99+
float[] vector = vectors[i];
100+
float distance = ESVectorUtil.soarDistance(query, vector, vector, 1.0f, 1.0f);
101+
bh.consume(distance);
102+
}
103+
}
104+
}
105+
106+
@Benchmark
107+
@Fork(jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
108+
public void squareDistanceBulk(Blackhole bh) {
109+
for (int j = 0; j < numQueries; j++) {
110+
float[] query = queries[j];
111+
for (int i = 0; i < numVectors; i += 4) {
112+
ESVectorUtil.squareDistanceBulk(query, vectors[i], vectors[i + 1], vectors[i + 2], vectors[i + 3], distances);
113+
for (float distance : distances) {
114+
bh.consume(distance);
115+
}
116+
117+
}
118+
}
119+
}
120+
121+
@Benchmark
122+
@Fork(jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
123+
public void soarDistanceBulk(Blackhole bh) {
124+
for (int j = 0; j < numQueries; j++) {
125+
float[] query = queries[j];
126+
for (int i = 0; i < numVectors; i += 4) {
127+
ESVectorUtil.soarDistanceBulk(
128+
query,
129+
vectors[i],
130+
vectors[i + 1],
131+
vectors[i + 2],
132+
vectors[i + 3],
133+
vectors[i],
134+
1.0f,
135+
1.0f,
136+
distances
137+
);
138+
for (float distance : distances) {
139+
bh.consume(distance);
140+
}
141+
142+
}
143+
}
144+
}
145+
}

docs/changelog/132167.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 132167
2+
summary: Deal with internally created IN in a different way for EQL
3+
area: EQL
4+
type: bug
5+
issues:
6+
- 118621

docs/changelog/132320.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 132320
2+
summary: "Aggs: Add validation to Bucket script pipeline agg"
3+
area: Aggregations
4+
type: bug
5+
issues:
6+
- 132272

docs/changelog/132408.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 132408
2+
summary: Correct exception for missing nested path
3+
area: Search
4+
type: bug
5+
issues: []

docs/changelog/132414.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 132414
2+
summary: Adjust date docvalue formatting to return 4xx instead of 5xx
3+
area: Search
4+
type: bug
5+
issues: []

docs/changelog/92568.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 92568
2+
summary: Support nested fields for term vectors API when using artificial documents
3+
area: Search
4+
type: enhancement
5+
issues:
6+
- 91902

libs/logstash-bridge/src/main/java/org/elasticsearch/logstashbridge/script/ScriptServiceBridge.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88
*/
99
package org.elasticsearch.logstashbridge.script;
1010

11+
import org.elasticsearch.cluster.project.ProjectResolver;
1112
import org.elasticsearch.common.settings.Settings;
13+
import org.elasticsearch.core.FixForMultiProject;
1214
import org.elasticsearch.ingest.common.ProcessorsWhitelistExtension;
1315
import org.elasticsearch.logstashbridge.StableBridgeAPI;
1416
import org.elasticsearch.logstashbridge.common.SettingsBridge;
@@ -66,7 +68,9 @@ private static ScriptService getScriptService(final Settings settings, final Lon
6668
MustacheScriptEngine.NAME,
6769
new MustacheScriptEngine(settings)
6870
);
69-
return new ScriptService(settings, scriptEngines, ScriptModule.CORE_CONTEXTS, timeProvider);
71+
@FixForMultiProject // Should this be non-null?
72+
final ProjectResolver projectResolver = null;
73+
return new ScriptService(settings, scriptEngines, ScriptModule.CORE_CONTEXTS, timeProvider, projectResolver);
7074
}
7175

7276
private static List<Whitelist> getPainlessBaseWhiteList() {

libs/simdvec/src/main/java/org/elasticsearch/simdvec/ES91OSQVectorsScorer.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ public float score(
141141
*
142142
* <p>The results are stored in the provided scores array.
143143
*/
144-
public void scoreBulk(
144+
public float scoreBulk(
145145
byte[] q,
146146
float queryLowerInterval,
147147
float queryUpperInterval,
@@ -158,6 +158,7 @@ public void scoreBulk(
158158
targetComponentSums[i] = Short.toUnsignedInt(in.readShort());
159159
}
160160
in.readFloats(additionalCorrections, 0, BULK_SIZE);
161+
float maxScore = Float.NEGATIVE_INFINITY;
161162
for (int i = 0; i < BULK_SIZE; i++) {
162163
scores[i] = score(
163164
queryLowerInterval,
@@ -172,6 +173,10 @@ public void scoreBulk(
172173
additionalCorrections[i],
173174
scores[i]
174175
);
176+
if (scores[i] > maxScore) {
177+
maxScore = scores[i];
178+
}
175179
}
180+
return maxScore;
176181
}
177182
}

libs/simdvec/src/main/java/org/elasticsearch/simdvec/ESVectorUtil.java

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,4 +293,79 @@ public static int quantizeVectorWithIntervals(float[] vector, int[] destination,
293293
}
294294
return IMPL.quantizeVectorWithIntervals(vector, destination, lowInterval, upperInterval, bit);
295295
}
296+
297+
/**
298+
* Bulk computation of square distances between a query vector and four vectors.Result is stored in the provided distances array.
299+
*
300+
* @param q the query vector
301+
* @param v0 the first vector
302+
* @param v1 the second vector
303+
* @param v2 the third vector
304+
* @param v3 the fourth vector
305+
* @param distances an array to store the computed square distances, must have length 4
306+
*
307+
* @throws IllegalArgumentException if the dimensions of the vectors do not match or if the distances array does not have length 4
308+
*/
309+
public static void squareDistanceBulk(float[] q, float[] v0, float[] v1, float[] v2, float[] v3, float[] distances) {
310+
if (q.length != v0.length) {
311+
throw new IllegalArgumentException("vector dimensions differ: " + q.length + "!=" + v0.length);
312+
}
313+
if (q.length != v1.length) {
314+
throw new IllegalArgumentException("vector dimensions differ: " + q.length + "!=" + v1.length);
315+
}
316+
if (q.length != v2.length) {
317+
throw new IllegalArgumentException("vector dimensions differ: " + q.length + "!=" + v2.length);
318+
}
319+
if (q.length != v3.length) {
320+
throw new IllegalArgumentException("vector dimensions differ: " + q.length + "!=" + v3.length);
321+
}
322+
if (distances.length != 4) {
323+
throw new IllegalArgumentException("distances array must have length 4, but was: " + distances.length);
324+
}
325+
IMPL.squareDistanceBulk(q, v0, v1, v2, v3, distances);
326+
}
327+
328+
/**
329+
* Bulk computation of the soar distance for a vector to four centroids
330+
* @param v1 the vector
331+
* @param c0 the first centroid
332+
* @param c1 the second centroid
333+
* @param c2 the third centroid
334+
* @param c3 the fourth centroid
335+
* @param originalResidual the residual with the actually nearest centroid
336+
* @param soarLambda the lambda parameter
337+
* @param rnorm distance to the nearest centroid
338+
* @param distances an array to store the computed soar distances, must have length 4
339+
*/
340+
public static void soarDistanceBulk(
341+
float[] v1,
342+
float[] c0,
343+
float[] c1,
344+
float[] c2,
345+
float[] c3,
346+
float[] originalResidual,
347+
float soarLambda,
348+
float rnorm,
349+
float[] distances
350+
) {
351+
if (v1.length != c0.length) {
352+
throw new IllegalArgumentException("vector dimensions differ: " + v1.length + "!=" + c0.length);
353+
}
354+
if (v1.length != c1.length) {
355+
throw new IllegalArgumentException("vector dimensions differ: " + v1.length + "!=" + c1.length);
356+
}
357+
if (v1.length != c2.length) {
358+
throw new IllegalArgumentException("vector dimensions differ: " + v1.length + "!=" + c2.length);
359+
}
360+
if (v1.length != c3.length) {
361+
throw new IllegalArgumentException("vector dimensions differ: " + v1.length + "!=" + c3.length);
362+
}
363+
if (v1.length != originalResidual.length) {
364+
throw new IllegalArgumentException("vector dimensions differ: " + v1.length + "!=" + originalResidual.length);
365+
}
366+
if (distances.length != 4) {
367+
throw new IllegalArgumentException("distances array must have length 4, but was: " + distances.length);
368+
}
369+
IMPL.soarDistanceBulk(v1, c0, c1, c2, c3, originalResidual, soarLambda, rnorm, distances);
370+
}
296371
}

libs/simdvec/src/main/java/org/elasticsearch/simdvec/internal/vectorization/DefaultESVectorUtilSupport.java

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,11 @@ public void calculateOSQGridPoints(float[] target, int[] quantize, int points, f
8080
float dbb = 0;
8181
float dax = 0;
8282
float dbx = 0;
83+
float invPmOnes = 1f / (points - 1f);
8384
for (int i = 0; i < target.length; ++i) {
8485
float v = target[i];
8586
float k = quantize[i];
86-
float s = k / (points - 1);
87+
float s = k * invPmOnes;
8788
float ms = 1f - s;
8889
daa = fma(ms, ms, daa);
8990
dab = fma(ms, s, dab);
@@ -293,4 +294,30 @@ public int quantizeVectorWithIntervals(float[] vector, int[] destination, float
293294
}
294295
return sumQuery;
295296
}
297+
298+
@Override
299+
public void squareDistanceBulk(float[] query, float[] v0, float[] v1, float[] v2, float[] v3, float[] distances) {
300+
distances[0] = VectorUtil.squareDistance(query, v0);
301+
distances[1] = VectorUtil.squareDistance(query, v1);
302+
distances[2] = VectorUtil.squareDistance(query, v2);
303+
distances[3] = VectorUtil.squareDistance(query, v3);
304+
}
305+
306+
@Override
307+
public void soarDistanceBulk(
308+
float[] v1,
309+
float[] c0,
310+
float[] c1,
311+
float[] c2,
312+
float[] c3,
313+
float[] originalResidual,
314+
float soarLambda,
315+
float rnorm,
316+
float[] distances
317+
) {
318+
distances[0] = soarDistance(v1, c0, originalResidual, soarLambda, rnorm);
319+
distances[1] = soarDistance(v1, c1, originalResidual, soarLambda, rnorm);
320+
distances[2] = soarDistance(v1, c2, originalResidual, soarLambda, rnorm);
321+
distances[3] = soarDistance(v1, c3, originalResidual, soarLambda, rnorm);
322+
}
296323
}

0 commit comments

Comments
 (0)