Skip to content

Commit a2f7ad2

Browse files
authored
Merge branch 'main' into markjhoy/add_sparse_vector_token_pruning_index_options
2 parents 8623573 + aeb3718 commit a2f7ad2

File tree

171 files changed

+3825
-699
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

171 files changed

+3825
-699
lines changed
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
package org.elasticsearch.benchmark.vector;
10+
11+
import org.apache.lucene.store.Directory;
12+
import org.apache.lucene.store.IOContext;
13+
import org.apache.lucene.store.IndexInput;
14+
import org.apache.lucene.store.IndexOutput;
15+
import org.apache.lucene.store.MMapDirectory;
16+
import org.apache.lucene.util.VectorUtil;
17+
import org.elasticsearch.common.logging.LogConfigurator;
18+
import org.elasticsearch.core.IOUtils;
19+
import org.elasticsearch.simdvec.ES91Int4VectorsScorer;
20+
import org.elasticsearch.simdvec.internal.vectorization.ESVectorizationProvider;
21+
import org.openjdk.jmh.annotations.Benchmark;
22+
import org.openjdk.jmh.annotations.BenchmarkMode;
23+
import org.openjdk.jmh.annotations.Fork;
24+
import org.openjdk.jmh.annotations.Measurement;
25+
import org.openjdk.jmh.annotations.Mode;
26+
import org.openjdk.jmh.annotations.OutputTimeUnit;
27+
import org.openjdk.jmh.annotations.Param;
28+
import org.openjdk.jmh.annotations.Scope;
29+
import org.openjdk.jmh.annotations.Setup;
30+
import org.openjdk.jmh.annotations.State;
31+
import org.openjdk.jmh.annotations.TearDown;
32+
import org.openjdk.jmh.annotations.Warmup;
33+
import org.openjdk.jmh.infra.Blackhole;
34+
35+
import java.io.IOException;
36+
import java.nio.file.Files;
37+
import java.util.concurrent.ThreadLocalRandom;
38+
import java.util.concurrent.TimeUnit;
39+
40+
@BenchmarkMode(Mode.Throughput)
41+
@OutputTimeUnit(TimeUnit.MILLISECONDS)
42+
@State(Scope.Benchmark)
43+
// first iteration is complete garbage, so make sure we really warmup
44+
@Warmup(iterations = 4, time = 1)
45+
// real iterations. not useful to spend tons of time here, better to fork more
46+
@Measurement(iterations = 5, time = 1)
47+
// engage some noise reduction
48+
@Fork(value = 1)
49+
public class Int4ScorerBenchmark {
50+
51+
static {
52+
LogConfigurator.configureESLogging(); // native access requires logging to be initialized
53+
}
54+
55+
@Param({ "384", "702", "1024" })
56+
int dims;
57+
58+
int numVectors = 200;
59+
int numQueries = 10;
60+
61+
byte[] scratch;
62+
byte[][] binaryVectors;
63+
byte[][] binaryQueries;
64+
65+
ES91Int4VectorsScorer scorer;
66+
Directory dir;
67+
IndexInput in;
68+
69+
@Setup
70+
public void setup() throws IOException {
71+
binaryVectors = new byte[numVectors][dims];
72+
dir = new MMapDirectory(Files.createTempDirectory("vectorData"));
73+
try (IndexOutput out = dir.createOutput("vectors", IOContext.DEFAULT)) {
74+
for (byte[] binaryVector : binaryVectors) {
75+
for (int i = 0; i < dims; i++) {
76+
// 4-bit quantization
77+
binaryVector[i] = (byte) ThreadLocalRandom.current().nextInt(16);
78+
}
79+
out.writeBytes(binaryVector, 0, binaryVector.length);
80+
}
81+
}
82+
83+
in = dir.openInput("vectors", IOContext.DEFAULT);
84+
binaryQueries = new byte[numVectors][dims];
85+
for (byte[] binaryVector : binaryVectors) {
86+
for (int i = 0; i < dims; i++) {
87+
// 4-bit quantization
88+
binaryVector[i] = (byte) ThreadLocalRandom.current().nextInt(16);
89+
}
90+
}
91+
92+
scratch = new byte[dims];
93+
scorer = ESVectorizationProvider.getInstance().newES91Int4VectorsScorer(in, dims);
94+
}
95+
96+
@TearDown
97+
public void teardown() throws IOException {
98+
IOUtils.close(dir, in);
99+
}
100+
101+
@Benchmark
102+
@Fork(jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
103+
public void scoreFromArray(Blackhole bh) throws IOException {
104+
for (int j = 0; j < numQueries; j++) {
105+
in.seek(0);
106+
for (int i = 0; i < numVectors; i++) {
107+
in.readBytes(scratch, 0, dims);
108+
bh.consume(VectorUtil.int4DotProduct(binaryQueries[j], scratch));
109+
}
110+
}
111+
}
112+
113+
@Benchmark
114+
@Fork(jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
115+
public void scoreFromMemorySegmentOnlyVector(Blackhole bh) throws IOException {
116+
for (int j = 0; j < numQueries; j++) {
117+
in.seek(0);
118+
for (int i = 0; i < numVectors; i++) {
119+
bh.consume(scorer.int4DotProduct(binaryQueries[j]));
120+
}
121+
}
122+
}
123+
}

docs/changelog/125143.yaml

Lines changed: 0 additions & 6 deletions
This file was deleted.

docs/changelog/129325.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 129325
2+
summary: Check for model deployment in inference endpoints before stopping
3+
area: Machine Learning
4+
type: bug
5+
issues:
6+
- 128549

docs/changelog/129413.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 129413
2+
summary: '`SageMaker` Elastic Payload'
3+
area: Machine Learning
4+
type: enhancement
5+
issues: []

docs/changelog/129557.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 129557
2+
summary: Pushdown for LIKE (LIST)
3+
area: ES|QL
4+
type: enhancement
5+
issues: []

docs/changelog/129659.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 129659
2+
summary: Simplified RRF Retriever
3+
area: Search
4+
type: enhancement
5+
issues: []

docs/reference/query-languages/esql/_snippets/functions/description/match.md

Lines changed: 13 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/reference/query-languages/esql/_snippets/functions/description/match_phrase.md

Lines changed: 10 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/reference/query-languages/esql/_snippets/functions/examples/match_phrase.md

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/reference/query-languages/esql/_snippets/functions/examples/qstr.md

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)