Skip to content

Commit 4682213

Browse files
committed
Add optimized native Neon, AVX2, and AVX 512 float32 vector operations.
1 parent 139cebc commit 4682213

File tree

35 files changed

+2890
-113
lines changed

35 files changed

+2890
-113
lines changed
Lines changed: 238 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,238 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.benchmark.vector;
11+
12+
import org.apache.lucene.codecs.hnsw.DefaultFlatVectorScorer;
13+
import org.apache.lucene.codecs.lucene95.OffHeapFloatVectorValues;
14+
import org.apache.lucene.index.FloatVectorValues;
15+
import org.apache.lucene.index.VectorSimilarityFunction;
16+
import org.apache.lucene.store.Directory;
17+
import org.apache.lucene.store.IOContext;
18+
import org.apache.lucene.store.IndexInput;
19+
import org.apache.lucene.store.IndexOutput;
20+
import org.apache.lucene.store.MMapDirectory;
21+
import org.apache.lucene.util.hnsw.RandomVectorScorer;
22+
import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
23+
import org.apache.lucene.util.hnsw.UpdateableRandomVectorScorer;
24+
import org.elasticsearch.common.logging.LogConfigurator;
25+
import org.elasticsearch.core.IOUtils;
26+
import org.elasticsearch.simdvec.VectorScorerFactory;
27+
import org.openjdk.jmh.annotations.Benchmark;
28+
import org.openjdk.jmh.annotations.BenchmarkMode;
29+
import org.openjdk.jmh.annotations.Fork;
30+
import org.openjdk.jmh.annotations.Measurement;
31+
import org.openjdk.jmh.annotations.Mode;
32+
import org.openjdk.jmh.annotations.OutputTimeUnit;
33+
import org.openjdk.jmh.annotations.Param;
34+
import org.openjdk.jmh.annotations.Scope;
35+
import org.openjdk.jmh.annotations.Setup;
36+
import org.openjdk.jmh.annotations.State;
37+
import org.openjdk.jmh.annotations.TearDown;
38+
import org.openjdk.jmh.annotations.Warmup;
39+
40+
import java.io.IOException;
41+
import java.nio.ByteBuffer;
42+
import java.nio.ByteOrder;
43+
import java.nio.file.Files;
44+
import java.util.concurrent.ThreadLocalRandom;
45+
import java.util.concurrent.TimeUnit;
46+
47+
import static org.elasticsearch.simdvec.VectorSimilarityType.DOT_PRODUCT;
48+
import static org.elasticsearch.simdvec.VectorSimilarityType.EUCLIDEAN;
49+
50+
@Fork(value = 1, jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
51+
@Warmup(iterations = 3, time = 3)
52+
@Measurement(iterations = 5, time = 3)
53+
@BenchmarkMode(Mode.Throughput)
54+
@OutputTimeUnit(TimeUnit.MICROSECONDS)
55+
@State(Scope.Thread)
56+
/**
57+
* Benchmark that compares various float32 vector similarity function
58+
* implementations: scalar, Lucene's panama-ized, and Elasticsearch's native.
59+
* Run with ./gradlew -p benchmarks run --args 'Float32ScorerBenchmark'
60+
*/
61+
public class Float32ScorerBenchmark {
62+
63+
// Static initializer: configures Elasticsearch logging before any other code in this class runs.
static {
64+
LogConfigurator.configureESLogging(); // native access requires logging to be initialized
65+
}
66+
67+
@Param({ "96", "768", "1024" })
68+
public int dims;
69+
final int size = 3; // there are only two vectors to compare against
70+
71+
Directory dir;
72+
IndexInput in;
73+
VectorScorerFactory factory;
74+
75+
float[] vec1, vec2, vec3;
76+
77+
UpdateableRandomVectorScorer luceneDotScorer;
78+
UpdateableRandomVectorScorer luceneSqrScorer;
79+
UpdateableRandomVectorScorer nativeDotScorer;
80+
UpdateableRandomVectorScorer nativeSqrScorer;
81+
82+
RandomVectorScorer luceneDotScorerQuery;
83+
RandomVectorScorer nativeDotScorerQuery;
84+
RandomVectorScorer luceneSqrScorerQuery;
85+
RandomVectorScorer nativeSqrScorerQuery;
86+
87+
@Setup
88+
public void setup() throws IOException {
89+
var optionalVectorScorerFactory = VectorScorerFactory.instance();
90+
if (optionalVectorScorerFactory.isEmpty()) {
91+
String msg = "JDK=["
92+
+ Runtime.version()
93+
+ "], os.name=["
94+
+ System.getProperty("os.name")
95+
+ "], os.arch=["
96+
+ System.getProperty("os.arch")
97+
+ "]";
98+
throw new AssertionError("Vector scorer factory not present. Cannot run the benchmark. " + msg);
99+
}
100+
factory = optionalVectorScorerFactory.get();
101+
vec1 = randomFloatArray(dims);
102+
vec2 = randomFloatArray(dims);
103+
vec3 = randomFloatArray(dims);
104+
105+
dir = new MMapDirectory(Files.createTempDirectory("nativeFloat32Bench"));
106+
try (IndexOutput out = dir.createOutput("vector32.data", IOContext.DEFAULT)) {
107+
writeFloat32Vectors(out, vec1, vec2, vec3);
108+
}
109+
in = dir.openInput("vector32.data", IOContext.DEFAULT);
110+
var values = vectorValues(dims, 3, in, VectorSimilarityFunction.DOT_PRODUCT);
111+
luceneDotScorer = luceneScoreSupplier(values, VectorSimilarityFunction.DOT_PRODUCT).scorer();
112+
luceneDotScorer.setScoringOrdinal(0);
113+
values = vectorValues(dims, 3, in, VectorSimilarityFunction.EUCLIDEAN);
114+
luceneSqrScorer = luceneScoreSupplier(values, VectorSimilarityFunction.EUCLIDEAN).scorer();
115+
luceneSqrScorer.setScoringOrdinal(0);
116+
117+
nativeDotScorer = factory.getFloat32VectorScorerSupplier(DOT_PRODUCT, in, values).get().scorer();
118+
nativeDotScorer.setScoringOrdinal(0);
119+
nativeSqrScorer = factory.getFloat32VectorScorerSupplier(EUCLIDEAN, in, values).get().scorer();
120+
nativeSqrScorer.setScoringOrdinal(0);
121+
122+
// setup for getFloat32VectorScorer / query vector scoring
123+
float[] queryVec = new float[dims];
124+
for (int i = 0; i < dims; i++) {
125+
queryVec[i] = ThreadLocalRandom.current().nextFloat();
126+
}
127+
luceneDotScorerQuery = luceneScorer(values, VectorSimilarityFunction.DOT_PRODUCT, queryVec);
128+
nativeDotScorerQuery = factory.getFloat32VectorScorer(VectorSimilarityFunction.DOT_PRODUCT, values, queryVec).get();
129+
luceneSqrScorerQuery = luceneScorer(values, VectorSimilarityFunction.EUCLIDEAN, queryVec);
130+
nativeSqrScorerQuery = factory.getFloat32VectorScorer(VectorSimilarityFunction.EUCLIDEAN, values, queryVec).get();
131+
}
132+
133+
@TearDown
134+
public void teardown() throws IOException {
135+
IOUtils.close(dir, in);
136+
}
137+
138+
// we score against two different ords to avoid the lastOrd cache in vector values
139+
@Benchmark
140+
public float dotProductLucene() throws IOException {
141+
return luceneDotScorer.score(1) + luceneDotScorer.score(2);
142+
}
143+
144+
@Benchmark
145+
public float dotProductNative() throws IOException {
146+
return nativeDotScorer.score(1) + nativeDotScorer.score(2);
147+
}
148+
149+
@Benchmark
150+
public float dotProductScalar() {
151+
return dotProductScalarImpl(vec1, vec2) + dotProductScalarImpl(vec1, vec3);
152+
}
153+
154+
@Benchmark
155+
public float dotProductLuceneQuery() throws IOException {
156+
return luceneDotScorerQuery.score(1) + luceneDotScorerQuery.score(2);
157+
}
158+
159+
@Benchmark
160+
public float dotProductNativeQuery() throws IOException {
161+
return nativeDotScorerQuery.score(1) + nativeDotScorerQuery.score(2);
162+
}
163+
164+
// -- square distance
165+
166+
@Benchmark
167+
public float squareDistanceLucene() throws IOException {
168+
return luceneSqrScorer.score(1) + luceneSqrScorer.score(2);
169+
}
170+
171+
@Benchmark
172+
public float squareDistanceNative() throws IOException {
173+
return nativeSqrScorer.score(1) + nativeSqrScorer.score(2);
174+
}
175+
176+
@Benchmark
177+
public float squareDistanceScalar() {
178+
return squareDistanceScalarImpl(vec1, vec2) + squareDistanceScalarImpl(vec1, vec3);
179+
}
180+
181+
@Benchmark
182+
public float squareDistanceLuceneQuery() throws IOException {
183+
return luceneSqrScorerQuery.score(1) + luceneSqrScorerQuery.score(2);
184+
}
185+
186+
@Benchmark
187+
public float squareDistanceNativeQuery() throws IOException {
188+
return nativeSqrScorerQuery.score(1) + nativeSqrScorerQuery.score(2);
189+
}
190+
191+
static float dotProductScalarImpl(float[] vec1, float[] vec2) {
192+
float dot = 0;
193+
for (int i = 0; i < vec1.length; i++) {
194+
dot += vec1[i] * vec2[i];
195+
}
196+
return Math.max((1 + dot) / 2, 0);
197+
}
198+
199+
static float squareDistanceScalarImpl(float[] vec1, float[] vec2) {
200+
float dst = 0;
201+
for (int i = 0; i < vec1.length; i++) {
202+
float diff = vec1[i] - vec2[i];
203+
dst += diff * diff;
204+
}
205+
return 1 / (1f + dst);
206+
}
207+
208+
FloatVectorValues vectorValues(int dims, int size, IndexInput in, VectorSimilarityFunction sim) throws IOException {
209+
var slice = in.slice("values", 0, in.length());
210+
var byteSize = dims * Float.BYTES;
211+
return new OffHeapFloatVectorValues.DenseOffHeapVectorValues(dims, size, slice, byteSize, DefaultFlatVectorScorer.INSTANCE, sim);
212+
}
213+
214+
RandomVectorScorerSupplier luceneScoreSupplier(FloatVectorValues values, VectorSimilarityFunction sim) throws IOException {
215+
return DefaultFlatVectorScorer.INSTANCE.getRandomVectorScorerSupplier(sim, values);
216+
}
217+
218+
RandomVectorScorer luceneScorer(FloatVectorValues values, VectorSimilarityFunction sim, float[] queryVec) throws IOException {
219+
return DefaultFlatVectorScorer.INSTANCE.getRandomVectorScorer(sim, values, queryVec);
220+
}
221+
222+
static void writeFloat32Vectors(IndexOutput out, float[]... vectors) throws IOException {
223+
var buffer = ByteBuffer.allocate(vectors[0].length * Float.BYTES).order(ByteOrder.LITTLE_ENDIAN);
224+
for (var v : vectors) {
225+
buffer.asFloatBuffer().put(v);
226+
out.writeBytes(buffer.array(), buffer.array().length);
227+
}
228+
}
229+
230+
static float[] randomFloatArray(int length) {
231+
var random = ThreadLocalRandom.current();
232+
float[] fa = new float[length];
233+
for (int i = 0; i < length; i++) {
234+
fa[i] = random.nextFloat();
235+
}
236+
return fa;
237+
}
238+
}

benchmarks/src/main/java/org/elasticsearch/benchmark/vector/VectorScorerBenchmark.java renamed to benchmarks/src/main/java/org/elasticsearch/benchmark/vector/Int7uScorerBenchmark.java

Lines changed: 5 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -55,24 +55,23 @@
5555
/**
5656
* Benchmark that compares various scalar quantized vector similarity function
5757
* implementations: scalar, Lucene's panama-ized, and Elasticsearch's native.
58-
* Run with ./gradlew -p benchmarks run --args 'VectorScorerBenchmark'
58+
* Run with ./gradlew -p benchmarks run --args 'Int7uScorerBenchmark'
5959
*/
60-
public class VectorScorerBenchmark {
60+
public class Int7uScorerBenchmark {
6161

6262
static {
6363
LogConfigurator.configureESLogging(); // native access requires logging to be initialized
6464
}
6565

6666
@Param({ "96", "768", "1024" })
67-
int dims;
68-
int size = 2; // there are only two vectors to compare
67+
public int dims;
68+
final int size = 2; // there are only two vectors to compare
6969

7070
Directory dir;
7171
IndexInput in;
7272
VectorScorerFactory factory;
7373

74-
byte[] vec1;
75-
byte[] vec2;
74+
byte[] vec1, vec2;
7675
float vec1Offset;
7776
float vec2Offset;
7877
float scoreCorrectionConstant;
@@ -139,39 +138,6 @@ public void setup() throws IOException {
139138
nativeDotScorerQuery = factory.getInt7SQVectorScorer(VectorSimilarityFunction.DOT_PRODUCT, values, queryVec).get();
140139
luceneSqrScorerQuery = luceneScorer(values, VectorSimilarityFunction.EUCLIDEAN, queryVec);
141140
nativeSqrScorerQuery = factory.getInt7SQVectorScorer(VectorSimilarityFunction.EUCLIDEAN, values, queryVec).get();
142-
143-
// sanity
144-
var f1 = dotProductLucene();
145-
var f2 = dotProductNative();
146-
var f3 = dotProductScalar();
147-
if (f1 != f2) {
148-
throw new AssertionError("lucene[" + f1 + "] != " + "native[" + f2 + "]");
149-
}
150-
if (f1 != f3) {
151-
throw new AssertionError("lucene[" + f1 + "] != " + "scalar[" + f3 + "]");
152-
}
153-
// square distance
154-
f1 = squareDistanceLucene();
155-
f2 = squareDistanceNative();
156-
f3 = squareDistanceScalar();
157-
if (f1 != f2) {
158-
throw new AssertionError("lucene[" + f1 + "] != " + "native[" + f2 + "]");
159-
}
160-
if (f1 != f3) {
161-
throw new AssertionError("lucene[" + f1 + "] != " + "scalar[" + f3 + "]");
162-
}
163-
164-
var q1 = dotProductLuceneQuery();
165-
var q2 = dotProductNativeQuery();
166-
if (q1 != q2) {
167-
throw new AssertionError("query: lucene[" + q1 + "] != " + "native[" + q2 + "]");
168-
}
169-
170-
var sqr1 = squareDistanceLuceneQuery();
171-
var sqr2 = squareDistanceNativeQuery();
172-
if (sqr1 != sqr2) {
173-
throw new AssertionError("query: lucene[" + q1 + "] != " + "native[" + q2 + "]");
174-
}
175141
}
176142

177143
@TearDown

0 commit comments

Comments
 (0)