Skip to content

Commit 4cd2247

Browse files
authored
Random vector index build jmh + setup scripts (#424)
* add JMH test for random index buildup Signed-off-by: Samuel Herman <sherman8915@gmail.com> * add script for node setup Signed-off-by: Samuel Herman <sherman8915@gmail.com> * add rat exclusion for scripts Signed-off-by: Samuel Herman <sherman8915@gmail.com> --------- Signed-off-by: Samuel Herman <sherman8915@gmail.com>
1 parent e574a9d commit 4cd2247

File tree

4 files changed

+147
-5
lines changed

4 files changed

+147
-5
lines changed

benchmarks-jmh/README.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,21 +37,21 @@ Common JMH command line options you can use in the configuration or command line
3737

3838
2. Focus on specific benchmarks
3939

40-
For example in the below command lines we are going to run only `IndexConstructionWithStaticSetBenchmark`
40+
For example, the commands below run only `IndexConstructionWithRandomSetBenchmark`:
4141
```shell
4242
mvn clean install -DskipTests=true
43-
BENCHMARK_NAME="IndexConstructionWithStaticSetBenchmark"
43+
BENCHMARK_NAME="IndexConstructionWithRandomSetBenchmark"
4444
java --enable-native-access=ALL-UNNAMED \
4545
--add-modules=jdk.incubator.vector \
4646
-XX:+HeapDumpOnOutOfMemoryError \
47-
-Xmx14G -Djvector.experimental.enable_native_vectorization=true \
48-
-jar benchmarks-jmh/target/benchmarks-jmh-4.0.0-beta.2-SNAPSHOT.jar $BENCHMARK_NAME
47+
-Xmx20G -Djvector.experimental.enable_native_vectorization=true \
48+
-jar benchmarks-jmh/target/benchmarks-jmh-4.0.0-beta.3-SNAPSHOT.jar $BENCHMARK_NAME
4949
```
5050

5151
You may want to rerun a specific benchmark without testing the entire grid of scenarios defined in the benchmark.
5252
You can just do the following to set M and beamWidth:
5353
```shell
54-
java -jar target/benchmarks.jar IndexConstructionWithStaticSetBenchmark -p M=32 -p beamWidth=100
54+
java -jar benchmarks-jmh/target/benchmarks-jmh-4.0.0-beta.3-SNAPSHOT.jar IndexConstructionWithStaticSetBenchmark -p M=32 -p beamWidth=100
5555
```
5656

5757

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#!/usr/bin/env bash
###### Script for test node setup ######
#
# Provisions an apt-based Linux x64 node for running the jvector JMH benchmarks:
#   1. installs OpenJDK 22.0.2 under /usr/lib/jvm and makes it the default java/javac
#   2. installs Maven and Git
#   3. clones and builds jvector (tests skipped)
#   4. runs the JMH benchmark jar
#
# Requires sudo and network access. Run it from the directory that should hold
# the jvector checkout.

# Stop at the first failing command (or unset variable / failed pipeline stage)
# so that a broken download or build is not followed by a confusing benchmark failure.
set -euo pipefail

JDK_HOME="/usr/lib/jvm/jdk-22.0.2"
# The benchmark jar is produced by the benchmarks-jmh module, so it lives in that
# module's target directory, not in the repository root's target directory.
JMH_JAR="benchmarks-jmh/target/benchmarks-jmh-4.0.0-beta.3-SNAPSHOT.jar"

sudo apt-get update

# Download JDK 22
wget https://download.java.net/java/GA/jdk22.0.2/c9ecb94cd31b495da20a27d4581645e8/9/GPL/openjdk-22.0.2_linux-x64_bin.tar.gz

# Extract JDK 22
tar -xzf openjdk-22.0.2_linux-x64_bin.tar.gz

sudo mkdir -p /usr/lib/jvm
sudo mv jdk-22.0.2 "$JDK_HOME"

########################################
# Setup Alternatives
########################################
sudo update-alternatives --install "/usr/bin/java" "java" "$JDK_HOME/bin/java" 1
sudo update-alternatives --install "/usr/bin/javac" "javac" "$JDK_HOME/bin/javac" 1

# Pin JDK 22 explicitly (manual mode). It is registered with priority 1, so a
# distribution JDK pulled in later (e.g. as a Maven dependency) may register a
# higher priority and would otherwise take over /usr/bin/java in auto mode.
sudo update-alternatives --set java "$JDK_HOME/bin/java"
sudo update-alternatives --set javac "$JDK_HOME/bin/javac"

# Make Maven build with the same JDK that runs the benchmarks.
export JAVA_HOME="$JDK_HOME"

########################################
# Verification
########################################

echo
echo "Installation complete. Current default Java version:"
java -version

# Install Maven
sudo apt-get install maven -y

# Install Git
sudo apt-get install git -y

# clone jvector (skipped when a checkout already exists, so the script can be rerun)
if [ ! -d jvector ]; then
  git clone https://github.com/datastax/jvector.git
fi

# Build jvector
cd jvector
mvn clean install -DskipTests=true

# Run benchmarks
java --enable-native-access=ALL-UNNAMED \
  --add-modules=jdk.incubator.vector \
  -XX:+HeapDumpOnOutOfMemoryError \
  -Xmx14G -Djvector.experimental.enable_native_vectorization=true \
  -jar "$JMH_JAR"
47+
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
/*
2+
* Copyright DataStax, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package io.github.jbellis.jvector.bench;
17+
18+
import io.github.jbellis.jvector.example.util.SiftLoader;
19+
import io.github.jbellis.jvector.graph.GraphIndexBuilder;
20+
import io.github.jbellis.jvector.graph.ListRandomAccessVectorValues;
21+
import io.github.jbellis.jvector.graph.RandomAccessVectorValues;
22+
import io.github.jbellis.jvector.graph.similarity.BuildScoreProvider;
23+
import io.github.jbellis.jvector.vector.VectorSimilarityFunction;
24+
import io.github.jbellis.jvector.vector.VectorizationProvider;
25+
import io.github.jbellis.jvector.vector.types.VectorFloat;
26+
import org.openjdk.jmh.annotations.*;
27+
import org.openjdk.jmh.infra.Blackhole;
28+
import org.slf4j.Logger;
29+
import org.slf4j.LoggerFactory;
30+
31+
import java.io.IOException;
32+
import java.util.ArrayList;
33+
import java.util.Set;
34+
import java.util.concurrent.TimeUnit;
35+
36+
import io.github.jbellis.jvector.vector.types.VectorTypeSupport;
37+
38+
39+
@BenchmarkMode(Mode.AverageTime)
40+
@OutputTimeUnit(TimeUnit.MILLISECONDS)
41+
@State(Scope.Thread)
42+
@Fork(1)
43+
@Warmup(iterations = 2)
44+
@Measurement(iterations = 5)
45+
@Threads(1)
46+
public class IndexConstructionWithRandomSetBenchmark {
47+
private static final Logger log = LoggerFactory.getLogger(IndexConstructionWithRandomSetBenchmark.class);
48+
private static final VectorTypeSupport VECTOR_TYPE_SUPPORT = VectorizationProvider.getInstance().getVectorTypeSupport();
49+
private RandomAccessVectorValues ravv;
50+
private BuildScoreProvider bsp;
51+
private int M = 32; // graph degree
52+
private int beamWidth = 100;
53+
@Param({"768", "1536"})
54+
private int originalDimension;
55+
@Param({"10000", "100000", "1000000"})
56+
int numBaseVectors;
57+
58+
@Setup
59+
public void setup() throws IOException {
60+
61+
final var baseVectors = new ArrayList<VectorFloat<?>>(numBaseVectors);
62+
for (int i = 0; i < numBaseVectors; i++) {
63+
VectorFloat<?> vector = createRandomVector(originalDimension);
64+
baseVectors.add(vector);
65+
}
66+
// wrap the raw vectors in a RandomAccessVectorValues
67+
ravv = new ListRandomAccessVectorValues(baseVectors, originalDimension);
68+
69+
// score provider using the raw, in-memory vectors
70+
bsp = BuildScoreProvider.randomAccessScoreProvider(ravv, VectorSimilarityFunction.EUCLIDEAN);
71+
}
72+
73+
@TearDown
74+
public void tearDown() throws IOException {
75+
76+
}
77+
78+
@Benchmark
79+
public void buildIndexBenchmark(Blackhole blackhole) throws IOException {
80+
// score provider using the raw, in-memory vectors
81+
try (final var graphIndexBuilder = new GraphIndexBuilder(bsp, ravv.dimension(), M, beamWidth, 1.2f, 1.2f, true)) {
82+
final var graphIndex = graphIndexBuilder.build(ravv);
83+
blackhole.consume(graphIndex);
84+
}
85+
}
86+
87+
private VectorFloat<?> createRandomVector(int dimension) {
88+
VectorFloat<?> vector = VECTOR_TYPE_SUPPORT.createFloatVector(dimension);
89+
for (int i = 0; i < dimension; i++) {
90+
vector.set(i, (float) Math.random());
91+
}
92+
return vector;
93+
}
94+
}

rat-excludes.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,4 @@ src/assembly/mrjar.xml
1212
src/assembly/sourcesjar.xml
1313
src/main/java/io/github/jbellis/jvector/vector/cnative/*
1414
src/main/resources/log4j2.xml
15+
scripts/test_node_setup.sh

0 commit comments

Comments
 (0)