99
1010package org .elasticsearch .benchmark .vector .scorer ;
1111
12- import org .apache .lucene .codecs .lucene99 .Lucene99ScalarQuantizedVectorScorer ;
13- import org .apache .lucene .codecs .lucene99 .OffHeapQuantizedByteVectorValues ;
1412import org .apache .lucene .index .VectorSimilarityFunction ;
1513import org .apache .lucene .store .Directory ;
1614import org .apache .lucene .store .IOContext ;
1715import org .apache .lucene .store .IndexInput ;
18- import org .apache .lucene .store .IndexOutput ;
1916import org .apache .lucene .store .MMapDirectory ;
2017import org .apache .lucene .util .hnsw .RandomVectorScorer ;
21- import org .apache .lucene .util .hnsw .RandomVectorScorerSupplier ;
2218import org .apache .lucene .util .hnsw .UpdateableRandomVectorScorer ;
23- import org .apache .lucene .util .quantization .QuantizedByteVectorValues ;
24- import org .apache .lucene .util .quantization .ScalarQuantizer ;
2519import org .elasticsearch .common .logging .LogConfigurator ;
2620import org .elasticsearch .core .IOUtils ;
2721import org .elasticsearch .logging .LogManager ;
4236
4337import java .io .IOException ;
4438import java .nio .file .Files ;
39+ import java .nio .file .Path ;
4540import java .util .concurrent .ThreadLocalRandom ;
4641import java .util .concurrent .TimeUnit ;
4742
43+ import static org .elasticsearch .benchmark .vector .scorer .BenchmarkUtils .createRandomInt7VectorData ;
44+ import static org .elasticsearch .benchmark .vector .scorer .BenchmarkUtils .getScorerFactoryOrDie ;
45+ import static org .elasticsearch .benchmark .vector .scorer .BenchmarkUtils .luceneScoreSupplier ;
46+ import static org .elasticsearch .benchmark .vector .scorer .BenchmarkUtils .luceneScorer ;
47+ import static org .elasticsearch .benchmark .vector .scorer .BenchmarkUtils .readNodeCorrectionConstant ;
48+ import static org .elasticsearch .benchmark .vector .scorer .BenchmarkUtils .supportsHeapSegments ;
49+ import static org .elasticsearch .benchmark .vector .scorer .BenchmarkUtils .vectorValues ;
4850import static org .elasticsearch .simdvec .VectorSimilarityType .DOT_PRODUCT ;
4951import static org .elasticsearch .simdvec .VectorSimilarityType .EUCLIDEAN ;
5052
53+ /**
54+ * Benchmark that compares various scalar quantized vector similarity function
55+ * implementations: scalar, Lucene's Panama-ized, and Elasticsearch's native.
56+ * Run with ./gradlew -p benchmarks run --args 'VectorScorerInt7uBenchmark'
57+ */
5158@ Fork (value = 1 , jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
5259@ Warmup (iterations = 3 , time = 3 )
5360@ Measurement (iterations = 5 , time = 3 )
5461@ BenchmarkMode (Mode .Throughput )
5562@ OutputTimeUnit (TimeUnit .MICROSECONDS )
5663@ State (Scope .Thread )
57- /**
58- * Benchmark that compares various scalar quantized vector similarity function
59- * implementations;: scalar, lucene's panama-ized, and Elasticsearch's native.
60- * Run with ./gradlew -p benchmarks run --args 'Int7uScorerBenchmark'
61- */
6264public class VectorScorerInt7uBenchmark {
6365
6466 static {
@@ -71,15 +73,16 @@ public class VectorScorerInt7uBenchmark {
7173
7274 @ Param ({ "96" , "768" , "1024" })
7375 public int dims ;
74- final int size = 2 ; // there are only two vectors to compare
76+ public int numVectors = 2 ; // there are only two vectors to compare
7577
78+ Path path ;
7679 Directory dir ;
7780 IndexInput in ;
7881 VectorScorerFactory factory ;
7982
8083 byte [] vec1 , vec2 ;
81- float vec1Offset ;
82- float vec2Offset ;
84+ float vec1CorrectionConstant ;
85+ float vec2CorrectionConstant ;
8386 float scoreCorrectionConstant ;
8487
8588 UpdateableRandomVectorScorer luceneDotScorer ;
@@ -94,45 +97,34 @@ public class VectorScorerInt7uBenchmark {
9497
9598 @ Setup
9699 public void setup () throws IOException {
97- var optionalVectorScorerFactory = VectorScorerFactory .instance ();
98- if (optionalVectorScorerFactory .isEmpty ()) {
99- String msg = "JDK=["
100- + Runtime .version ()
101- + "], os.name=["
102- + System .getProperty ("os.name" )
103- + "], os.arch=["
104- + System .getProperty ("os.arch" )
105- + "]" ;
106- throw new AssertionError ("Vector scorer factory not present. Cannot run the benchmark. " + msg );
107- }
108- factory = optionalVectorScorerFactory .get ();
109- vec1 = new byte [dims ];
110- vec2 = new byte [dims ];
111-
112- randomInt7BytesBetween (vec1 );
113- randomInt7BytesBetween (vec2 );
114- vec1Offset = ThreadLocalRandom .current ().nextFloat ();
115- vec2Offset = ThreadLocalRandom .current ().nextFloat ();
116-
117- dir = new MMapDirectory (Files .createTempDirectory ("nativeScalarQuantBench" ));
118- try (IndexOutput out = dir .createOutput ("vector.data" , IOContext .DEFAULT )) {
119- out .writeBytes (vec1 , 0 , vec1 .length );
120- out .writeInt (Float .floatToIntBits (vec1Offset ));
121- out .writeBytes (vec2 , 0 , vec2 .length );
122- out .writeInt (Float .floatToIntBits (vec2Offset ));
123- }
100+ factory = getScorerFactoryOrDie ();
101+
102+ var random = ThreadLocalRandom .current ();
103+ path = Files .createTempDirectory ("Int7uScorerBenchmark" );
104+ dir = new MMapDirectory (path );
105+ createRandomInt7VectorData (random , dir , dims , numVectors );
106+
124107 in = dir .openInput ("vector.data" , IOContext .DEFAULT );
125- var values = vectorValues (dims , 2 , in , VectorSimilarityFunction .DOT_PRODUCT );
126- scoreCorrectionConstant = values .getScalarQuantizer ().getConstantMultiplier ();
127- luceneDotScorer = luceneScoreSupplier (values , VectorSimilarityFunction .DOT_PRODUCT ).scorer ();
108+ final var dotProductValues = vectorValues (dims , numVectors , in , VectorSimilarityFunction .DOT_PRODUCT );
109+ scoreCorrectionConstant = dotProductValues .getScalarQuantizer ().getConstantMultiplier ();
110+ luceneDotScorer = luceneScoreSupplier (dotProductValues , VectorSimilarityFunction .DOT_PRODUCT ).scorer ();
128111 luceneDotScorer .setScoringOrdinal (0 );
129- values = vectorValues (dims , 2 , in , VectorSimilarityFunction .EUCLIDEAN );
130- luceneSqrScorer = luceneScoreSupplier (values , VectorSimilarityFunction .EUCLIDEAN ).scorer ();
131- luceneSqrScorer .setScoringOrdinal (0 );
132-
133- nativeDotScorer = factory .getInt7SQVectorScorerSupplier (DOT_PRODUCT , in , values , scoreCorrectionConstant ).get ().scorer ();
112+ nativeDotScorer = factory .getInt7SQVectorScorerSupplier (DOT_PRODUCT , in , dotProductValues , scoreCorrectionConstant )
113+ .orElseThrow ()
114+ .scorer ();
134115 nativeDotScorer .setScoringOrdinal (0 );
135- nativeSqrScorer = factory .getInt7SQVectorScorerSupplier (EUCLIDEAN , in , values , scoreCorrectionConstant ).get ().scorer ();
116+
117+ vec1 = dotProductValues .vectorValue (0 ).clone ();
118+ vec1CorrectionConstant = readNodeCorrectionConstant (dotProductValues , 0 );
119+ vec2 = dotProductValues .vectorValue (1 ).clone ();
120+ vec2CorrectionConstant = readNodeCorrectionConstant (dotProductValues , 1 );
121+
122+ final var euclideanValues = vectorValues (dims , numVectors , in , VectorSimilarityFunction .EUCLIDEAN );
123+ luceneSqrScorer = luceneScoreSupplier (euclideanValues , VectorSimilarityFunction .EUCLIDEAN ).scorer ();
124+ luceneSqrScorer .setScoringOrdinal (0 );
125+ nativeSqrScorer = factory .getInt7SQVectorScorerSupplier (EUCLIDEAN , in , euclideanValues , scoreCorrectionConstant )
126+ .orElseThrow ()
127+ .scorer ();
136128 nativeSqrScorer .setScoringOrdinal (0 );
137129
138130 if (supportsHeapSegments ()) {
@@ -141,16 +133,19 @@ public void setup() throws IOException {
141133 for (int i = 0 ; i < dims ; i ++) {
142134 queryVec [i ] = ThreadLocalRandom .current ().nextFloat ();
143135 }
144- luceneDotScorerQuery = luceneScorer (values , VectorSimilarityFunction .DOT_PRODUCT , queryVec );
145- nativeDotScorerQuery = factory .getInt7SQVectorScorer (VectorSimilarityFunction .DOT_PRODUCT , values , queryVec ).get ();
146- luceneSqrScorerQuery = luceneScorer (values , VectorSimilarityFunction .EUCLIDEAN , queryVec );
147- nativeSqrScorerQuery = factory .getInt7SQVectorScorer (VectorSimilarityFunction .EUCLIDEAN , values , queryVec ).get ();
136+ luceneDotScorerQuery = luceneScorer (dotProductValues , VectorSimilarityFunction .DOT_PRODUCT , queryVec );
137+ nativeDotScorerQuery = factory .getInt7SQVectorScorer (VectorSimilarityFunction .DOT_PRODUCT , dotProductValues , queryVec )
138+ .orElseThrow ();
139+ luceneSqrScorerQuery = luceneScorer (euclideanValues , VectorSimilarityFunction .EUCLIDEAN , queryVec );
140+ nativeSqrScorerQuery = factory .getInt7SQVectorScorer (VectorSimilarityFunction .EUCLIDEAN , euclideanValues , queryVec )
141+ .orElseThrow ();
148142 }
149143 }
150144
151145 @ TearDown
152146 public void teardown () throws IOException {
153147 IOUtils .close (dir , in );
148+ IOUtils .rm (path );
154149 }
155150
156151 @ Benchmark
@@ -169,7 +164,7 @@ public float dotProductScalar() {
169164 for (int i = 0 ; i < vec1 .length ; i ++) {
170165 dotProduct += vec1 [i ] * vec2 [i ];
171166 }
172- float adjustedDistance = dotProduct * scoreCorrectionConstant + vec1Offset + vec2Offset ;
167+ float adjustedDistance = dotProduct * scoreCorrectionConstant + vec1CorrectionConstant + vec2CorrectionConstant ;
173168 return (1 + adjustedDistance ) / 2 ;
174169 }
175170
@@ -215,33 +210,4 @@ public float squareDistanceLuceneQuery() throws IOException {
215210 public float squareDistanceNativeQuery () throws IOException {
216211 return nativeSqrScorerQuery .score (1 );
217212 }
218-
219- static boolean supportsHeapSegments () {
220- return Runtime .version ().feature () >= 22 ;
221- }
222-
223- QuantizedByteVectorValues vectorValues (int dims , int size , IndexInput in , VectorSimilarityFunction sim ) throws IOException {
224- var sq = new ScalarQuantizer (0.1f , 0.9f , (byte ) 7 );
225- var slice = in .slice ("values" , 0 , in .length ());
226- return new OffHeapQuantizedByteVectorValues .DenseOffHeapVectorValues (dims , size , sq , false , sim , null , slice );
227- }
228-
229- RandomVectorScorerSupplier luceneScoreSupplier (QuantizedByteVectorValues values , VectorSimilarityFunction sim ) throws IOException {
230- return new Lucene99ScalarQuantizedVectorScorer (null ).getRandomVectorScorerSupplier (sim , values );
231- }
232-
233- RandomVectorScorer luceneScorer (QuantizedByteVectorValues values , VectorSimilarityFunction sim , float [] queryVec ) throws IOException {
234- return new Lucene99ScalarQuantizedVectorScorer (null ).getRandomVectorScorer (sim , values , queryVec );
235- }
236-
237- // Unsigned int7 byte vectors have values in the range of 0 to 127 (inclusive).
238- static final byte MIN_INT7_VALUE = 0 ;
239- static final byte MAX_INT7_VALUE = 127 ;
240-
241- static void randomInt7BytesBetween (byte [] bytes ) {
242- var random = ThreadLocalRandom .current ();
243- for (int i = 0 , len = bytes .length ; i < len ;) {
244- bytes [i ++] = (byte ) random .nextInt (MIN_INT7_VALUE , MAX_INT7_VALUE + 1 );
245- }
246- }
247213}
0 commit comments