|
9 | 9 | import org.apache.lucene.document.Document; |
10 | 10 | import org.apache.lucene.document.Field; |
11 | 11 | import org.apache.lucene.document.IntField; |
| 12 | +import org.apache.lucene.document.KnnByteVectorField; |
12 | 13 | import org.apache.lucene.document.KnnFloatVectorField; |
13 | 14 | import org.apache.lucene.index.DirectoryReader; |
14 | 15 | import org.apache.lucene.index.IndexReader; |
@@ -146,7 +147,6 @@ public void testFullPrecisionVectorSimilarityDVS() throws Exception { |
146 | 147 |
|
147 | 148 | float[] queryVector = TestVectorUtil.randomVector(VECTOR_DIMENSION); |
148 | 149 | try (IndexReader reader = DirectoryReader.open(dir)) { |
149 | | - FieldExistsQuery query = new FieldExistsQuery(KNN_FIELD); |
150 | 150 | for (LeafReaderContext ctx : reader.leaves()) { |
151 | 151 | DoubleValues fpSimValues = FloatVectorSimilarityValuesSource.fullPrecisionScores(ctx, queryVector, KNN_FIELD); |
152 | 152 | DoubleValues quantizedSimValues = DoubleValuesSource.similarityToQueryVector(ctx, queryVector, KNN_FIELD); |
@@ -180,4 +180,114 @@ public void testFullPrecisionVectorSimilarityDVS() throws Exception { |
180 | 180 | } |
181 | 181 | } |
182 | 182 | } |
| 183 | + |
| 184 | + @Test |
| 185 | + public void testFullPrecisionByteVectorSimilarityDVS() throws Exception { |
| 186 | + List<byte[]> vectors = new ArrayList<>(); |
| 187 | + int numVectors = atLeast(NUM_VECTORS); |
| 188 | + int numSegments = random().nextInt(2, 10); |
| 189 | + final VectorSimilarityFunction vectorSimilarityFunction = |
| 190 | + VectorSimilarityFunction.values()[ |
| 191 | + random().nextInt(VectorSimilarityFunction.values().length)]; |
| 192 | + |
| 193 | + try (Directory dir = newDirectory()) { |
| 194 | + int id = 0; |
| 195 | + |
| 196 | + // index some 4 bit quantized vectors |
| 197 | + try (IndexWriter w = |
| 198 | + new IndexWriter( |
| 199 | + dir, |
| 200 | + newIndexWriterConfig().setCodec(TestUtil.alwaysKnnVectorsFormat(getKnnFormat(4))))) { |
| 201 | + for (int j = 0; j < numSegments; j++) { |
| 202 | + for (int i = 0; i < numVectors; i++) { |
| 203 | + Document doc = new Document(); |
| 204 | + if (random().nextInt(100) < 30) { |
| 205 | + // skip vector for some docs to create sparse vector field |
| 206 | + doc.add(new IntField("has_vector", 0, Field.Store.YES)); |
| 207 | + } else { |
| 208 | + byte[] vector = TestVectorUtil.randomVectorBytes(VECTOR_DIMENSION); |
| 209 | + vectors.add(vector); |
| 210 | + doc.add(new IntField("id", id++, Field.Store.YES)); |
| 211 | + doc.add(new KnnByteVectorField(KNN_FIELD, vector, vectorSimilarityFunction)); |
| 212 | + doc.add(new IntField("has_vector", 1, Field.Store.YES)); |
| 213 | + } |
| 214 | + w.addDocument(doc); |
| 215 | + w.flush(); |
| 216 | + } |
| 217 | + } |
| 218 | + // add a segment with no vectors |
| 219 | + for (int i = 0; i < 100; i++) { |
| 220 | + Document doc = new Document(); |
| 221 | + doc.add(new IntField("has_vector", 0, Field.Store.YES)); |
| 222 | + w.addDocument(doc); |
| 223 | + } |
| 224 | + w.flush(); |
| 225 | + } |
| 226 | + |
| 227 | + // index some 7 bit quantized vectors |
| 228 | + try (IndexWriter w = |
| 229 | + new IndexWriter( |
| 230 | + dir, |
| 231 | + newIndexWriterConfig().setCodec(TestUtil.alwaysKnnVectorsFormat(getKnnFormat(7))))) { |
| 232 | + for (int j = 0; j < numSegments; j++) { |
| 233 | + for (int i = 0; i < numVectors; i++) { |
| 234 | + Document doc = new Document(); |
| 235 | + if (random().nextInt(100) < 30) { |
| 236 | + // skip vector for some docs to create sparse vector field |
| 237 | + doc.add(new IntField("has_vector", 0, Field.Store.YES)); |
| 238 | + } else { |
| 239 | + byte[] vector = TestVectorUtil.randomVectorBytes(VECTOR_DIMENSION); |
| 240 | + vectors.add(vector); |
| 241 | + doc.add(new IntField("id", id++, Field.Store.YES)); |
| 242 | + doc.add(new KnnByteVectorField(KNN_FIELD, vector, vectorSimilarityFunction)); |
| 243 | + doc.add(new IntField("has_vector", 1, Field.Store.YES)); |
| 244 | + } |
| 245 | + w.addDocument(doc); |
| 246 | + w.flush(); |
| 247 | + } |
| 248 | + } |
| 249 | + // add a segment with no vectors |
| 250 | + for (int i = 0; i < 100; i++) { |
| 251 | + Document doc = new Document(); |
| 252 | + doc.add(new IntField("has_vector", 0, Field.Store.YES)); |
| 253 | + w.addDocument(doc); |
| 254 | + } |
| 255 | + w.flush(); |
| 256 | + } |
| 257 | + |
| 258 | + byte[] queryVector = TestVectorUtil.randomVectorBytes(VECTOR_DIMENSION); |
| 259 | + try (IndexReader reader = DirectoryReader.open(dir)) { |
| 260 | + for (LeafReaderContext ctx : reader.leaves()) { |
| 261 | + DoubleValues fpSimValues = ByteVectorSimilarityValuesSource.fullPrecisionScores(ctx, queryVector, KNN_FIELD); |
| 262 | + DoubleValues quantizedSimValues = DoubleValuesSource.similarityToQueryVector(ctx, queryVector, KNN_FIELD); |
| 263 | + // validate when segment has no vectors |
| 264 | + if (fpSimValues == DoubleValues.EMPTY || quantizedSimValues == DoubleValues.EMPTY) { |
| 265 | + assertEquals(fpSimValues, quantizedSimValues); |
| 266 | + assertNull(ctx.reader().getByteVectorValues(KNN_FIELD)); |
| 267 | + continue; |
| 268 | + } |
| 269 | + StoredFields storedFields = ctx.reader().storedFields(); |
| 270 | + VectorScorer quantizedScorer = |
| 271 | + ctx.reader().getByteVectorValues(KNN_FIELD).scorer(queryVector); |
| 272 | + DocIdSetIterator disi = quantizedScorer.iterator(); |
| 273 | + while (disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { |
| 274 | + int doc = disi.docID(); |
| 275 | + fpSimValues.advanceExact(doc); |
| 276 | + quantizedSimValues.advanceExact(doc); |
| 277 | + int idValue = Integer.parseInt(storedFields.document(doc).get("id")); |
| 278 | + byte[] docVector = vectors.get(idValue); |
| 279 | + assert docVector != null : "Vector for id " + idValue + " not found"; |
| 280 | + // validate full precision vector scores |
| 281 | + double expectedFpScore = vectorSimilarityFunction.compare(queryVector, docVector); |
| 282 | + double actualFpScore = fpSimValues.doubleValue(); |
| 283 | + assertEquals(expectedFpScore, actualFpScore, 1e-5); |
| 284 | + // validate quantized vector scores |
| 285 | + double expectedQScore = quantizedScorer.score(); |
| 286 | + double actualQScore = quantizedSimValues.doubleValue(); |
| 287 | + assertEquals(expectedQScore, actualQScore, 1e-5); |
| 288 | + } |
| 289 | + } |
| 290 | + } |
| 291 | + } |
| 292 | + } |
183 | 293 | } |
0 commit comments