Skip to content

Commit 6d4b5ea

Browse files
author
Michael Sokolov
committed
LUCENE-9905: rename VectorValues.SearchStrategy to VectorValues.SimilarityFunction
1 parent 3115f85 commit 6d4b5ea

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+382
-339
lines changed

lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene60/Lucene60FieldInfosFormat.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ private FieldInfo[] readFieldInfos(IndexInput input, int version) throws IOExcep
212212
pointIndexDimensionCount,
213213
pointNumBytes,
214214
0,
215-
VectorValues.SearchStrategy.NONE,
215+
VectorValues.SimilarityFunction.NONE,
216216
isSoftDeletesField);
217217
} catch (IllegalStateException e) {
218218
throw new CorruptIndexException(

lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosFormat.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ public FieldInfos read(
158158
SimpleTextUtil.readLine(input, scratch);
159159
assert StringHelper.startsWith(scratch.get(), VECTOR_SEARCH_STRATEGY);
160160
String scoreFunction = readString(VECTOR_SEARCH_STRATEGY.length, scratch);
161-
VectorValues.SearchStrategy vectorDistFunc = distanceFunction(scoreFunction);
161+
VectorValues.SimilarityFunction vectorDistFunc = distanceFunction(scoreFunction);
162162

163163
SimpleTextUtil.readLine(input, scratch);
164164
assert StringHelper.startsWith(scratch.get(), SOFT_DELETES);
@@ -201,8 +201,8 @@ public DocValuesType docValuesType(String dvType) {
201201
return DocValuesType.valueOf(dvType);
202202
}
203203

204-
public VectorValues.SearchStrategy distanceFunction(String scoreFunction) {
205-
return VectorValues.SearchStrategy.valueOf(scoreFunction);
204+
public VectorValues.SimilarityFunction distanceFunction(String scoreFunction) {
205+
return VectorValues.SimilarityFunction.valueOf(scoreFunction);
206206
}
207207

208208
private String readString(int offset, BytesRefBuilder scratch) {
@@ -298,7 +298,7 @@ public void write(
298298
SimpleTextUtil.writeNewline(out);
299299

300300
SimpleTextUtil.write(out, VECTOR_SEARCH_STRATEGY);
301-
SimpleTextUtil.write(out, fi.getVectorSearchStrategy().name(), scratch);
301+
SimpleTextUtil.write(out, fi.getVectorSimilarityFunction().name(), scratch);
302302
SimpleTextUtil.writeNewline(out);
303303

304304
SimpleTextUtil.write(out, SOFT_DELETES);

lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextVectorReader.java

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,8 @@ public class SimpleTextVectorReader extends VectorReader {
8282
while (fieldNumber != -1) {
8383
String fieldName = readString(in, FIELD_NAME);
8484
String scoreFunctionName = readString(in, SCORE_FUNCTION);
85-
VectorValues.SearchStrategy searchStrategy =
86-
VectorValues.SearchStrategy.valueOf(scoreFunctionName);
85+
VectorValues.SimilarityFunction similarityFunction =
86+
VectorValues.SimilarityFunction.valueOf(scoreFunctionName);
8787
long vectorDataOffset = readLong(in, VECTOR_DATA_OFFSET);
8888
long vectorDataLength = readLong(in, VECTOR_DATA_LENGTH);
8989
int dimension = readInt(in, VECTOR_DIMENSION);
@@ -95,7 +95,8 @@ public class SimpleTextVectorReader extends VectorReader {
9595
assert fieldEntries.containsKey(fieldName) == false;
9696
fieldEntries.put(
9797
fieldName,
98-
new FieldEntry(dimension, searchStrategy, vectorDataOffset, vectorDataLength, docIds));
98+
new FieldEntry(
99+
dimension, similarityFunction, vectorDataOffset, vectorDataLength, docIds));
99100
fieldNumber = readInt(in, FIELD_NUMBER);
100101
}
101102
SimpleTextUtil.checkFooter(in);
@@ -204,20 +205,20 @@ public void close() throws IOException {
204205
private static class FieldEntry {
205206

206207
final int dimension;
207-
final VectorValues.SearchStrategy searchStrategy;
208+
final VectorValues.SimilarityFunction similarityFunction;
208209

209210
final long vectorDataOffset;
210211
final long vectorDataLength;
211212
final int[] ordToDoc;
212213

213214
FieldEntry(
214215
int dimension,
215-
VectorValues.SearchStrategy searchStrategy,
216+
VectorValues.SimilarityFunction similarityFunction,
216217
long vectorDataOffset,
217218
long vectorDataLength,
218219
int[] ordToDoc) {
219220
this.dimension = dimension;
220-
this.searchStrategy = searchStrategy;
221+
this.similarityFunction = similarityFunction;
221222
this.vectorDataOffset = vectorDataOffset;
222223
this.vectorDataLength = vectorDataLength;
223224
this.ordToDoc = ordToDoc;
@@ -260,8 +261,8 @@ public int size() {
260261
}
261262

262263
@Override
263-
public SearchStrategy searchStrategy() {
264-
return entry.searchStrategy;
264+
public SimilarityFunction similarityFunction() {
265+
return entry.similarityFunction;
265266
}
266267

267268
@Override

lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextVectorWriter.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ private void writeMeta(
9898
throws IOException {
9999
writeField(meta, FIELD_NUMBER, field.number);
100100
writeField(meta, FIELD_NAME, field.name);
101-
writeField(meta, SCORE_FUNCTION, field.getVectorSearchStrategy().name());
101+
writeField(meta, SCORE_FUNCTION, field.getVectorSimilarityFunction().name());
102102
writeField(meta, VECTOR_DATA_OFFSET, vectorDataOffset);
103103
writeField(meta, VECTOR_DATA_LENGTH, vectorDataLength);
104104
writeField(meta, VECTOR_DIMENSION, field.getVectorDimension());

lucene/codecs/src/test/org/apache/lucene/codecs/uniformsplit/TestBlockWriter.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ private static FieldInfo getMockFieldInfo(String fieldName, int number) {
116116
0,
117117
0,
118118
0,
119-
VectorValues.SearchStrategy.NONE,
119+
VectorValues.SimilarityFunction.NONE,
120120
true);
121121
}
122122
}

lucene/codecs/src/test/org/apache/lucene/codecs/uniformsplit/sharedterms/TestSTBlockReader.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ private static FieldInfo mockFieldInfo(String fieldName, int number) {
203203
0,
204204
0,
205205
0,
206-
VectorValues.SearchStrategy.NONE,
206+
VectorValues.SimilarityFunction.NONE,
207207
false);
208208
}
209209

lucene/core/src/java/org/apache/lucene/codecs/VectorWriter.java

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -68,18 +68,18 @@ private void mergeVectors(FieldInfo mergeFieldInfo, final MergeState mergeState)
6868
}
6969
List<VectorValuesSub> subs = new ArrayList<>();
7070
int dimension = -1;
71-
VectorValues.SearchStrategy searchStrategy = null;
71+
VectorValues.SimilarityFunction similarityFunction = null;
7272
int nonEmptySegmentIndex = 0;
7373
for (int i = 0; i < mergeState.vectorReaders.length; i++) {
7474
VectorReader vectorReader = mergeState.vectorReaders[i];
7575
if (vectorReader != null) {
7676
if (mergeFieldInfo != null && mergeFieldInfo.hasVectorValues()) {
7777
int segmentDimension = mergeFieldInfo.getVectorDimension();
78-
VectorValues.SearchStrategy segmentSearchStrategy =
79-
mergeFieldInfo.getVectorSearchStrategy();
78+
VectorValues.SimilarityFunction segmentSimilarityFunction =
79+
mergeFieldInfo.getVectorSimilarityFunction();
8080
if (dimension == -1) {
8181
dimension = segmentDimension;
82-
searchStrategy = mergeFieldInfo.getVectorSearchStrategy();
82+
similarityFunction = mergeFieldInfo.getVectorSimilarityFunction();
8383
} else if (dimension != segmentDimension) {
8484
throw new IllegalStateException(
8585
"Varying dimensions for vector-valued field "
@@ -88,14 +88,14 @@ private void mergeVectors(FieldInfo mergeFieldInfo, final MergeState mergeState)
8888
+ dimension
8989
+ "!="
9090
+ segmentDimension);
91-
} else if (searchStrategy != segmentSearchStrategy) {
91+
} else if (similarityFunction != segmentSimilarityFunction) {
9292
throw new IllegalStateException(
93-
"Varying search strategys for vector-valued field "
93+
"Varying similarity functions for vector-valued field "
9494
+ mergeFieldInfo.name
9595
+ ": "
96-
+ searchStrategy
96+
+ similarityFunction
9797
+ "!="
98-
+ segmentSearchStrategy);
98+
+ segmentSimilarityFunction);
9999
}
100100
VectorValues values = vectorReader.getVectorValues(mergeFieldInfo.name);
101101
if (values != null) {
@@ -241,8 +241,8 @@ public int dimension() {
241241
}
242242

243243
@Override
244-
public SearchStrategy searchStrategy() {
245-
return subs.get(0).values.searchStrategy();
244+
public SimilarityFunction similarityFunction() {
245+
return subs.get(0).values.similarityFunction();
246246
}
247247

248248
class MergerRandomAccess implements RandomAccessVectorValues {
@@ -272,8 +272,8 @@ public int dimension() {
272272
}
273273

274274
@Override
275-
public SearchStrategy searchStrategy() {
276-
return VectorValuesMerger.this.searchStrategy();
275+
public SimilarityFunction similarityFunction() {
276+
return VectorValuesMerger.this.similarityFunction();
277277
}
278278

279279
@Override

lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90FieldInfosFormat.java

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,7 @@
2929
import org.apache.lucene.index.IndexFileNames;
3030
import org.apache.lucene.index.IndexOptions;
3131
import org.apache.lucene.index.SegmentInfo;
32-
import org.apache.lucene.index.VectorValues;
33-
import org.apache.lucene.index.VectorValues.SearchStrategy;
32+
import org.apache.lucene.index.VectorValues.SimilarityFunction;
3433
import org.apache.lucene.store.ChecksumIndexInput;
3534
import org.apache.lucene.store.DataOutput;
3635
import org.apache.lucene.store.Directory;
@@ -103,8 +102,8 @@
103102
* <li>VectorDistFunction: a byte containing distance function used for similarity calculation.
104103
* <ul>
105104
* <li>0: no distance function is defined for this field.
106-
* <li>1: EUCLIDEAN_HNSW distance. ({@link SearchStrategy#EUCLIDEAN_HNSW})
107-
* <li>2: DOT_PRODUCT_HNSW score. ({@link SearchStrategy#DOT_PRODUCT_HNSW})
105+
* <li>1: EUCLIDEAN distance. ({@link SimilarityFunction#EUCLIDEAN})
106+
* <li>2: DOT_PRODUCT score. ({@link SimilarityFunction#DOT_PRODUCT})
108107
* </ul>
109108
* </ul>
110109
*
@@ -173,7 +172,7 @@ public FieldInfos read(
173172
pointNumBytes = 0;
174173
}
175174
final int vectorDimension = input.readVInt();
176-
final VectorValues.SearchStrategy vectorDistFunc = getDistFunc(input, input.readByte());
175+
final SimilarityFunction vectorDistFunc = getDistFunc(input, input.readByte());
177176

178177
try {
179178
infos[i] =
@@ -254,12 +253,11 @@ private static DocValuesType getDocValuesType(IndexInput input, byte b) throws I
254253
}
255254
}
256255

257-
private static VectorValues.SearchStrategy getDistFunc(IndexInput input, byte b)
258-
throws IOException {
259-
if (b < 0 || b >= VectorValues.SearchStrategy.values().length) {
256+
private static SimilarityFunction getDistFunc(IndexInput input, byte b) throws IOException {
257+
if (b < 0 || b >= SimilarityFunction.values().length) {
260258
throw new CorruptIndexException("invalid distance function: " + b, input);
261259
}
262-
return VectorValues.SearchStrategy.values()[b];
260+
return SimilarityFunction.values()[b];
263261
}
264262

265263
static {
@@ -348,7 +346,7 @@ public void write(
348346
output.writeVInt(fi.getPointNumBytes());
349347
}
350348
output.writeVInt(fi.getVectorDimension());
351-
output.writeByte((byte) fi.getVectorSearchStrategy().ordinal());
349+
output.writeByte((byte) fi.getVectorSimilarityFunction().ordinal());
352350
}
353351
CodecUtil.writeFooter(output);
354352
}

lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90VectorFormat.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
*
5151
* <ul>
5252
* <li><b>[int32]</b> field number
53-
* <li><b>[int32]</b> vector search strategy ordinal
53+
* <li><b>[int32]</b> vector similarity function ordinal
5454
* <li><b>[vlong]</b> offset to this field's vectors in the .vec file
5555
* <li><b>[vlong]</b> length of this field's vectors, in bytes
5656
* <li><b>[vlong]</b> offset to this field's index in the .vex file

lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90VectorReader.java

Lines changed: 26 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -187,24 +187,28 @@ private void validateFieldEntry(FieldInfo info, FieldEntry fieldEntry) {
187187
}
188188
}
189189

190-
private VectorValues.SearchStrategy readSearchStrategy(DataInput input) throws IOException {
191-
int searchStrategyId = input.readInt();
192-
if (searchStrategyId < 0 || searchStrategyId >= VectorValues.SearchStrategy.values().length) {
193-
throw new CorruptIndexException("Invalid search strategy id: " + searchStrategyId, input);
190+
private VectorValues.SimilarityFunction readSimilarityFunction(DataInput input)
191+
throws IOException {
192+
int similarityFunctionId = input.readInt();
193+
if (similarityFunctionId < 0
194+
|| similarityFunctionId >= VectorValues.SimilarityFunction.values().length) {
195+
throw new CorruptIndexException(
196+
"Invalid similarity function id: " + similarityFunctionId, input);
194197
}
195-
return VectorValues.SearchStrategy.values()[searchStrategyId];
198+
return VectorValues.SimilarityFunction.values()[similarityFunctionId];
196199
}
197200

198201
private FieldEntry readField(DataInput input) throws IOException {
199-
VectorValues.SearchStrategy searchStrategy = readSearchStrategy(input);
200-
switch (searchStrategy) {
202+
VectorValues.SimilarityFunction similarityFunction = readSimilarityFunction(input);
203+
switch (similarityFunction) {
201204
case NONE:
202-
return new FieldEntry(input, searchStrategy);
203-
case DOT_PRODUCT_HNSW:
204-
case EUCLIDEAN_HNSW:
205-
return new HnswGraphFieldEntry(input, searchStrategy);
205+
return new FieldEntry(input, similarityFunction);
206+
case DOT_PRODUCT:
207+
case EUCLIDEAN:
208+
return new HnswGraphFieldEntry(input, similarityFunction);
206209
default:
207-
throw new CorruptIndexException("Unknown vector search strategy: " + searchStrategy, input);
210+
throw new CorruptIndexException(
211+
"Unknown vector similarity function: " + similarityFunction, input);
208212
}
209213
}
210214

@@ -288,7 +292,7 @@ public KnnGraphValues getGraphValues(String field) throws IOException {
288292
}
289293

290294
private KnnGraphValues getGraphValues(FieldEntry entry) throws IOException {
291-
if (entry.searchStrategy.isHnsw()) {
295+
if (entry.similarityFunction.isHnsw()) {
292296
HnswGraphFieldEntry graphEntry = (HnswGraphFieldEntry) entry;
293297
IndexInput bytesSlice =
294298
vectorIndex.slice("graph-data", entry.indexDataOffset, entry.indexDataLength);
@@ -306,16 +310,17 @@ public void close() throws IOException {
306310
private static class FieldEntry {
307311

308312
final int dimension;
309-
final VectorValues.SearchStrategy searchStrategy;
313+
final VectorValues.SimilarityFunction similarityFunction;
310314

311315
final long vectorDataOffset;
312316
final long vectorDataLength;
313317
final long indexDataOffset;
314318
final long indexDataLength;
315319
final int[] ordToDoc;
316320

317-
FieldEntry(DataInput input, VectorValues.SearchStrategy searchStrategy) throws IOException {
318-
this.searchStrategy = searchStrategy;
321+
FieldEntry(DataInput input, VectorValues.SimilarityFunction similarityFunction)
322+
throws IOException {
323+
this.similarityFunction = similarityFunction;
319324
vectorDataOffset = input.readVLong();
320325
vectorDataLength = input.readVLong();
321326
indexDataOffset = input.readVLong();
@@ -338,9 +343,9 @@ private static class HnswGraphFieldEntry extends FieldEntry {
338343

339344
final long[] ordOffsets;
340345

341-
HnswGraphFieldEntry(DataInput input, VectorValues.SearchStrategy searchStrategy)
346+
HnswGraphFieldEntry(DataInput input, VectorValues.SimilarityFunction similarityFunction)
342347
throws IOException {
343-
super(input, searchStrategy);
348+
super(input, similarityFunction);
344349
ordOffsets = new long[size()];
345350
long offset = 0;
346351
for (int i = 0; i < ordOffsets.length; i++) {
@@ -385,8 +390,8 @@ public int size() {
385390
}
386391

387392
@Override
388-
public SearchStrategy searchStrategy() {
389-
return fieldEntry.searchStrategy;
393+
public SimilarityFunction similarityFunction() {
394+
return fieldEntry.similarityFunction;
390395
}
391396

392397
@Override
@@ -425,7 +430,7 @@ public int advance(int target) {
425430
if (ord < 0) {
426431
ord = -(ord + 1);
427432
}
428-
assert ord >= 0 && ord <= fieldEntry.ordToDoc.length;
433+
assert ord <= fieldEntry.ordToDoc.length;
429434
if (ord == fieldEntry.ordToDoc.length) {
430435
doc = NO_MORE_DOCS;
431436
} else {

0 commit comments

Comments
 (0)