Skip to content

Commit 45bd06c

Browse files
author
Michael Sokolov
committed
LUCENE-9905: rename Lucene90VectorFormat and its reader and writer
1 parent 6d4b5ea commit 45bd06c

File tree

12 files changed

+55
-93
lines changed

12 files changed

+55
-93
lines changed

lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90Codec.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ public DocValuesFormat getDocValuesFormatForField(String field) {
8484
}
8585
};
8686

87-
private final VectorFormat vectorFormat = new Lucene90VectorFormat();
87+
private final VectorFormat vectorFormat = new Lucene90HnswVectorFormat();
8888

8989
private final StoredFieldsFormat storedFieldsFormat;
9090

lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90VectorFormat.java renamed to lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorFormat.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -64,11 +64,11 @@
6464
*
6565
* @lucene.experimental
6666
*/
67-
public final class Lucene90VectorFormat extends VectorFormat {
67+
public final class Lucene90HnswVectorFormat extends VectorFormat {
6868

69-
static final String META_CODEC_NAME = "Lucene90VectorFormatMeta";
70-
static final String VECTOR_DATA_CODEC_NAME = "Lucene90VectorFormatData";
71-
static final String VECTOR_INDEX_CODEC_NAME = "Lucene90VectorFormatIndex";
69+
static final String META_CODEC_NAME = "Lucene90HnswVectorFormatMeta";
70+
static final String VECTOR_DATA_CODEC_NAME = "Lucene90HnswVectorFormatData";
71+
static final String VECTOR_INDEX_CODEC_NAME = "Lucene90HnswVectorFormatIndex";
7272
static final String META_EXTENSION = "vem";
7373
static final String VECTOR_DATA_EXTENSION = "vec";
7474
static final String VECTOR_INDEX_EXTENSION = "vex";
@@ -77,15 +77,15 @@ public final class Lucene90VectorFormat extends VectorFormat {
7777
static final int VERSION_CURRENT = VERSION_START;
7878

7979
/** Sole constructor */
80-
public Lucene90VectorFormat() {}
80+
public Lucene90HnswVectorFormat() {}
8181

8282
@Override
8383
public VectorWriter fieldsWriter(SegmentWriteState state) throws IOException {
84-
return new Lucene90VectorWriter(state);
84+
return new Lucene90HnswVectorWriter(state);
8585
}
8686

8787
@Override
8888
public VectorReader fieldsReader(SegmentReadState state) throws IOException {
89-
return new Lucene90VectorReader(state);
89+
return new Lucene90HnswVectorReader(state);
9090
}
9191
}

lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90VectorReader.java renamed to lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorReader.java

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -53,34 +53,34 @@
5353
*
5454
* @lucene.experimental
5555
*/
56-
public final class Lucene90VectorReader extends VectorReader {
56+
public final class Lucene90HnswVectorReader extends VectorReader {
5757

5858
private final FieldInfos fieldInfos;
5959
private final Map<String, FieldEntry> fields = new HashMap<>();
6060
private final IndexInput vectorData;
6161
private final IndexInput vectorIndex;
6262
private final long checksumSeed;
6363

64-
Lucene90VectorReader(SegmentReadState state) throws IOException {
64+
Lucene90HnswVectorReader(SegmentReadState state) throws IOException {
6565
this.fieldInfos = state.fieldInfos;
6666

67-
int versionMeta = readMetadata(state, Lucene90VectorFormat.META_EXTENSION);
67+
int versionMeta = readMetadata(state, Lucene90HnswVectorFormat.META_EXTENSION);
6868
long[] checksumRef = new long[1];
6969
boolean success = false;
7070
try {
7171
vectorData =
7272
openDataInput(
7373
state,
7474
versionMeta,
75-
Lucene90VectorFormat.VECTOR_DATA_EXTENSION,
76-
Lucene90VectorFormat.VECTOR_DATA_CODEC_NAME,
75+
Lucene90HnswVectorFormat.VECTOR_DATA_EXTENSION,
76+
Lucene90HnswVectorFormat.VECTOR_DATA_CODEC_NAME,
7777
checksumRef);
7878
vectorIndex =
7979
openDataInput(
8080
state,
8181
versionMeta,
82-
Lucene90VectorFormat.VECTOR_INDEX_EXTENSION,
83-
Lucene90VectorFormat.VECTOR_INDEX_CODEC_NAME,
82+
Lucene90HnswVectorFormat.VECTOR_INDEX_EXTENSION,
83+
Lucene90HnswVectorFormat.VECTOR_INDEX_CODEC_NAME,
8484
checksumRef);
8585
success = true;
8686
} finally {
@@ -101,9 +101,9 @@ private int readMetadata(SegmentReadState state, String fileExtension) throws IO
101101
versionMeta =
102102
CodecUtil.checkIndexHeader(
103103
meta,
104-
Lucene90VectorFormat.META_CODEC_NAME,
105-
Lucene90VectorFormat.VERSION_START,
106-
Lucene90VectorFormat.VERSION_CURRENT,
104+
Lucene90HnswVectorFormat.META_CODEC_NAME,
105+
Lucene90HnswVectorFormat.VERSION_START,
106+
Lucene90HnswVectorFormat.VERSION_CURRENT,
107107
state.segmentInfo.getId(),
108108
state.segmentSuffix);
109109
readFields(meta, state.fieldInfos);
@@ -130,8 +130,8 @@ private static IndexInput openDataInput(
130130
CodecUtil.checkIndexHeader(
131131
in,
132132
codecName,
133-
Lucene90VectorFormat.VERSION_START,
134-
Lucene90VectorFormat.VERSION_CURRENT,
133+
Lucene90HnswVectorFormat.VERSION_START,
134+
Lucene90HnswVectorFormat.VERSION_CURRENT,
135135
state.segmentInfo.getId(),
136136
state.segmentSuffix);
137137
if (versionMeta != versionVectorData) {
@@ -214,7 +214,7 @@ private FieldEntry readField(DataInput input) throws IOException {
214214

215215
@Override
216216
public long ramBytesUsed() {
217-
long totalBytes = RamUsageEstimator.shallowSizeOfInstance(Lucene90VectorReader.class);
217+
long totalBytes = RamUsageEstimator.shallowSizeOfInstance(Lucene90HnswVectorReader.class);
218218
totalBytes +=
219219
RamUsageEstimator.sizeOfMap(
220220
fields, RamUsageEstimator.shallowSizeOfInstance(FieldEntry.class));
@@ -255,7 +255,7 @@ public TopDocs search(String field, float[] target, int k, int fanout) throws IO
255255
HnswGraph.search(target, k, k + fanout, vectorValues, getGraphValues(fieldEntry), random);
256256
int i = 0;
257257
ScoreDoc[] scoreDocs = new ScoreDoc[Math.min(results.size(), k)];
258-
boolean reversed = fieldEntry.searchStrategy.reversed;
258+
boolean reversed = fieldEntry.similarityFunction.reversed;
259259
while (results.size() > 0) {
260260
int node = results.topNode();
261261
float score = results.topScore();
@@ -292,7 +292,7 @@ public KnnGraphValues getGraphValues(String field) throws IOException {
292292
}
293293

294294
private KnnGraphValues getGraphValues(FieldEntry entry) throws IOException {
295-
if (entry.similarityFunction.isHnsw()) {
295+
if (entry.similarityFunction != VectorValues.SimilarityFunction.NONE) {
296296
HnswGraphFieldEntry graphEntry = (HnswGraphFieldEntry) entry;
297297
IndexInput bytesSlice =
298298
vectorIndex.slice("graph-data", entry.indexDataOffset, entry.indexDataLength);

lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90VectorWriter.java renamed to lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorWriter.java

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -40,32 +40,32 @@
4040
*
4141
* @lucene.experimental
4242
*/
43-
public final class Lucene90VectorWriter extends VectorWriter {
43+
public final class Lucene90HnswVectorWriter extends VectorWriter {
4444

4545
private final SegmentWriteState segmentWriteState;
4646
private final IndexOutput meta, vectorData, vectorIndex;
4747

4848
private boolean finished;
4949

50-
Lucene90VectorWriter(SegmentWriteState state) throws IOException {
50+
Lucene90HnswVectorWriter(SegmentWriteState state) throws IOException {
5151
assert state.fieldInfos.hasVectorValues();
5252
segmentWriteState = state;
5353

5454
String metaFileName =
5555
IndexFileNames.segmentFileName(
56-
state.segmentInfo.name, state.segmentSuffix, Lucene90VectorFormat.META_EXTENSION);
56+
state.segmentInfo.name, state.segmentSuffix, Lucene90HnswVectorFormat.META_EXTENSION);
5757

5858
String vectorDataFileName =
5959
IndexFileNames.segmentFileName(
6060
state.segmentInfo.name,
6161
state.segmentSuffix,
62-
Lucene90VectorFormat.VECTOR_DATA_EXTENSION);
62+
Lucene90HnswVectorFormat.VECTOR_DATA_EXTENSION);
6363

6464
String indexDataFileName =
6565
IndexFileNames.segmentFileName(
6666
state.segmentInfo.name,
6767
state.segmentSuffix,
68-
Lucene90VectorFormat.VECTOR_INDEX_EXTENSION);
68+
Lucene90HnswVectorFormat.VECTOR_INDEX_EXTENSION);
6969

7070
boolean success = false;
7171
try {
@@ -75,20 +75,20 @@ public final class Lucene90VectorWriter extends VectorWriter {
7575

7676
CodecUtil.writeIndexHeader(
7777
meta,
78-
Lucene90VectorFormat.META_CODEC_NAME,
79-
Lucene90VectorFormat.VERSION_CURRENT,
78+
Lucene90HnswVectorFormat.META_CODEC_NAME,
79+
Lucene90HnswVectorFormat.VERSION_CURRENT,
8080
state.segmentInfo.getId(),
8181
state.segmentSuffix);
8282
CodecUtil.writeIndexHeader(
8383
vectorData,
84-
Lucene90VectorFormat.VECTOR_DATA_CODEC_NAME,
85-
Lucene90VectorFormat.VERSION_CURRENT,
84+
Lucene90HnswVectorFormat.VECTOR_DATA_CODEC_NAME,
85+
Lucene90HnswVectorFormat.VERSION_CURRENT,
8686
state.segmentInfo.getId(),
8787
state.segmentSuffix);
8888
CodecUtil.writeIndexHeader(
8989
vectorIndex,
90-
Lucene90VectorFormat.VECTOR_INDEX_CODEC_NAME,
91-
Lucene90VectorFormat.VERSION_CURRENT,
90+
Lucene90HnswVectorFormat.VECTOR_INDEX_CODEC_NAME,
91+
Lucene90HnswVectorFormat.VERSION_CURRENT,
9292
state.segmentInfo.getId(),
9393
state.segmentSuffix);
9494
success = true;
@@ -121,7 +121,7 @@ public void writeField(FieldInfo fieldInfo, VectorValues vectors) throws IOExcep
121121
long[] offsets = new long[count];
122122
long vectorDataLength = vectorData.getFilePointer() - vectorDataOffset;
123123
long vectorIndexOffset = vectorIndex.getFilePointer();
124-
if (vectors.similarityFunction().isHnsw()) {
124+
if (vectors.similarityFunction() != VectorValues.SimilarityFunction.NONE) {
125125
if (vectors instanceof RandomAccessVectorValuesProducer) {
126126
writeGraph(
127127
vectorIndex,
@@ -146,7 +146,7 @@ public void writeField(FieldInfo fieldInfo, VectorValues vectors) throws IOExcep
146146
vectorIndexLength,
147147
count,
148148
docIds);
149-
if (vectors.similarityFunction().isHnsw()) {
149+
if (vectors.similarityFunction() != VectorValues.SimilarityFunction.NONE) {
150150
writeGraphOffsets(meta, offsets);
151151
}
152152
}

lucene/core/src/java/org/apache/lucene/codecs/lucene90/package-info.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -180,9 +180,9 @@
180180
* of files, recording dimensionally indexed fields, to enable fast numeric range filtering
181181
* and large numeric values like BigInteger and BigDecimal (1D) and geographic shape
182182
* intersection (2D, 3D).
183-
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90VectorFormat Vector values}. The vector
184-
* format stores numeric vectors in a format optimized for random access and computation,
185-
* supporting high-dimensional nearest-neighbor search.
183+
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90HnswVectorFormat Vector values}. The
184+
* vector format stores numeric vectors in a format optimized for random access and
185+
* computation, supporting high-dimensional nearest-neighbor search.
186186
* </ul>
187187
*
188188
* <p>Details on each of these are provided in their linked pages. </div> <a id="File_Naming"></a>
@@ -310,7 +310,7 @@
310310
* <td>Holds indexed points</td>
311311
* </tr>
312312
* <tr>
313-
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90VectorFormat Vector values}</td>
313+
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90HnswVectorFormat Vector values}</td>
314314
* <td>.vec, .vex, .vem</td>
315315
* <td>Holds indexed vectors; <code>.vec</code> files contain the raw vector data,
316316
* <code>.vex</code> the HNSW graph index, and <code>.vem</code> the vector metadata</td>

lucene/core/src/java/org/apache/lucene/document/VectorField.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -78,9 +78,8 @@ public static FieldType createHnswType(
7878
throw new IllegalArgumentException(
7979
"cannot index vectors with dimension greater than " + VectorValues.MAX_DIMENSIONS);
8080
}
81-
if (similarityFunction == null || !similarityFunction.isHnsw()) {
82-
throw new IllegalArgumentException(
83-
"similarity function must not be null, received: " + similarityFunction);
81+
if (similarityFunction == null || similarityFunction == VectorValues.SimilarityFunction.NONE) {
82+
throw new IllegalArgumentException("similarity function must not be: " + similarityFunction);
8483
}
8584
FieldType type = new FieldType();
8685
type.setVectorDimensionsAndSimilarityFunction(dimension, similarityFunction);

lucene/core/src/java/org/apache/lucene/index/CheckIndex.java

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,6 @@
3838
import org.apache.lucene.codecs.PostingsFormat;
3939
import org.apache.lucene.codecs.StoredFieldsReader;
4040
import org.apache.lucene.codecs.TermVectorsReader;
41-
import org.apache.lucene.codecs.VectorReader;
42-
import org.apache.lucene.codecs.lucene90.Lucene90VectorReader;
4341
import org.apache.lucene.document.Document;
4442
import org.apache.lucene.document.DocumentStoredFieldVisitor;
4543
import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus;
@@ -2338,29 +2336,6 @@ public static Status.VectorValuesStatus testVectors(
23382336
+ docCount
23392337
+ " docs with values");
23402338
}
2341-
VectorReader vectorReader = reader.getVectorReader();
2342-
if (vectorReader instanceof Lucene90VectorReader) {
2343-
KnnGraphValues graphValues =
2344-
((Lucene90VectorReader) vectorReader).getGraphValues(fieldInfo.name);
2345-
int size = graphValues.size();
2346-
for (int i = 0; i < size; i++) {
2347-
graphValues.seek(i);
2348-
for (int neighbor = graphValues.nextNeighbor();
2349-
neighbor != NO_MORE_DOCS;
2350-
neighbor = graphValues.nextNeighbor()) {
2351-
if (neighbor < 0 || neighbor >= size) {
2352-
throw new RuntimeException(
2353-
"Field \""
2354-
+ fieldInfo.name
2355-
+ "\" has an invalid neighbor ordinal: "
2356-
+ neighbor
2357-
+ " which should be in [0,"
2358-
+ size
2359-
+ ")");
2360-
}
2361-
}
2362-
}
2363-
}
23642339
status.totalVectorValues += docCount;
23652340
}
23662341
}

lucene/core/src/java/org/apache/lucene/index/VectorValues.java

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
* See the License for the specific language governing permissions and
1515
* limitations under the License.
1616
*/
17-
1817
package org.apache.lucene.index;
1918

2019
import static org.apache.lucene.util.VectorUtil.dotProduct;
@@ -83,8 +82,8 @@ public BytesRef binaryValue() throws IOException {
8382
public enum SimilarityFunction {
8483

8584
/**
86-
* No similarity function is provided. Note: {@link VectorReader#search(float[], int, int)} is
87-
* not supported for fields specifying this.
85+
* No similarity function is provided. Note: {@link VectorReader#search(String, float[], int,
86+
* int)} is not supported for fields specifying this.
8887
*/
8988
NONE,
9089

@@ -127,18 +126,6 @@ public float compare(float[] v1, float[] v2) {
127126
throw new IllegalStateException("Incomparable similarity function: " + this);
128127
}
129128
}
130-
131-
/** Return true if vectors indexed using this similarity will be indexed using an HNSW graph */
132-
public boolean isHnsw() {
133-
switch (this) {
134-
case EUCLIDEAN:
135-
case DOT_PRODUCT:
136-
return true;
137-
case NONE:
138-
default:
139-
return false;
140-
}
141-
}
142129
}
143130

144131
/**

lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90VectorFormat.java renamed to lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90HnswVectorFormat.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
import org.apache.lucene.index.BaseVectorFormatTestCase;
2121
import org.apache.lucene.util.TestUtil;
2222

23-
public class TestLucene90VectorFormat extends BaseVectorFormatTestCase {
23+
public class TestLucene90HnswVectorFormat extends BaseVectorFormatTestCase {
2424

2525
@Override
2626
protected Codec getCodec() {

lucene/core/src/test/org/apache/lucene/index/TestKnnGraph.java

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
import java.util.List;
2828
import java.util.Set;
2929
import org.apache.lucene.codecs.Codec;
30-
import org.apache.lucene.codecs.lucene90.Lucene90VectorReader;
30+
import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorReader;
3131
import org.apache.lucene.document.Document;
3232
import org.apache.lucene.document.Field;
3333
import org.apache.lucene.document.FieldType;
@@ -171,8 +171,9 @@ private int[][] getIndexedGraph(float[][] values, int mergePoint, long seed) thr
171171
iw.forceMerge(1);
172172
}
173173
try (IndexReader reader = DirectoryReader.open(dir)) {
174-
Lucene90VectorReader vectorReader =
175-
((Lucene90VectorReader) ((CodecReader) getOnlyLeafReader(reader)).getVectorReader());
174+
Lucene90HnswVectorReader vectorReader =
175+
((Lucene90HnswVectorReader)
176+
((CodecReader) getOnlyLeafReader(reader)).getVectorReader());
176177
graph = copyGraph(vectorReader.getGraphValues(KNN_GRAPH_FIELD));
177178
}
178179
}
@@ -309,8 +310,8 @@ private void assertConsistentGraph(IndexWriter iw, float[][] values) throws IOEx
309310
for (LeafReaderContext ctx : dr.leaves()) {
310311
LeafReader reader = ctx.reader();
311312
VectorValues vectorValues = reader.getVectorValues(KNN_GRAPH_FIELD);
312-
Lucene90VectorReader vectorReader =
313-
((Lucene90VectorReader) ((CodecReader) reader).getVectorReader());
313+
Lucene90HnswVectorReader vectorReader =
314+
((Lucene90HnswVectorReader) ((CodecReader) reader).getVectorReader());
314315
if (vectorReader == null) {
315316
continue;
316317
}

0 commit comments

Comments
 (0)