Skip to content

Commit 6f6097d

Browse files
committed
Create vector format class that references Lucene changes
1 parent 4823d30 commit 6f6097d

File tree

2 files changed

+800
-0
lines changed

2 files changed

+800
-0
lines changed
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
/*
2+
* @notice
3+
* Licensed to the Apache Software Foundation (ASF) under one or more
4+
* contributor license agreements. See the NOTICE file distributed with
5+
* this work for additional information regarding copyright ownership.
6+
* The ASF licenses this file to You under the Apache License, Version 2.0
7+
* (the "License"); you may not use this file except in compliance with
8+
* the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*
18+
* Modifications copyright (C) 2025 Elasticsearch B.V.
19+
*/
20+
21+
package org.elasticsearch.index.codec.vectors.es819;
22+
23+
import org.apache.lucene.codecs.KnnVectorsFormat;
24+
import org.apache.lucene.codecs.KnnVectorsReader;
25+
import org.apache.lucene.codecs.KnnVectorsWriter;
26+
import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil;
27+
import org.apache.lucene.codecs.hnsw.FlatVectorsFormat;
28+
import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat;
29+
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
30+
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader;
31+
import org.apache.lucene.index.MergePolicy;
32+
import org.apache.lucene.index.MergeScheduler;
33+
import org.apache.lucene.index.SegmentReadState;
34+
import org.apache.lucene.index.SegmentWriteState;
35+
import org.apache.lucene.search.TaskExecutor;
36+
import org.apache.lucene.util.hnsw.HnswGraph;
37+
38+
import java.io.IOException;
39+
import java.util.concurrent.ExecutorService;
40+
41+
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH;
42+
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN;
43+
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_NUM_MERGE_WORKER;
44+
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.MAXIMUM_BEAM_WIDTH;
45+
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.MAXIMUM_MAX_CONN;
46+
47+
/**
48+
* Copied from Lucene, replace with Lucene's implementation sometime after Lucene 10.3.0
49+
*/
50+
public class ES819HnswReducedHeapVectorsFormat extends KnnVectorsFormat {
51+
52+
static final String NAME = "ES819HnswReducedHeapVectorFormat";
53+
54+
static final String META_CODEC_NAME = "Lucene99HnswVectorsFormatMeta";
55+
static final String VECTOR_INDEX_CODEC_NAME = "Lucene99HnswVectorsFormatIndex";
56+
static final String META_EXTENSION = "vem";
57+
static final String VECTOR_INDEX_EXTENSION = "vex";
58+
static final int DIRECT_MONOTONIC_BLOCK_SHIFT = 16;
59+
60+
/**
61+
* Controls how many of the nearest neighbor candidates are connected to the new node. Defaults to
62+
* {@link Lucene99HnswVectorsFormat#DEFAULT_MAX_CONN}. See {@link HnswGraph} for more details.
63+
*/
64+
private final int maxConn;
65+
66+
/**
67+
* The number of candidate neighbors to track while searching the graph for each newly inserted
68+
* node. Defaults to {@link Lucene99HnswVectorsFormat#DEFAULT_BEAM_WIDTH}. See {@link HnswGraph}
69+
* for details.
70+
*/
71+
private final int beamWidth;
72+
73+
/** The format for storing, reading, and merging vectors on disk. */
74+
private static final FlatVectorsFormat flatVectorsFormat = new Lucene99FlatVectorsFormat(
75+
FlatVectorScorerUtil.getLucene99FlatVectorsScorer()
76+
);
77+
78+
private final int numMergeWorkers;
79+
private final TaskExecutor mergeExec;
80+
81+
/** Constructs a format using default graph construction parameters */
82+
public ES819HnswReducedHeapVectorsFormat() {
83+
this(DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH, DEFAULT_NUM_MERGE_WORKER, null);
84+
}
85+
86+
/**
87+
* Constructs a format using the given graph construction parameters.
88+
*
89+
* @param maxConn the maximum number of connections to a node in the HNSW graph
90+
* @param beamWidth the size of the queue maintained during graph construction.
91+
*/
92+
public ES819HnswReducedHeapVectorsFormat(int maxConn, int beamWidth) {
93+
this(maxConn, beamWidth, DEFAULT_NUM_MERGE_WORKER, null);
94+
}
95+
96+
/**
97+
* Constructs a format using the given graph construction parameters and scalar quantization.
98+
*
99+
* @param maxConn the maximum number of connections to a node in the HNSW graph
100+
* @param beamWidth the size of the queue maintained during graph construction.
101+
* @param numMergeWorkers number of workers (threads) that will be used when doing merge. If
102+
* larger than 1, a non-null {@link ExecutorService} must be passed as mergeExec
103+
* @param mergeExec the {@link ExecutorService} that will be used by ALL vector writers that are
104+
* generated by this format to do the merge. If null, the configured {@link
105+
* MergeScheduler#getIntraMergeExecutor(MergePolicy.OneMerge)} is used.
106+
*/
107+
public ES819HnswReducedHeapVectorsFormat(int maxConn, int beamWidth, int numMergeWorkers, ExecutorService mergeExec) {
108+
super(ES819HnswReducedHeapVectorsFormat.NAME);
109+
if (maxConn <= 0 || maxConn > MAXIMUM_MAX_CONN) {
110+
throw new IllegalArgumentException(
111+
"maxConn must be positive and less than or equal to " + MAXIMUM_MAX_CONN + "; maxConn=" + maxConn
112+
);
113+
}
114+
if (beamWidth <= 0 || beamWidth > MAXIMUM_BEAM_WIDTH) {
115+
throw new IllegalArgumentException(
116+
"beamWidth must be positive and less than or equal to " + MAXIMUM_BEAM_WIDTH + "; beamWidth=" + beamWidth
117+
);
118+
}
119+
this.maxConn = maxConn;
120+
this.beamWidth = beamWidth;
121+
if (numMergeWorkers == 1 && mergeExec != null) {
122+
throw new IllegalArgumentException("No executor service is needed as we'll use single thread to merge");
123+
}
124+
this.numMergeWorkers = numMergeWorkers;
125+
if (mergeExec != null) {
126+
this.mergeExec = new TaskExecutor(mergeExec);
127+
} else {
128+
this.mergeExec = null;
129+
}
130+
}
131+
132+
@Override
133+
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
134+
return new ES819HnswReducedHeapVectorsWriter(
135+
state,
136+
maxConn,
137+
beamWidth,
138+
flatVectorsFormat.fieldsWriter(state),
139+
numMergeWorkers,
140+
mergeExec
141+
);
142+
}
143+
144+
@Override
145+
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
146+
return new Lucene99HnswVectorsReader(state, flatVectorsFormat.fieldsReader(state));
147+
}
148+
149+
@Override
150+
public int getMaxDimensions(String fieldName) {
151+
return Lucene99HnswVectorsFormat.DEFAULT_MAX_DIMENSIONS;
152+
}
153+
}

0 commit comments

Comments
 (0)