Skip to content

Commit 461a053

Browse files
committed
Bypass HNSW graph building for tiny segments
1 parent 251e7df commit 461a053

File tree

6 files changed

+247
-32
lines changed

6 files changed

+247
-32
lines changed

lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswScalarQuantizedVectorsFormat.java

Lines changed: 69 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,25 @@ public class Lucene99HnswScalarQuantizedVectorsFormat extends KnnVectorsFormat {
6565
private final int numMergeWorkers;
6666
private final TaskExecutor mergeExec;
6767

68+
/**
69+
* Whether to bypass HNSW graph building for tiny segments (at or below {@link
70+
* Lucene99HnswVectorsFormat#HNSW_GRAPH_THRESHOLD}). When enabled, segments with at most the
71+
* threshold number of vectors will store only flat vectors with quantization, significantly
72+
* improving indexing performance for workloads with frequent flushes.
73+
*/
74+
private final boolean bypassTinySegments;
75+
6876
/** Constructs a format using default graph construction parameters with 7 bit quantization */
6977
public Lucene99HnswScalarQuantizedVectorsFormat() {
70-
this(DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH, DEFAULT_NUM_MERGE_WORKER, 7, false, null, null);
78+
this(
79+
DEFAULT_MAX_CONN,
80+
DEFAULT_BEAM_WIDTH,
81+
DEFAULT_NUM_MERGE_WORKER,
82+
7,
83+
false,
84+
null,
85+
null,
86+
false);
7187
}
7288

7389
/**
@@ -77,7 +93,19 @@ public Lucene99HnswScalarQuantizedVectorsFormat() {
7793
* @param beamWidth the size of the queue maintained during graph construction.
7894
*/
7995
public Lucene99HnswScalarQuantizedVectorsFormat(int maxConn, int beamWidth) {
80-
this(maxConn, beamWidth, DEFAULT_NUM_MERGE_WORKER, 7, false, null, null);
96+
this(maxConn, beamWidth, DEFAULT_NUM_MERGE_WORKER, 7, false, null, null, false);
97+
}
98+
99+
/**
100+
* Constructs a format using the given graph construction parameters with 7 bit quantization
101+
*
102+
* @param maxConn the maximum number of connections to a node in the HNSW graph
103+
* @param beamWidth the size of the queue maintained during graph construction.
104+
* @param bypassTinySegments whether to bypass HNSW graph building for tiny segments
105+
*/
106+
public Lucene99HnswScalarQuantizedVectorsFormat(
107+
int maxConn, int beamWidth, boolean bypassTinySegments) {
108+
this(maxConn, beamWidth, DEFAULT_NUM_MERGE_WORKER, 7, false, null, null, bypassTinySegments);
81109
}
82110

83111
/**
@@ -107,6 +135,40 @@ public Lucene99HnswScalarQuantizedVectorsFormat(
107135
boolean compress,
108136
Float confidenceInterval,
109137
ExecutorService mergeExec) {
138+
this(maxConn, beamWidth, numMergeWorkers, bits, compress, confidenceInterval, mergeExec, false);
139+
}
140+
141+
/**
142+
* Constructs a format using the given graph construction parameters and scalar quantization.
143+
*
144+
* @param maxConn the maximum number of connections to a node in the HNSW graph
145+
* @param beamWidth the size of the queue maintained during graph construction.
146+
* @param numMergeWorkers number of workers (threads) that will be used when doing merge. If
147+
* larger than 1, a non-null {@link ExecutorService} must be passed as mergeExec
148+
* @param bits the number of bits to use for scalar quantization (must be 4 or 7)
149+
* @param compress whether to compress the quantized vectors by another 50% when bits=4. If
150+
* `true`, pairs of (4 bit quantized) dimensions are packed into a single byte. This must be
151+
* `false` when bits=7. This provides a trade-off of 50% reduction in hot vector memory usage
152+
* during searching, at some decode speed penalty.
153+
* @param confidenceInterval the confidenceInterval for scalar quantizing the vectors, when `null`
154+
* it is calculated based on the vector field dimensions. When `0`, the quantiles are
155+
* dynamically determined by sampling many confidence intervals and determining the most
156+
* accurate pair.
157+
* @param mergeExec the {@link ExecutorService} that will be used by ALL vector writers that are
158+
* generated by this format to do the merge
159+
* @param bypassTinySegments whether to bypass HNSW graph building for tiny segments (at or below {@link
160+
* Lucene99HnswVectorsFormat#HNSW_GRAPH_THRESHOLD} vectors). When enabled, improves indexing
161+
* performance for workloads with frequent flushes.
162+
*/
163+
public Lucene99HnswScalarQuantizedVectorsFormat(
164+
int maxConn,
165+
int beamWidth,
166+
int numMergeWorkers,
167+
int bits,
168+
boolean compress,
169+
Float confidenceInterval,
170+
ExecutorService mergeExec,
171+
boolean bypassTinySegments) {
110172
super(NAME);
111173
if (maxConn <= 0 || maxConn > MAXIMUM_MAX_CONN) {
112174
throw new IllegalArgumentException(
@@ -124,6 +186,7 @@ public Lucene99HnswScalarQuantizedVectorsFormat(
124186
}
125187
this.maxConn = maxConn;
126188
this.beamWidth = beamWidth;
189+
this.bypassTinySegments = bypassTinySegments;
127190
if (numMergeWorkers == 1 && mergeExec != null) {
128191
throw new IllegalArgumentException(
129192
"No executor service is needed as we'll use single thread to merge");
@@ -146,7 +209,8 @@ public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException
146209
beamWidth,
147210
flatVectorsFormat.fieldsWriter(state),
148211
numMergeWorkers,
149-
mergeExec);
212+
mergeExec,
213+
bypassTinySegments);
150214
}
151215

152216
@Override
@@ -165,6 +229,8 @@ public String toString() {
165229
+ maxConn
166230
+ ", beamWidth="
167231
+ beamWidth
232+
+ ", bypassTinySegments="
233+
+ bypassTinySegments
168234
+ ", flatVectorFormat="
169235
+ flatVectorsFormat
170236
+ ")";

lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsFormat.java

Lines changed: 58 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,13 @@ public final class Lucene99HnswVectorsFormat extends KnnVectorsFormat {
115115
/** Default to use single thread merge */
116116
public static final int DEFAULT_NUM_MERGE_WORKER = 1;
117117

118+
/**
119+
* Threshold below which HNSW graph building is bypassed for tiny segments. Segments with fewer
120+
* vectors will use flat storage only, improving indexing performance when flushes are
121+
* frequent.
122+
*/
123+
public static final int HNSW_GRAPH_THRESHOLD = 10_000;
124+
118125
static final int DIRECT_MONOTONIC_BLOCK_SHIFT = 16;
119126

120127
/**
@@ -137,9 +144,16 @@ public final class Lucene99HnswVectorsFormat extends KnnVectorsFormat {
137144
private final int numMergeWorkers;
138145
private final TaskExecutor mergeExec;
139146

147+
/**
148+
* Whether to bypass HNSW graph building for tiny segments (at or below {@link #HNSW_GRAPH_THRESHOLD}).
149+
* When enabled, segments with at most the threshold number of vectors will store only flat
150+
* vectors, significantly improving indexing performance for workloads with frequent flushes.
151+
*/
152+
private final boolean bypassTinySegments;
153+
140154
/** Constructs a format using default graph construction parameters */
141155
public Lucene99HnswVectorsFormat() {
142-
this(DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH, DEFAULT_NUM_MERGE_WORKER, null);
156+
this(DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH, DEFAULT_NUM_MERGE_WORKER, null, false);
143157
}
144158

145159
/**
@@ -149,11 +163,22 @@ public Lucene99HnswVectorsFormat() {
149163
* @param beamWidth the size of the queue maintained during graph construction.
150164
*/
151165
public Lucene99HnswVectorsFormat(int maxConn, int beamWidth) {
152-
this(maxConn, beamWidth, DEFAULT_NUM_MERGE_WORKER, null);
166+
this(maxConn, beamWidth, DEFAULT_NUM_MERGE_WORKER, null, false);
153167
}
154168

155169
/**
156-
* Constructs a format using the given graph construction parameters and scalar quantization.
170+
* Constructs a format using the given graph construction parameters.
171+
*
172+
* @param maxConn the maximum number of connections to a node in the HNSW graph
173+
* @param beamWidth the size of the queue maintained during graph construction.
174+
* @param bypassTinySegments whether to bypass HNSW graph building for tiny segments
175+
*/
176+
public Lucene99HnswVectorsFormat(int maxConn, int beamWidth, boolean bypassTinySegments) {
177+
this(maxConn, beamWidth, DEFAULT_NUM_MERGE_WORKER, null, bypassTinySegments);
178+
}
179+
180+
/**
181+
* Constructs a format using the given graph construction parameters.
157182
*
158183
* @param maxConn the maximum number of connections to a node in the HNSW graph
159184
* @param beamWidth the size of the queue maintained during graph construction.
@@ -165,6 +190,29 @@ public Lucene99HnswVectorsFormat(int maxConn, int beamWidth) {
165190
*/
166191
public Lucene99HnswVectorsFormat(
167192
int maxConn, int beamWidth, int numMergeWorkers, ExecutorService mergeExec) {
193+
this(maxConn, beamWidth, numMergeWorkers, mergeExec, false);
194+
}
195+
196+
/**
197+
* Constructs a format using the given graph construction parameters.
198+
*
199+
* @param maxConn the maximum number of connections to a node in the HNSW graph
200+
* @param beamWidth the size of the queue maintained during graph construction.
201+
* @param numMergeWorkers number of workers (threads) that will be used when doing merge. If
202+
* larger than 1, a non-null {@link ExecutorService} must be passed as mergeExec
203+
* @param mergeExec the {@link ExecutorService} that will be used by ALL vector writers that are
204+
* generated by this format to do the merge. If null, the configured {@link
205+
* MergeScheduler#getIntraMergeExecutor(MergePolicy.OneMerge)} is used.
206+
* @param bypassTinySegments whether to bypass HNSW graph building for tiny segments (at or below {@link
207+
* #HNSW_GRAPH_THRESHOLD} vectors). When enabled, improves indexing performance for workloads
208+
* with frequent flushes.
209+
*/
210+
public Lucene99HnswVectorsFormat(
211+
int maxConn,
212+
int beamWidth,
213+
int numMergeWorkers,
214+
ExecutorService mergeExec,
215+
boolean bypassTinySegments) {
168216
super("Lucene99HnswVectorsFormat");
169217
if (maxConn <= 0 || maxConn > MAXIMUM_MAX_CONN) {
170218
throw new IllegalArgumentException(
@@ -182,6 +230,7 @@ public Lucene99HnswVectorsFormat(
182230
}
183231
this.maxConn = maxConn;
184232
this.beamWidth = beamWidth;
233+
this.bypassTinySegments = bypassTinySegments;
185234
if (numMergeWorkers == 1 && mergeExec != null) {
186235
throw new IllegalArgumentException(
187236
"No executor service is needed as we'll use single thread to merge");
@@ -202,12 +251,14 @@ public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException
202251
beamWidth,
203252
flatVectorsFormat.fieldsWriter(state),
204253
numMergeWorkers,
205-
mergeExec);
254+
mergeExec,
255+
bypassTinySegments);
206256
}
207257

208258
@Override
209259
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
210-
return new Lucene99HnswVectorsReader(state, flatVectorsFormat.fieldsReader(state));
260+
return new Lucene99HnswVectorsReader(
261+
state, flatVectorsFormat.fieldsReader(state), bypassTinySegments);
211262
}
212263

213264
@Override
@@ -221,6 +272,8 @@ public String toString() {
221272
+ maxConn
222273
+ ", beamWidth="
223274
+ beamWidth
275+
+ ", bypassTinySegments="
276+
+ bypassTinySegments
224277
+ ", flatVectorFormat="
225278
+ flatVectorsFormat
226279
+ ")";

lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsReader.java

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -76,11 +76,14 @@ public final class Lucene99HnswVectorsReader extends KnnVectorsReader
7676
private final FieldInfos fieldInfos;
7777
private final IntObjectHashMap<FieldEntry> fields;
7878
private final IndexInput vectorIndex;
79+
private final boolean bypassTinySegments;
7980

80-
public Lucene99HnswVectorsReader(SegmentReadState state, FlatVectorsReader flatVectorsReader)
81+
public Lucene99HnswVectorsReader(
82+
SegmentReadState state, FlatVectorsReader flatVectorsReader, boolean bypassTinySegments)
8183
throws IOException {
8284
this.fields = new IntObjectHashMap<>();
8385
this.flatVectorsReader = flatVectorsReader;
86+
this.bypassTinySegments = bypassTinySegments;
8487
this.fieldInfos = state.fieldInfos;
8588
String metaFileName =
8689
IndexFileNames.segmentFileName(
@@ -122,12 +125,18 @@ public Lucene99HnswVectorsReader(SegmentReadState state, FlatVectorsReader flatV
122125
}
123126
}
124127

128+
public Lucene99HnswVectorsReader(SegmentReadState state, FlatVectorsReader flatVectorsReader)
129+
throws IOException {
130+
this(state, flatVectorsReader, false);
131+
}
132+
125133
private Lucene99HnswVectorsReader(
126134
Lucene99HnswVectorsReader reader, FlatVectorsReader flatVectorsReader) {
127135
this.flatVectorsReader = flatVectorsReader;
128136
this.fieldInfos = reader.fieldInfos;
129137
this.fields = reader.fields;
130138
this.vectorIndex = reader.vectorIndex;
139+
this.bypassTinySegments = reader.bypassTinySegments;
131140
}
132141

133142
@Override
@@ -326,16 +335,19 @@ private void search(
326335
final KnnCollector collector =
327336
new OrdinalTranslatedKnnCollector(knnCollector, scorer::ordToDoc);
328337
final Bits acceptedOrds = scorer.getAcceptOrds(acceptDocs);
329-
HnswGraph graph = getGraph(fieldEntry);
330-
boolean doHnsw = knnCollector.k() < scorer.maxOrd();
338+
boolean doHnsw =
339+
knnCollector.k() < scorer.maxOrd()
340+
&& (bypassTinySegments == false
341+
|| fieldEntry.size() > Lucene99HnswVectorsFormat.HNSW_GRAPH_THRESHOLD);
331342
// Take into account if quantized? E.g. some scorer cost?
332343
int filteredDocCount = 0;
333344
// The approximate number of vectors that would be visited if we did not filter
334-
int unfilteredVisit = HnswGraphSearcher.expectedVisitedNodes(knnCollector.k(), graph.size());
345+
int unfilteredVisit =
346+
HnswGraphSearcher.expectedVisitedNodes(knnCollector.k(), fieldEntry.size());
335347
if (acceptDocs instanceof BitSet bitSet) {
336348
// Use approximate cardinality as this is good enough, but ensure we don't exceed the graph
337349
// size as that is illogical
338-
filteredDocCount = Math.min(bitSet.approximateCardinality(), graph.size());
350+
filteredDocCount = Math.min(bitSet.approximateCardinality(), fieldEntry.size());
339351
if (unfilteredVisit >= filteredDocCount) {
340352
doHnsw = false;
341353
}

0 commit comments

Comments
 (0)