@@ -115,6 +115,13 @@ public final class Lucene99HnswVectorsFormat extends KnnVectorsFormat {
115115 /** Default to use single thread merge */
116116 public static final int DEFAULT_NUM_MERGE_WORKER = 1 ;
117117
118+ /**
119+ * Vector-count threshold for bypassing HNSW graph construction. When bypassing is enabled, a
120+ * segment holding fewer vectors than this stores flat vectors only, which is intended to speed
121+ * up indexing for workloads that flush frequently.
122+ */
123+ public static final int HNSW_GRAPH_THRESHOLD = 10_000 ;
124+
118125 static final int DIRECT_MONOTONIC_BLOCK_SHIFT = 16 ;
119126
120127 /**
@@ -137,9 +144,16 @@ public final class Lucene99HnswVectorsFormat extends KnnVectorsFormat {
137144 private final int numMergeWorkers ;
138145 private final TaskExecutor mergeExec ;
139146
147+ /**
148+ * Whether HNSW graph building is bypassed for tiny segments (fewer than {@link
149+ * #HNSW_GRAPH_THRESHOLD} vectors), which then store only flat vectors; intended to speed up
150+ * indexing when flushes are frequent. Handed to both the writer and the reader of this format.
151+ */
152+ private final boolean bypassTinySegments ;
153+
140154 /** Constructs a format using default graph construction parameters; tiny-segment bypass is off. */
141155 public Lucene99HnswVectorsFormat () {
142- this (DEFAULT_MAX_CONN , DEFAULT_BEAM_WIDTH , DEFAULT_NUM_MERGE_WORKER , null );
156+ this (DEFAULT_MAX_CONN , DEFAULT_BEAM_WIDTH , DEFAULT_NUM_MERGE_WORKER , null , false );
143157 }
144158
145159 /**
@@ -149,11 +163,22 @@ public Lucene99HnswVectorsFormat() {
149163 * @param beamWidth the size of the queue maintained during graph construction.
150164 */
151165 public Lucene99HnswVectorsFormat (int maxConn , int beamWidth ) {
152- this (maxConn , beamWidth , DEFAULT_NUM_MERGE_WORKER , null );
166+ this (maxConn , beamWidth , DEFAULT_NUM_MERGE_WORKER , null , false );
153167 }
154168
155169 /**
156- * Constructs a format using the given graph construction parameters and scalar quantization.
170+ * Constructs a format from the given graph construction parameters, with default merge settings.
171+ *
172+ * @param maxConn the maximum number of connections to a node in the HNSW graph
173+ * @param beamWidth the size of the queue maintained during graph construction.
174+ * @param bypassTinySegments if true, bypass graph building below {@link #HNSW_GRAPH_THRESHOLD}
175+ */
176+ public Lucene99HnswVectorsFormat (int maxConn , int beamWidth , boolean bypassTinySegments ) {
177+ this (maxConn , beamWidth , DEFAULT_NUM_MERGE_WORKER , null , bypassTinySegments );
178+ }
179+
180+ /**
181+ * Constructs a format using the given graph construction parameters.
157182 *
158183 * @param maxConn the maximum number of connections to a node in the HNSW graph
159184 * @param beamWidth the size of the queue maintained during graph construction.
@@ -165,6 +190,29 @@ public Lucene99HnswVectorsFormat(int maxConn, int beamWidth) {
165190 */
166191 public Lucene99HnswVectorsFormat (
167192 int maxConn , int beamWidth , int numMergeWorkers , ExecutorService mergeExec ) {
193+ this (maxConn , beamWidth , numMergeWorkers , mergeExec , false );
194+ }
195+
196+ /**
197+ * Constructs a format using the given graph construction parameters.
198+ *
199+ * @param maxConn the maximum number of connections to a node in the HNSW graph
200+ * @param beamWidth the size of the queue maintained during graph construction.
201+ * @param numMergeWorkers number of workers (threads) that will be used when doing merge. If
202+ * larger than 1, a non-null {@link ExecutorService} must be passed as mergeExec
203+ * @param mergeExec the {@link ExecutorService} that will be used by ALL vector writers that are
204+ * generated by this format to do the merge. If null, the configured {@link
205+ * MergeScheduler#getIntraMergeExecutor(MergePolicy.OneMerge)} is used.
206+ * @param bypassTinySegments whether to bypass HNSW graph building for tiny segments (below {@link
207+ * #HNSW_GRAPH_THRESHOLD} vectors). When enabled, improves indexing performance for workloads
208+ * with frequent flushes.
209+ */
210+ public Lucene99HnswVectorsFormat (
211+ int maxConn ,
212+ int beamWidth ,
213+ int numMergeWorkers ,
214+ ExecutorService mergeExec ,
215+ boolean bypassTinySegments ) {
168216 super ("Lucene99HnswVectorsFormat" );
169217 if (maxConn <= 0 || maxConn > MAXIMUM_MAX_CONN ) {
170218 throw new IllegalArgumentException (
@@ -182,6 +230,7 @@ public Lucene99HnswVectorsFormat(
182230 }
183231 this .maxConn = maxConn ;
184232 this .beamWidth = beamWidth ;
233+ this .bypassTinySegments = bypassTinySegments ;
185234 if (numMergeWorkers == 1 && mergeExec != null ) {
186235 throw new IllegalArgumentException (
187236 "No executor service is needed as we'll use single thread to merge" );
@@ -202,12 +251,14 @@ public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException
202251 beamWidth ,
203252 flatVectorsFormat .fieldsWriter (state ),
204253 numMergeWorkers ,
205- mergeExec );
254+ mergeExec ,
255+ bypassTinySegments );
206256 }
207257
208258 @ Override
209259 public KnnVectorsReader fieldsReader (SegmentReadState state ) throws IOException {
// NOTE(review): the reader is handed this format instance's bypassTinySegments flag, and the
// no-arg constructor sets that flag to false. If formats are instantiated by name through the
// no-arg constructor at read time, a segment written with the flag enabled would be opened with
// false here. Confirm the reader detects graph-less fields from what was written to the segment
// rather than from this flag.
210- return new Lucene99HnswVectorsReader (state , flatVectorsFormat .fieldsReader (state ));
260+ return new Lucene99HnswVectorsReader (
261+ state , flatVectorsFormat .fieldsReader (state ), bypassTinySegments );
211262 }
212263
213264 @ Override
@@ -221,6 +272,8 @@ public String toString() {
221272 + maxConn
222273 + ", beamWidth="
223274 + beamWidth
275+ + ", bypassTinySegments="
276+ + bypassTinySegments
224277 + ", flatVectorFormat="
225278 + flatVectorsFormat
226279 + ")" ;
0 commit comments