Skip to content

Commit c20fa4f

Browse files
committed
Add write buffer parameter; add log for write time vs doc add time
1 parent ac79d0c commit c20fa4f

File tree

3 files changed

+39
-9
lines changed

3 files changed

+39
-9
lines changed

qa/vector/src/main/java/org/elasticsearch/test/knn/CmdLineArgs.java

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,8 @@ record CmdLineArgs(
5353
VectorEncoding vectorEncoding,
5454
int dimensions,
5555
boolean earlyTermination,
56-
KnnIndexTester.MergePolicyType mergePolicy
56+
KnnIndexTester.MergePolicyType mergePolicy,
57+
double writerBufferSizeInMb
5758
) implements ToXContentObject {
5859

5960
static final ParseField DOC_VECTORS_FIELD = new ParseField("doc_vectors");
@@ -82,6 +83,9 @@ record CmdLineArgs(
8283
static final ParseField FILTER_SELECTIVITY_FIELD = new ParseField("filter_selectivity");
8384
static final ParseField SEED_FIELD = new ParseField("seed");
8485
static final ParseField MERGE_POLICY_FIELD = new ParseField("merge_policy");
86+
static final ParseField WRITER_BUFFER_FIELD = new ParseField("writer_buffer_mb");
87+
88+
static final double DEFAULT_WRITER_BUFFER_MB = 128;
8589

8690
static CmdLineArgs fromXContent(XContentParser parser) throws IOException {
8791
Builder builder = PARSER.apply(parser, null);
@@ -117,6 +121,7 @@ static CmdLineArgs fromXContent(XContentParser parser) throws IOException {
117121
PARSER.declareFloat(Builder::setFilterSelectivity, FILTER_SELECTIVITY_FIELD);
118122
PARSER.declareLong(Builder::setSeed, SEED_FIELD);
119123
PARSER.declareString(Builder::setMergePolicy, MERGE_POLICY_FIELD);
124+
PARSER.declareDouble(Builder::setWriterBufferMb, WRITER_BUFFER_FIELD);
120125
}
121126

122127
@Override
@@ -186,6 +191,7 @@ static class Builder {
186191
private float filterSelectivity = 1f;
187192
private long seed = 1751900822751L;
188193
private KnnIndexTester.MergePolicyType mergePolicy = null;
194+
private double writerBufferSizeInMb = DEFAULT_WRITER_BUFFER_MB;
189195

190196
public Builder setDocVectors(List<String> docVectors) {
191197
if (docVectors == null || docVectors.isEmpty()) {
@@ -316,6 +322,11 @@ public Builder setMergePolicy(String mergePolicy) {
316322
return this;
317323
}
318324

325+
public Builder setWriterBufferMb(double writerBufferSizeInMb) {
326+
this.writerBufferSizeInMb = writerBufferSizeInMb;
327+
return this;
328+
}
329+
319330
public CmdLineArgs build() {
320331
if (docVectors == null) {
321332
throw new IllegalArgumentException("Document vectors path must be provided");
@@ -350,7 +361,8 @@ public CmdLineArgs build() {
350361
vectorEncoding,
351362
dimensions,
352363
earlyTermination,
353-
mergePolicy
364+
mergePolicy,
365+
writerBufferSizeInMb
354366
);
355367
}
356368
}

qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,8 @@ public static void main(String[] args) throws Exception {
240240
cmdLineArgs.dimensions(),
241241
cmdLineArgs.vectorSpace(),
242242
cmdLineArgs.numDocs(),
243-
mergePolicy
243+
mergePolicy,
244+
cmdLineArgs.writerBufferSizeInMb()
244245
);
245246
if (cmdLineArgs.reindex() == false && Files.exists(indexPath) == false) {
246247
throw new IllegalArgumentException("Index path does not exist: " + indexPath);

qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexer.java

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import org.apache.lucene.index.ConcurrentMergeScheduler;
3030
import org.apache.lucene.index.IndexWriter;
3131
import org.apache.lucene.index.IndexWriterConfig;
32+
import org.apache.lucene.index.IndexableField;
3233
import org.apache.lucene.index.MergePolicy;
3334
import org.apache.lucene.index.VectorEncoding;
3435
import org.apache.lucene.index.VectorSimilarityFunction;
@@ -65,7 +66,6 @@
6566
import static org.elasticsearch.test.knn.KnnIndexTester.logger;
6667

6768
class KnnIndexer {
68-
private static final double WRITER_BUFFER_MB = 128;
6969
static final String ID_FIELD = "id";
7070
static final String VECTOR_FIELD = "vector";
7171

@@ -78,6 +78,7 @@ class KnnIndexer {
7878
private final int numDocs;
7979
private final int numIndexThreads;
8080
private final MergePolicy mergePolicy;
81+
private final double writerBufferSizeInMb;
8182

8283
KnnIndexer(
8384
List<Path> docsPath,
@@ -88,7 +89,8 @@ class KnnIndexer {
8889
int dim,
8990
VectorSimilarityFunction similarityFunction,
9091
int numDocs,
91-
MergePolicy mergePolicy
92+
MergePolicy mergePolicy,
93+
double writerBufferSizeInMb
9294
) {
9395
this.docsPath = docsPath;
9496
this.indexPath = indexPath;
@@ -99,12 +101,14 @@ class KnnIndexer {
99101
this.similarityFunction = similarityFunction;
100102
this.numDocs = numDocs;
101103
this.mergePolicy = mergePolicy;
104+
this.writerBufferSizeInMb = writerBufferSizeInMb;
102105
}
103106

104107
void createIndex(KnnIndexTester.Results result) throws IOException, InterruptedException, ExecutionException {
105108
IndexWriterConfig iwc = new IndexWriterConfig().setOpenMode(IndexWriterConfig.OpenMode.CREATE);
106109
iwc.setCodec(codec);
107-
iwc.setRAMBufferSizeMB(WRITER_BUFFER_MB);
110+
iwc.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
111+
iwc.setRAMBufferSizeMB(writerBufferSizeInMb);
108112
iwc.setUseCompoundFile(false);
109113
if (mergePolicy != null) {
110114
iwc.setMergePolicy(mergePolicy);
@@ -248,6 +252,9 @@ static class IndexerThread extends Thread {
248252
private final float[] floatVectorBuffer;
249253
private final VectorReader in;
250254

255+
long readTime;
256+
long docAddTime;
257+
251258
private IndexerThread(
252259
IndexWriter iw,
253260
VectorReader in,
@@ -283,6 +290,7 @@ public void run() {
283290
} catch (IOException ioe) {
284291
throw new UncheckedIOException(ioe);
285292
}
293+
logger.info("Index thread times: [{}] read, [{}] add doc", readTime, docAddTime);
286294
}
287295

288296
private void _run() throws IOException {
@@ -294,23 +302,32 @@ private void _run() throws IOException {
294302
continue;
295303
}
296304

297-
Document doc = new Document();
305+
var startRead = System.nanoTime();
306+
final IndexableField field;
298307
switch (vectorEncoding) {
299308
case BYTE -> {
300309
in.next(byteVectorBuffer);
301-
doc.add(new KnnByteVectorField(VECTOR_FIELD, byteVectorBuffer, fieldType));
310+
field = new KnnByteVectorField(VECTOR_FIELD, byteVectorBuffer, fieldType);
302311
}
303312
case FLOAT32 -> {
304313
in.next(floatVectorBuffer);
305-
doc.add(new KnnFloatVectorField(VECTOR_FIELD, floatVectorBuffer, fieldType));
314+
field = new KnnFloatVectorField(VECTOR_FIELD, floatVectorBuffer, fieldType);
306315
}
316+
default -> throw new UnsupportedOperationException();
307317
}
318+
long endRead = System.nanoTime();
319+
readTime += (endRead - startRead);
320+
321+
Document doc = new Document();
322+
doc.add(field);
308323

309324
if ((id + 1) % 25000 == 0) {
310325
logger.debug("Done indexing " + (id + 1) + " documents.");
311326
}
312327
doc.add(new StoredField(ID_FIELD, id));
313328
iw.addDocument(doc);
329+
330+
docAddTime += (System.nanoTime() - endRead);
314331
}
315332
}
316333
}

0 commit comments

Comments
 (0)