|
9 | 9 |
|
10 | 10 | package org.elasticsearch.test.knn; |
11 | 11 |
|
| 12 | +import org.apache.lucene.index.IndexWriterConfig; |
12 | 13 | import org.apache.lucene.index.VectorEncoding; |
13 | 14 | import org.apache.lucene.index.VectorSimilarityFunction; |
14 | 15 | import org.elasticsearch.common.Strings; |
15 | 16 | import org.elasticsearch.core.PathUtils; |
| 17 | +import org.elasticsearch.monitor.jvm.JvmInfo; |
16 | 18 | import org.elasticsearch.xcontent.ObjectParser; |
17 | 19 | import org.elasticsearch.xcontent.ParseField; |
18 | 20 | import org.elasticsearch.xcontent.ToXContentObject; |
@@ -54,7 +56,8 @@ record CmdLineArgs( |
54 | 56 | int dimensions, |
55 | 57 | boolean earlyTermination, |
56 | 58 | KnnIndexTester.MergePolicyType mergePolicy, |
57 | | - double writerBufferSizeInMb |
| 59 | + double writerBufferSizeInMb, |
| 60 | + int writerMaxBufferedDocs |
58 | 61 | ) implements ToXContentObject { |
59 | 62 |
|
60 | 63 | static final ParseField DOC_VECTORS_FIELD = new ParseField("doc_vectors"); |
@@ -83,9 +86,15 @@ record CmdLineArgs( |
83 | 86 | static final ParseField FILTER_SELECTIVITY_FIELD = new ParseField("filter_selectivity"); |
84 | 87 | static final ParseField SEED_FIELD = new ParseField("seed"); |
85 | 88 | static final ParseField MERGE_POLICY_FIELD = new ParseField("merge_policy"); |
86 | | - static final ParseField WRITER_BUFFER_FIELD = new ParseField("writer_buffer_mb"); |
| 89 | + static final ParseField WRITER_BUFFER_MB_FIELD = new ParseField("writer_buffer_mb"); |
| 90 | + static final ParseField WRITER_BUFFER_DOCS_FIELD = new ParseField("writer_buffer_docs"); |
87 | 91 |
|
88 | | - static final double DEFAULT_WRITER_BUFFER_MB = 128; |
| 92 | + /** In ES, the default writer buffer size is 10% of the heap space |
| 93 | + * (see {@code IndexingMemoryController.INDEX_BUFFER_SIZE_SETTING}). |
| 94 | + * The Java heap size for this tool is configured in {@code build.gradle}; it currently defaults to 16GB, in which case |
| 95 | + * the buffer size would be 1.6GB. |
| 96 | + */ |
| 97 | + static final double DEFAULT_WRITER_BUFFER_MB = (JvmInfo.jvmInfo().getMem().getHeapMax().getBytes() / (1024.0 * 1024.0)) * 0.1; |
89 | 98 |
|
90 | 99 | static CmdLineArgs fromXContent(XContentParser parser) throws IOException { |
91 | 100 | Builder builder = PARSER.apply(parser, null); |
@@ -121,7 +130,8 @@ static CmdLineArgs fromXContent(XContentParser parser) throws IOException { |
121 | 130 | PARSER.declareFloat(Builder::setFilterSelectivity, FILTER_SELECTIVITY_FIELD); |
122 | 131 | PARSER.declareLong(Builder::setSeed, SEED_FIELD); |
123 | 132 | PARSER.declareString(Builder::setMergePolicy, MERGE_POLICY_FIELD); |
124 | | - PARSER.declareDouble(Builder::setWriterBufferMb, WRITER_BUFFER_FIELD); |
| 133 | + PARSER.declareDouble(Builder::setWriterBufferMb, WRITER_BUFFER_MB_FIELD); |
| 134 | + PARSER.declareInt(Builder::setWriterMaxBufferedDocs, WRITER_BUFFER_DOCS_FIELD); |
125 | 135 | } |
126 | 136 |
|
127 | 137 | @Override |
@@ -157,6 +167,8 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws |
157 | 167 | builder.field(EARLY_TERMINATION_FIELD.getPreferredName(), earlyTermination); |
158 | 168 | builder.field(FILTER_SELECTIVITY_FIELD.getPreferredName(), filterSelectivity); |
159 | 169 | builder.field(SEED_FIELD.getPreferredName(), seed); |
| 170 | + builder.field(WRITER_BUFFER_MB_FIELD.getPreferredName(), writerBufferSizeInMb); |
| 171 | + builder.field(WRITER_BUFFER_DOCS_FIELD.getPreferredName(), writerMaxBufferedDocs); |
160 | 172 | return builder.endObject(); |
161 | 173 | } |
162 | 174 |
|
@@ -193,6 +205,12 @@ static class Builder { |
193 | 205 | private KnnIndexTester.MergePolicyType mergePolicy = null; |
194 | 206 | private double writerBufferSizeInMb = DEFAULT_WRITER_BUFFER_MB; |
195 | 207 |
|
| 208 | + /** |
| 209 | + * Elasticsearch does not set this explicitly, and in Lucene this setting is |
| 210 | + * disabled by default (writer flushes by RAM usage). |
| 211 | + */ |
| 212 | + private int writerMaxBufferedDocs = IndexWriterConfig.DISABLE_AUTO_FLUSH; |
| 213 | + |
196 | 214 | public Builder setDocVectors(List<String> docVectors) { |
197 | 215 | if (docVectors == null || docVectors.isEmpty()) { |
198 | 216 | throw new IllegalArgumentException("Document vectors path must be provided"); |
@@ -327,6 +345,11 @@ public Builder setWriterBufferMb(double writerBufferSizeInMb) { |
327 | 345 | return this; |
328 | 346 | } |
329 | 347 |
|
| 348 | + public Builder setWriterMaxBufferedDocs(int writerMaxBufferedDocs) { |
| 349 | + this.writerMaxBufferedDocs = writerMaxBufferedDocs; |
| 350 | + return this; |
| 351 | + } |
| 352 | + |
330 | 353 | public CmdLineArgs build() { |
331 | 354 | if (docVectors == null) { |
332 | 355 | throw new IllegalArgumentException("Document vectors path must be provided"); |
@@ -362,7 +385,8 @@ public CmdLineArgs build() { |
362 | 385 | dimensions, |
363 | 386 | earlyTermination, |
364 | 387 | mergePolicy, |
365 | | - writerBufferSizeInMb |
| 388 | + writerBufferSizeInMb, |
| 389 | + writerMaxBufferedDocs |
366 | 390 | ); |
367 | 391 | } |
368 | 392 | } |
|
0 commit comments