Skip to content

Commit cb5820f

Browse files
docs: improve javadoc
1 parent 1a32600 commit cb5820f

File tree

1 file changed

+142
-72
lines changed

1 file changed

+142
-72
lines changed

benchmarks/src/main/java/org/elasticsearch/benchmark/search/query/range/DateFieldMapperDocValuesSkipperBenchmark.java

Lines changed: 142 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -59,45 +59,45 @@
5959

6060
/**
6161
* Benchmark for measuring query performance with and without doc values skipper in Elasticsearch.
62-
* <p>
63-
* <b>Goal:</b> This benchmark is designed to **mimic and benchmark the execution of a range query in LogsDB**,
64-
* with and without a **sparse doc values index** on the `host.name` and `@timestamp` fields.
65-
* <p>
66-
* <b>Document Structure:</b>
67-
* - `host.name`: A keyword field (sorted, non-stored).
68-
* - `@timestamp`: A numeric field, indexed for range queries and using doc values with or without a doc values sparse index.
69-
* <p>
70-
* <b>Index Sorting:</b>
71-
* The index is sorted primarily by `host.name` (ascending) and secondarily by `@timestamp` (descending).
62+
*
63+
* <p><b>Goal:</b> This benchmark is designed to mimic and benchmark the execution of a range query in LogsDB,
64+
* with and without a sparse doc values index on the {@code host.name} and {@code @timestamp} fields.
65+
*
66+
* <p><b>Document Structure:</b>
67+
* <ul>
68+
* <li>{@code host.name}: A keyword field (sorted, non-stored).</li>
69+
* <li>{@code @timestamp}: A numeric field, indexed for range queries and using doc values with or without a doc values sparse index.</li>
70+
* </ul>
71+
*
72+
* <p><b>Index Sorting:</b>
73+
* The index is sorted primarily by {@code host.name} (ascending) and secondarily by {@code @timestamp} (descending).
7274
* Documents are grouped into batches, where each hostname gets a dedicated batch of timestamps.
73-
* This is meant to simulate collection of logs from a set of hosts in a certain time interval.
74-
* <p>
75-
* <b>Batched Data Behavior:</b>
76-
* - The `host.name` value is generated in batches (e.g., "host-0", "host-1", ...).
77-
* - Each batch contains a fixed number of documents (`batchSize`).
78-
* - The `@timestamp` value resets to `BASE_TIMESTAMP` at the start of each batch.
79-
* - A random **timestamp delta** (0-{@code timestampIncrementMillis} ms) is added to ensure timestamps within each batch have slight
80-
* variation.
81-
* <p>
82-
* <b>Example Output:</b>
83-
* The table below shows a sample of generated documents (with a batch size of 10,000):
75+
* This is meant to simulate collecting logs from a set of hosts over a certain time interval.
76+
*
77+
* <p><b>Batched Data Behavior:</b>
78+
* <ul>
79+
* <li>The {@code host.name} value is generated in batches (e.g., "host-0", "host-1", ...).</li>
80+
* <li>Each batch contains a fixed number of documents ({@code batchSize}).</li>
81+
* <li>The {@code @timestamp} value resets to {@code BASE_TIMESTAMP} at the start of each batch.</li>
82+
* <li>A random timestamp delta (0–{@code deltaTime} ms) is added so that each document in a batch differs slightly.</li>
83+
* </ul>
8484
*
85+
* <p><b>Example Output:</b>
8586
* <pre>
8687
* | Document # | host.name | @timestamp (ms since epoch) |
87-
* |-----------|----------|---------------------------|
88-
* | 1 | host-0 | 1704067200005 |
89-
* | 2 | host-0 | 1704067201053 |
90-
* | 3 | host-0 | 1704067202091 |
91-
* | ... | ... | ... |
92-
* | 10000 | host-0 | 1704077199568 |
93-
* | 10001 | host-1 | 1704067200042 |
94-
* | 10002 | host-1 | 1704067201099 |
95-
* | ... | ... | ... |
88+
* |-----------|-----------|------------------------------|
89+
* | 1 | host-0 | 1704067200005 |
90+
* | 2 | host-0 | 1704067201053 |
91+
* | 3 | host-0 | 1704067202091 |
92+
* | ... | ... | ... |
93+
* | 10000 | host-0 | 1704077199568 |
94+
* | 10001 | host-1 | 1704067200042 |
95+
* | 10002 | host-1 | 1704067201099 |
96+
* | ... | ... | ... |
9697
* </pre>
9798
*
98-
* <p>
99-
* When running the range query we also retrieve just a fraction of the data, to simulate a real-world scenario where a
100-
* dashboard requires only the most recent logs.
99+
* <p>When running the range query, we retrieve only a fraction of the total data,
100+
* simulating a real-world scenario where a dashboard only needs the most recent logs.
101101
*/
102102
@BenchmarkMode(Mode.SampleTime)
103103
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@@ -108,29 +108,39 @@
108108
@Measurement(iterations = 5)
109109
public class DateFieldMapperDocValuesSkipperBenchmark {
110110

111-
public static void main(String[] args) throws RunnerException {
112-
final Options options = new OptionsBuilder().include(DateFieldMapperDocValuesSkipperBenchmark.class.getSimpleName())
113-
.addProfiler(AsyncProfiler.class)
114-
.build();
115-
116-
new Runner(options).run();
117-
}
118-
111+
/**
112+
* Total number of documents to index.
113+
*/
119114
@Param("1343120")
120115
private int nDocs;
121116

117+
/**
118+
* Number of documents per hostname batch.
119+
*/
122120
@Param({ "1340", "121300" })
123121
private int batchSize;
124122

123+
/**
124+
* Maximum random increment (in milliseconds) added to each doc's timestamp.
125+
*/
125126
@Param("1000")
126127
private int deltaTime;
127128

129+
/**
130+
* Fraction of the total time range (derived from {@code batchSize * deltaTime}) that the range query will cover.
131+
*/
128132
@Param({ "0.01", "0.2", "0.8" })
129133
private double queryRange;
130134

135+
/**
136+
* Number of docs to index before forcing a commit, thus creating multiple Lucene segments.
137+
*/
131138
@Param({ "7390", "398470" })
132139
private int commitEvery;
133140

141+
/**
142+
* Seed for random data generation.
143+
*/
134144
@Param("42")
135145
private int seed;
136146

@@ -143,122 +153,182 @@ public static void main(String[] args) throws RunnerException {
143153
private ExecutorService executorService;
144154

145155
/**
146-
* Sets up the benchmark by creating Lucene indexes with and without doc values skipper.
156+
* Main entry point for running this benchmark via JMH.
157+
*
158+
* @param args command line arguments (unused)
159+
* @throws RunnerException if the benchmark fails to run
160+
*/
161+
public static void main(String[] args) throws RunnerException {
162+
final Options options = new OptionsBuilder().include(DateFieldMapperDocValuesSkipperBenchmark.class.getSimpleName())
163+
.addProfiler(AsyncProfiler.class)
164+
.build();
165+
166+
new Runner(options).run();
167+
}
168+
169+
/**
170+
* Sets up the benchmark by creating Lucene indexes (with and without doc values skipper).
171+
* Sets up a single-threaded executor for searching the indexes, avoiding concurrent search threads.
147172
*
148-
* @throws IOException if an error occurs during index creation.
173+
* @throws IOException if an error occurs while building the index
149174
*/
150175
@Setup(Level.Trial)
151176
public void setup() throws IOException {
152177
executorService = Executors.newSingleThreadExecutor();
153-
Directory tempDirectoryWithoutDocValuesSkipper = FSDirectory.open(Files.createTempDirectory("temp1-"));
154-
Directory tempDirectoryWithDocValuesSkipper = FSDirectory.open(Files.createTempDirectory("temp2-"));
178+
179+
final Directory tempDirectoryWithoutDocValuesSkipper = FSDirectory.open(Files.createTempDirectory("temp1-"));
180+
final Directory tempDirectoryWithDocValuesSkipper = FSDirectory.open(Files.createTempDirectory("temp2-"));
155181

156182
indexSearcherWithoutDocValuesSkipper = createIndex(tempDirectoryWithoutDocValuesSkipper, false, commitEvery);
157183
indexSearcherWithDocValuesSkipper = createIndex(tempDirectoryWithDocValuesSkipper, true, commitEvery);
158184
}
159185

160186
/**
161-
* Creates an {@link IndexSearcher} from a newly created {@link IndexWriter}. Documents
162-
* are added to the index and committed in batches of a specified size to generate multiple segments.
187+
* Creates an {@link IndexSearcher} after indexing documents in batches.
188+
* Each batch commit forces multiple segments to be created.
163189
*
164-
* @param directory the Lucene {@link Directory} where the index will be written
165-
* @param withDocValuesSkipper indicates whether certain fields should skip doc values
166-
* @param commitEvery the number of documents after which to force a commit
167-
* @return an {@link IndexSearcher} that can be used to query the newly created index
168-
* @throws IOException if an I/O error occurs during index writing or reading
190+
* @param directory the Lucene {@link Directory} for writing the index
191+
* @param withDocValuesSkipper true if we should enable doc values skipper on certain fields
192+
* @param commitEvery number of documents after which to commit (and thus segment)
193+
* @return an {@link IndexSearcher} for querying the newly built index
194+
* @throws IOException if an I/O error occurs during index writing
169195
*/
170196
private IndexSearcher createIndex(final Directory directory, final boolean withDocValuesSkipper, final int commitEvery)
171197
throws IOException {
198+
172199
final IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
200+
// NOTE: index sort config matching LogsDB's sort order
173201
config.setIndexSort(
174202
new Sort(
175-
new SortField(HOSTNAME_FIELD, SortField.Type.STRING, false), // NOTE: `host.name` ascending
176-
new SortedNumericSortField(TIMESTAMP_FIELD, SortField.Type.LONG, true) // NOTE: `@timestamp` descending
203+
new SortField(HOSTNAME_FIELD, SortField.Type.STRING, false),
204+
new SortedNumericSortField(TIMESTAMP_FIELD, SortField.Type.LONG, true)
177205
)
178206
);
179207

180208
final Random random = new Random(seed);
209+
181210
try (IndexWriter indexWriter = new IndexWriter(directory, config)) {
182211
int docCountSinceLastCommit = 0;
212+
183213
for (int i = 0; i < nDocs; i++) {
184214
final Document doc = new Document();
185215
addFieldsToDocument(doc, i, withDocValuesSkipper, random);
186216
indexWriter.addDocument(doc);
187217
docCountSinceLastCommit++;
188218

189-
// NOTE: make sure we have multiple Lucene segments
219+
// Force commit periodically to create multiple Lucene segments
190220
if (docCountSinceLastCommit >= commitEvery) {
191221
indexWriter.commit();
192222
docCountSinceLastCommit = 0;
193223
}
194224
}
195225

196226
indexWriter.commit();
197-
final DirectoryReader reader = DirectoryReader.open(indexWriter);
198-
// NOTE: internally Elasticsearch runs multiple search threads concurrently, (at least) one per Lucene segment.
199-
// Here we simplify the benchmark making sure we have a single-threaded search execution using a single thread
200-
// executor Service.
227+
228+
// Open a reader and create a searcher on top of it using a single-thread executor.
229+
DirectoryReader reader = DirectoryReader.open(indexWriter);
201230
return new IndexSearcher(reader, executorService);
202231
}
203232
}
204233

234+
/**
235+
* Populates the given {@link Document} with fields, optionally using doc values skipper.
236+
*
237+
* @param doc the Lucene document to fill
238+
* @param docIndex index of the document being added
239+
* @param withDocValuesSkipper true if doc values skipper is enabled
240+
* @param random seeded {@link Random} for data variation
241+
*/
205242
private void addFieldsToDocument(final Document doc, int docIndex, boolean withDocValuesSkipper, final Random random) {
243+
206244
final int batchIndex = docIndex / batchSize;
207245
final String hostName = "host-" + batchIndex;
208-
final long timestampDelta = random.nextInt(0, deltaTime);
209-
final long timestamp = BASE_TIMESTAMP + ((docIndex % batchSize) * deltaTime) + timestampDelta;
246+
247+
// Slightly vary the timestamp in each document
248+
final long timestamp = BASE_TIMESTAMP + ((docIndex % batchSize) * deltaTime) + random.nextInt(0, deltaTime);
210249

211250
if (withDocValuesSkipper) {
212-
doc.add(SortedNumericDocValuesField.indexedField(TIMESTAMP_FIELD, timestamp)); // NOTE: doc values skipper on `@timestamp`
213-
doc.add(SortedDocValuesField.indexedField(HOSTNAME_FIELD, new BytesRef(hostName))); // NOTE: doc values skipper on `host.name`
251+
// Sparse doc values index on `@timestamp` and `host.name`
252+
doc.add(SortedNumericDocValuesField.indexedField(TIMESTAMP_FIELD, timestamp));
253+
doc.add(SortedDocValuesField.indexedField(HOSTNAME_FIELD, new BytesRef(hostName)));
214254
} else {
255+
// Standard doc values, points and inverted index
215256
doc.add(new StringField(HOSTNAME_FIELD, hostName, Field.Store.NO));
216-
doc.add(new SortedDocValuesField(HOSTNAME_FIELD, new BytesRef(hostName))); // NOTE: doc values without the doc values skipper on
217-
// `host.name`
218-
doc.add(new LongPoint(TIMESTAMP_FIELD, timestamp)); // KDB tree on `@timestamp`
219-
doc.add(new SortedNumericDocValuesField(TIMESTAMP_FIELD, timestamp)); // NOTE: doc values without the doc values skipper on
220-
// `@timestamp`
257+
doc.add(new SortedDocValuesField(HOSTNAME_FIELD, new BytesRef(hostName)));
258+
doc.add(new LongPoint(TIMESTAMP_FIELD, timestamp));
259+
doc.add(new SortedNumericDocValuesField(TIMESTAMP_FIELD, timestamp));
221260
}
222261
}
223262

224263
/**
225-
* Computes a dynamic timestamp upper bound based on the batch size,
226-
* timestamp increment, and user-specified fraction.
264+
* Calculates the upper bound for the timestamp range query based on {@code batchSize},
265+
* {@code deltaTime}, and {@code queryRange}.
227266
*
228-
* @return The computed upper bound for the timestamp range query.
267+
* @return the computed upper bound for the timestamp range query
229268
*/
230269
private long rangeEndTimestamp() {
231-
return BASE_TIMESTAMP + ((long) (batchSize * deltaTime * queryRange));
270+
return BASE_TIMESTAMP + (long) (batchSize * deltaTime * queryRange);
232271
}
233272

273+
/**
274+
* Executes a range query without doc values skipper.
275+
*
276+
* @param bh the blackhole consuming the query result
277+
* @throws IOException if a search error occurs
278+
*/
234279
@Benchmark
235280
public void rangeQueryWithoutDocValuesSkipper(final Blackhole bh) throws IOException {
236281
bh.consume(rangeQuery(indexSearcherWithoutDocValuesSkipper, BASE_TIMESTAMP, rangeEndTimestamp(), true));
237282
}
238283

284+
/**
285+
* Executes a range query with doc values skipper enabled.
286+
*
287+
* @param bh the blackhole consuming the query result
288+
* @throws IOException if a search error occurs
289+
*/
239290
@Benchmark
240291
public void rangeQueryWithDocValuesSkipper(final Blackhole bh) throws IOException {
241292
bh.consume(rangeQuery(indexSearcherWithDocValuesSkipper, BASE_TIMESTAMP, rangeEndTimestamp(), false));
242293
}
243294

295+
/**
296+
* Runs the actual Lucene range query, optionally combining a {@link LongPoint} index query
297+
* with doc values ({@link SortedNumericDocValuesField}) via {@link IndexOrDocValuesQuery},
298+
* and then wrapping it with an {@link IndexSortSortedNumericDocValuesRangeQuery} to utilize the index sort.
299+
*
300+
* @param searcher the Lucene {@link IndexSearcher}
301+
* @param rangeStartTimestamp lower bound of the timestamp range
302+
* @param rangeEndTimestamp upper bound of the timestamp range
303+
* @param isIndexed true if we should combine indexed and doc value queries
304+
* @return the total number of matching documents
305+
* @throws IOException if a search error occurs
306+
*/
244307
private long rangeQuery(final IndexSearcher searcher, long rangeStartTimestamp, long rangeEndTimestamp, boolean isIndexed)
245308
throws IOException {
309+
246310
assert rangeEndTimestamp > rangeStartTimestamp;
311+
247312
final Query rangeQuery = isIndexed
248313
? new IndexOrDocValuesQuery(
249314
LongPoint.newRangeQuery(TIMESTAMP_FIELD, rangeStartTimestamp, rangeEndTimestamp),
250315
SortedNumericDocValuesField.newSlowRangeQuery(TIMESTAMP_FIELD, rangeStartTimestamp, rangeEndTimestamp)
251316
)
252317
: SortedNumericDocValuesField.newSlowRangeQuery(TIMESTAMP_FIELD, rangeStartTimestamp, rangeEndTimestamp);
318+
253319
final Query query = new IndexSortSortedNumericDocValuesRangeQuery(
254320
TIMESTAMP_FIELD,
255321
rangeStartTimestamp,
256322
rangeEndTimestamp,
257323
rangeQuery
258324
);
325+
259326
return searcher.count(query);
260327
}
261328

329+
/**
330+
* Shuts down the executor service after the trial completes.
331+
*/
262332
@TearDown(Level.Trial)
263333
public void tearDown() {
264334
if (executorService != null) {

0 commit comments

Comments
 (0)