Skip to content

Commit 69113dc

Browse files
feature: benchmark date field with doc values sparse index
1 parent 5de46d1 commit 69113dc

File tree

1 file changed

+255
-0
lines changed

1 file changed

+255
-0
lines changed
Lines changed: 255 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,255 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.benchmark.search.query.range;
11+
12+
import org.apache.lucene.analysis.standard.StandardAnalyzer;
13+
import org.apache.lucene.document.Document;
14+
import org.apache.lucene.document.Field;
15+
import org.apache.lucene.document.LongPoint;
16+
import org.apache.lucene.document.SortedDocValuesField;
17+
import org.apache.lucene.document.SortedNumericDocValuesField;
18+
import org.apache.lucene.document.StringField;
19+
import org.apache.lucene.index.DirectoryReader;
20+
import org.apache.lucene.index.IndexWriter;
21+
import org.apache.lucene.index.IndexWriterConfig;
22+
import org.apache.lucene.search.IndexOrDocValuesQuery;
23+
import org.apache.lucene.search.IndexSearcher;
24+
import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery;
25+
import org.apache.lucene.search.Query;
26+
import org.apache.lucene.search.ScoreMode;
27+
import org.apache.lucene.search.Scorer;
28+
import org.apache.lucene.search.Sort;
29+
import org.apache.lucene.search.SortField;
30+
import org.apache.lucene.search.SortedNumericSortField;
31+
import org.apache.lucene.search.Weight;
32+
import org.apache.lucene.store.Directory;
33+
import org.apache.lucene.store.FSDirectory;
34+
import org.apache.lucene.util.BytesRef;
35+
import org.openjdk.jmh.annotations.Benchmark;
36+
import org.openjdk.jmh.annotations.BenchmarkMode;
37+
import org.openjdk.jmh.annotations.Fork;
38+
import org.openjdk.jmh.annotations.Level;
39+
import org.openjdk.jmh.annotations.Measurement;
40+
import org.openjdk.jmh.annotations.Mode;
41+
import org.openjdk.jmh.annotations.OutputTimeUnit;
42+
import org.openjdk.jmh.annotations.Param;
43+
import org.openjdk.jmh.annotations.Scope;
44+
import org.openjdk.jmh.annotations.Setup;
45+
import org.openjdk.jmh.annotations.State;
46+
import org.openjdk.jmh.annotations.TearDown;
47+
import org.openjdk.jmh.annotations.Warmup;
48+
import org.openjdk.jmh.infra.Blackhole;
49+
import org.openjdk.jmh.profile.AsyncProfiler;
50+
import org.openjdk.jmh.runner.Runner;
51+
import org.openjdk.jmh.runner.RunnerException;
52+
import org.openjdk.jmh.runner.options.Options;
53+
import org.openjdk.jmh.runner.options.OptionsBuilder;
54+
55+
import java.io.IOException;
56+
import java.nio.file.Files;
57+
import java.util.Random;
58+
import java.util.concurrent.TimeUnit;
59+
60+
/**
61+
* Benchmark for measuring query performance with and without doc values skipper in Elasticsearch.
62+
* <p>
63+
* <b>Goal:</b> This benchmark is designed to **mimic and benchmark the execution of a range query in LogsDB**,
64+
* with and without a **sparse doc values index** on the `host.name` and `@timestamp` fields.
65+
* <p>
66+
* <b>Document Structure:</b>
67+
* - `host.name`: A keyword field (sorted, non-stored).
68+
* - `@timestamp`: A numeric field, indexed for range queries and using doc values with or without a doc values sparse index.
69+
* <p>
70+
* <b>Index Sorting:</b>
71+
* The index is sorted primarily by `host.name` (ascending) and secondarily by `@timestamp` (descending).
72+
* Documents are grouped into batches, where each hostname gets a dedicated batch of timestamps.
73+
* This is meant to simulate collection of logs from a set of hosts in a certain time interval.
74+
* <p>
75+
* <b>Batched Data Behavior:</b>
76+
* - The `host.name` value is generated in batches (e.g., "host-0", "host-1", ...).
77+
* - Each batch contains a fixed number of documents (`batchSize`).
78+
* - The `@timestamp` value resets to `BASE_TIMESTAMP` at the start of each batch.
79+
* - A random **timestamp delta** (0-{@code timestampIncrementMillis} ms) is added to ensure timestamps within each batch have slight
80+
* variation.
81+
* <p>
82+
* <b>Example Output:</b>
83+
* The table below shows a sample of generated documents (with a batch size of 10,000):
84+
*
85+
* <pre>
86+
* | Document # | host.name | @timestamp (ms since epoch) |
87+
* |-----------|----------|---------------------------|
88+
* | 1 | host-0 | 1704067200005 |
89+
* | 2 | host-0 | 1704067201053 |
90+
* | 3 | host-0 | 1704067202091 |
91+
* | ... | ... | ... |
92+
* | 10000 | host-0 | 1704077199568 |
93+
* | 10001 | host-1 | 1704067200042 |
94+
* | 10002 | host-1 | 1704067201099 |
95+
* | ... | ... | ... |
96+
* </pre>
97+
*
98+
* <p>
99+
* When running the range query we also retrieve just a fraction of the data, to simulate a real-world scenario where a
100+
* dashboard requires only the most recent logs.
101+
*/
102+
@BenchmarkMode(Mode.SampleTime)
103+
@OutputTimeUnit(TimeUnit.MILLISECONDS)
104+
@State(Scope.Thread)
105+
@Fork(0)
106+
@Warmup(iterations = 10)
107+
@Measurement(iterations = 10)
108+
public class DateFieldMapperDocValuesSkipperBenchmark {
109+
110+
public static void main(String[] args) throws RunnerException {
111+
final Options options = new OptionsBuilder().include(DateFieldMapperDocValuesSkipperBenchmark.class.getSimpleName())
112+
.addProfiler(AsyncProfiler.class)
113+
.build();
114+
115+
new Runner(options).run();
116+
}
117+
118+
@Param("1000000")
119+
private int numberOfDocuments;
120+
121+
@Param("10000")
122+
private int batchSize;
123+
124+
@Param("1000")
125+
private int timestampIncrementMillis;
126+
127+
@Param({ "0.2" })
128+
private double timestampRangeFraction;
129+
130+
@Param("42")
131+
private int seed;
132+
133+
private static final String TIMESTAMP_FIELD = "@timestamp";
134+
private static final String HOSTNAME_FIELD = "host.name";
135+
private static final long BASE_TIMESTAMP = 1704067200000L;
136+
137+
private static final Sort QUERY_SORT = new Sort(new SortedNumericSortField(TIMESTAMP_FIELD, SortField.Type.LONG, true));
138+
139+
private Directory tempDirectoryWithoutDocValuesSkipper;
140+
private Directory tempDirectoryWithDocValuesSkipper;
141+
private IndexSearcher indexSearcherWithoutDocValuesSkipper;
142+
private IndexSearcher indexSearcherWithDocValuesSkipper;
143+
144+
/**
145+
* Sets up the benchmark by creating Lucene indexes with and without doc values skipper.
146+
*
147+
* @throws IOException if an error occurs during index creation.
148+
*/
149+
@Setup(Level.Trial)
150+
public void setup() throws IOException {
151+
tempDirectoryWithoutDocValuesSkipper = FSDirectory.open(Files.createTempDirectory("temp1-"));
152+
tempDirectoryWithDocValuesSkipper = FSDirectory.open(Files.createTempDirectory("temp2-"));
153+
154+
indexSearcherWithoutDocValuesSkipper = createIndex(tempDirectoryWithoutDocValuesSkipper, false);
155+
indexSearcherWithDocValuesSkipper = createIndex(tempDirectoryWithDocValuesSkipper, true);
156+
}
157+
158+
/**
159+
* Creates an index with a specified sorting order and document structure.
160+
*
161+
* @param directory The Lucene directory to store the index.
162+
* @param withDocValuesSkipper Whether to use a sparse doc values index.
163+
* @return An IndexSearcher instance for querying the created index.
164+
* @throws IOException if an error occurs during index writing.
165+
*/
166+
private IndexSearcher createIndex(final Directory directory, boolean withDocValuesSkipper) throws IOException {
167+
final IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
168+
config.setIndexSort(
169+
new Sort(
170+
new SortField(HOSTNAME_FIELD, SortField.Type.STRING, false), // NOTE: `host.name` ascending
171+
new SortedNumericSortField(TIMESTAMP_FIELD, SortField.Type.LONG, true) // NOTE: `@timestamp` descending
172+
)
173+
);
174+
175+
final IndexWriter indexWriter = new IndexWriter(directory, config);
176+
final Random random = new Random(seed);
177+
178+
for (int i = 0; i < numberOfDocuments; i++) {
179+
final Document doc = new Document();
180+
addFieldsToDocument(doc, i, withDocValuesSkipper, random);
181+
indexWriter.addDocument(doc);
182+
}
183+
184+
indexWriter.commit();
185+
final DirectoryReader reader = DirectoryReader.open(indexWriter);
186+
indexWriter.close();
187+
return new IndexSearcher(reader);
188+
}
189+
190+
private void addFieldsToDocument(final Document doc, int docIndex, boolean withDocValuesSkipper, final Random random) {
191+
final int batchIndex = docIndex / batchSize;
192+
final String hostName = "host-" + batchIndex;
193+
final long timestampDelta = random.nextInt(0, timestampIncrementMillis);
194+
final long timestamp = BASE_TIMESTAMP + ((docIndex % batchSize) * timestampIncrementMillis) + timestampDelta;
195+
196+
if (withDocValuesSkipper) {
197+
doc.add(SortedNumericDocValuesField.indexedField(TIMESTAMP_FIELD, timestamp)); // NOTE: doc values skipper on `@timestamp`
198+
doc.add(SortedDocValuesField.indexedField(HOSTNAME_FIELD, new BytesRef(hostName))); // NOTE: doc values skipper on `host.name`
199+
} else {
200+
doc.add(new LongPoint(TIMESTAMP_FIELD, timestamp)); // BKD tree on `@timestamp`
201+
doc.add(new SortedNumericDocValuesField(TIMESTAMP_FIELD, timestamp)); // NOTE: doc values without the doc values skipper on
202+
// `@timestamp`
203+
doc.add(new SortedDocValuesField(HOSTNAME_FIELD, new BytesRef(hostName))); // NOTE: doc values without the doc values skipper on
204+
// `host.name`
205+
}
206+
207+
doc.add(new StringField(HOSTNAME_FIELD, hostName, Field.Store.NO));
208+
}
209+
210+
/**
211+
* Computes a dynamic timestamp upper bound based on the batch size,
212+
* timestamp increment, and user-specified fraction.
213+
*
214+
* @return The computed upper bound for the timestamp range query.
215+
*/
216+
private long rangeEndTimestamp() {
217+
return BASE_TIMESTAMP + ((long) (batchSize * timestampIncrementMillis * timestampRangeFraction));
218+
}
219+
220+
@Benchmark
221+
public void rangeQueryWithoutDocValuesSkipper(final Blackhole bh) throws IOException {
222+
bh.consume(rangeQuery(indexSearcherWithoutDocValuesSkipper, BASE_TIMESTAMP, rangeEndTimestamp(), true));
223+
}
224+
225+
@Benchmark
226+
public void rangeQueryWithDocValuesSkipper(final Blackhole bh) throws IOException {
227+
bh.consume(rangeQuery(indexSearcherWithDocValuesSkipper, BASE_TIMESTAMP, rangeEndTimestamp(), false));
228+
}
229+
230+
private long rangeQuery(final IndexSearcher searcher, long rangeStartTimestamp, long rangeEndTimestamp, boolean isIndexed)
231+
throws IOException {
232+
assert rangeEndTimestamp > rangeStartTimestamp;
233+
final Query rangeQuery = isIndexed
234+
? new IndexOrDocValuesQuery(
235+
LongPoint.newRangeQuery(TIMESTAMP_FIELD, rangeStartTimestamp, rangeEndTimestamp),
236+
SortedNumericDocValuesField.newSlowRangeQuery(TIMESTAMP_FIELD, rangeStartTimestamp, rangeEndTimestamp)
237+
)
238+
: SortedNumericDocValuesField.newSlowRangeQuery(TIMESTAMP_FIELD, rangeStartTimestamp, rangeEndTimestamp);
239+
final Query query = new IndexSortSortedNumericDocValuesRangeQuery(
240+
TIMESTAMP_FIELD,
241+
rangeStartTimestamp,
242+
rangeEndTimestamp,
243+
rangeQuery
244+
);
245+
return searcher.search(query, numberOfDocuments, QUERY_SORT).totalHits.value();
246+
}
247+
248+
@TearDown(Level.Trial)
249+
public void tearDown() throws IOException {
250+
indexSearcherWithoutDocValuesSkipper.getIndexReader().close();
251+
indexSearcherWithDocValuesSkipper.getIndexReader().close();
252+
tempDirectoryWithoutDocValuesSkipper.close();
253+
tempDirectoryWithDocValuesSkipper.close();
254+
}
255+
}

0 commit comments

Comments
 (0)