|
| 1 | +/* |
| 2 | + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one |
| 3 | + * or more contributor license agreements. Licensed under the "Elastic License |
| 4 | + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side |
| 5 | + * Public License v 1"; you may not use this file except in compliance with, at |
| 6 | + * your election, the "Elastic License 2.0", the "GNU Affero General Public |
| 7 | + * License v3.0 only", or the "Server Side Public License, v 1". |
| 8 | + */ |
| 9 | + |
| 10 | +package org.elasticsearch.benchmark.search.query.range; |
| 11 | + |
| 12 | +import org.apache.lucene.analysis.standard.StandardAnalyzer; |
| 13 | +import org.apache.lucene.document.Document; |
| 14 | +import org.apache.lucene.document.Field; |
| 15 | +import org.apache.lucene.document.LongPoint; |
| 16 | +import org.apache.lucene.document.SortedDocValuesField; |
| 17 | +import org.apache.lucene.document.SortedNumericDocValuesField; |
| 18 | +import org.apache.lucene.document.StringField; |
| 19 | +import org.apache.lucene.index.DirectoryReader; |
| 20 | +import org.apache.lucene.index.IndexWriter; |
| 21 | +import org.apache.lucene.index.IndexWriterConfig; |
| 22 | +import org.apache.lucene.search.IndexOrDocValuesQuery; |
| 23 | +import org.apache.lucene.search.IndexSearcher; |
| 24 | +import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery; |
| 25 | +import org.apache.lucene.search.Query; |
| 26 | +import org.apache.lucene.search.ScoreMode; |
| 27 | +import org.apache.lucene.search.Scorer; |
| 28 | +import org.apache.lucene.search.Sort; |
| 29 | +import org.apache.lucene.search.SortField; |
| 30 | +import org.apache.lucene.search.SortedNumericSortField; |
| 31 | +import org.apache.lucene.search.Weight; |
| 32 | +import org.apache.lucene.store.Directory; |
| 33 | +import org.apache.lucene.store.FSDirectory; |
| 34 | +import org.apache.lucene.util.BytesRef; |
| 35 | +import org.openjdk.jmh.annotations.Benchmark; |
| 36 | +import org.openjdk.jmh.annotations.BenchmarkMode; |
| 37 | +import org.openjdk.jmh.annotations.Fork; |
| 38 | +import org.openjdk.jmh.annotations.Level; |
| 39 | +import org.openjdk.jmh.annotations.Measurement; |
| 40 | +import org.openjdk.jmh.annotations.Mode; |
| 41 | +import org.openjdk.jmh.annotations.OutputTimeUnit; |
| 42 | +import org.openjdk.jmh.annotations.Param; |
| 43 | +import org.openjdk.jmh.annotations.Scope; |
| 44 | +import org.openjdk.jmh.annotations.Setup; |
| 45 | +import org.openjdk.jmh.annotations.State; |
| 46 | +import org.openjdk.jmh.annotations.TearDown; |
| 47 | +import org.openjdk.jmh.annotations.Warmup; |
| 48 | +import org.openjdk.jmh.infra.Blackhole; |
| 49 | +import org.openjdk.jmh.profile.AsyncProfiler; |
| 50 | +import org.openjdk.jmh.runner.Runner; |
| 51 | +import org.openjdk.jmh.runner.RunnerException; |
| 52 | +import org.openjdk.jmh.runner.options.Options; |
| 53 | +import org.openjdk.jmh.runner.options.OptionsBuilder; |
| 54 | + |
| 55 | +import java.io.IOException; |
| 56 | +import java.nio.file.Files; |
| 57 | +import java.util.Random; |
| 58 | +import java.util.concurrent.TimeUnit; |
| 59 | + |
| 60 | +/** |
| 61 | + * Benchmark for measuring query performance with and without doc values skipper in Elasticsearch. |
| 62 | + * <p> |
 * <b>Goal:</b> This benchmark is designed to mimic and benchmark the execution of a range query in LogsDB,
 * with and without a <b>sparse doc values index</b> on the {@code host.name} and {@code @timestamp} fields.
| 65 | + * <p> |
| 66 | + * <b>Document Structure:</b> |
| 67 | + * - `host.name`: A keyword field (sorted, non-stored). |
| 68 | + * - `@timestamp`: A numeric field, indexed for range queries and using doc values with or without a doc values sparse index. |
| 69 | + * <p> |
| 70 | + * <b>Index Sorting:</b> |
| 71 | + * The index is sorted primarily by `host.name` (ascending) and secondarily by `@timestamp` (descending). |
| 72 | + * Documents are grouped into batches, where each hostname gets a dedicated batch of timestamps. |
| 73 | + * This is meant to simulate collection of logs from a set of hosts in a certain time interval. |
| 74 | + * <p> |
| 75 | + * <b>Batched Data Behavior:</b> |
 * - The {@code host.name} value is generated in batches (e.g., "host-0", "host-1", ...).
 * - Each batch contains a fixed number of documents ({@code batchSize}).
 * - The {@code @timestamp} value resets to {@code BASE_TIMESTAMP} at the start of each batch.
 * - A random <b>timestamp delta</b> (0-{@code timestampIncrementMillis} ms) is added to ensure timestamps within each batch have slight
 *   variation.
| 81 | + * <p> |
| 82 | + * <b>Example Output:</b> |
| 83 | + * The table below shows a sample of generated documents (with a batch size of 10,000): |
| 84 | + * |
| 85 | + * <pre> |
| 86 | + * | Document # | host.name | @timestamp (ms since epoch) | |
| 87 | + * |-----------|----------|---------------------------| |
| 88 | + * | 1 | host-0 | 1704067200005 | |
| 89 | + * | 2 | host-0 | 1704067201053 | |
| 90 | + * | 3 | host-0 | 1704067202091 | |
| 91 | + * | ... | ... | ... | |
| 92 | + * | 10000 | host-0 | 1704077199568 | |
| 93 | + * | 10001 | host-1 | 1704067200042 | |
| 94 | + * | 10002 | host-1 | 1704067201099 | |
| 95 | + * | ... | ... | ... | |
| 96 | + * </pre> |
| 97 | + * |
| 98 | + * <p> |
| 99 | + * When running the range query we also retrieve just a fraction of the data, to simulate a real-world scenario where a |
| 100 | + * dashboard requires only the most recent logs. |
| 101 | + */ |
| 102 | +@BenchmarkMode(Mode.SampleTime) |
| 103 | +@OutputTimeUnit(TimeUnit.MILLISECONDS) |
| 104 | +@State(Scope.Thread) |
| 105 | +@Fork(0) |
| 106 | +@Warmup(iterations = 10) |
| 107 | +@Measurement(iterations = 10) |
| 108 | +public class DateFieldMapperDocValuesSkipperBenchmark { |
| 109 | + |
| 110 | + public static void main(String[] args) throws RunnerException { |
| 111 | + final Options options = new OptionsBuilder().include(DateFieldMapperDocValuesSkipperBenchmark.class.getSimpleName()) |
| 112 | + .addProfiler(AsyncProfiler.class) |
| 113 | + .build(); |
| 114 | + |
| 115 | + new Runner(options).run(); |
| 116 | + } |
| 117 | + |
| 118 | + @Param("1000000") |
| 119 | + private int numberOfDocuments; |
| 120 | + |
| 121 | + @Param("10000") |
| 122 | + private int batchSize; |
| 123 | + |
| 124 | + @Param("1000") |
| 125 | + private int timestampIncrementMillis; |
| 126 | + |
| 127 | + @Param({ "0.2" }) |
| 128 | + private double timestampRangeFraction; |
| 129 | + |
| 130 | + @Param("42") |
| 131 | + private int seed; |
| 132 | + |
| 133 | + private static final String TIMESTAMP_FIELD = "@timestamp"; |
| 134 | + private static final String HOSTNAME_FIELD = "host.name"; |
| 135 | + private static final long BASE_TIMESTAMP = 1704067200000L; |
| 136 | + |
| 137 | + private static final Sort QUERY_SORT = new Sort(new SortedNumericSortField(TIMESTAMP_FIELD, SortField.Type.LONG, true)); |
| 138 | + |
| 139 | + private Directory tempDirectoryWithoutDocValuesSkipper; |
| 140 | + private Directory tempDirectoryWithDocValuesSkipper; |
| 141 | + private IndexSearcher indexSearcherWithoutDocValuesSkipper; |
| 142 | + private IndexSearcher indexSearcherWithDocValuesSkipper; |
| 143 | + |
| 144 | + /** |
| 145 | + * Sets up the benchmark by creating Lucene indexes with and without doc values skipper. |
| 146 | + * |
| 147 | + * @throws IOException if an error occurs during index creation. |
| 148 | + */ |
| 149 | + @Setup(Level.Trial) |
| 150 | + public void setup() throws IOException { |
| 151 | + tempDirectoryWithoutDocValuesSkipper = FSDirectory.open(Files.createTempDirectory("temp1-")); |
| 152 | + tempDirectoryWithDocValuesSkipper = FSDirectory.open(Files.createTempDirectory("temp2-")); |
| 153 | + |
| 154 | + indexSearcherWithoutDocValuesSkipper = createIndex(tempDirectoryWithoutDocValuesSkipper, false); |
| 155 | + indexSearcherWithDocValuesSkipper = createIndex(tempDirectoryWithDocValuesSkipper, true); |
| 156 | + } |
| 157 | + |
| 158 | + /** |
| 159 | + * Creates an index with a specified sorting order and document structure. |
| 160 | + * |
| 161 | + * @param directory The Lucene directory to store the index. |
| 162 | + * @param withDocValuesSkipper Whether to use a sparse doc values index. |
| 163 | + * @return An IndexSearcher instance for querying the created index. |
| 164 | + * @throws IOException if an error occurs during index writing. |
| 165 | + */ |
| 166 | + private IndexSearcher createIndex(final Directory directory, boolean withDocValuesSkipper) throws IOException { |
| 167 | + final IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer()); |
| 168 | + config.setIndexSort( |
| 169 | + new Sort( |
| 170 | + new SortField(HOSTNAME_FIELD, SortField.Type.STRING, false), // NOTE: `host.name` ascending |
| 171 | + new SortedNumericSortField(TIMESTAMP_FIELD, SortField.Type.LONG, true) // NOTE: `@timestamp` descending |
| 172 | + ) |
| 173 | + ); |
| 174 | + |
| 175 | + final IndexWriter indexWriter = new IndexWriter(directory, config); |
| 176 | + final Random random = new Random(seed); |
| 177 | + |
| 178 | + for (int i = 0; i < numberOfDocuments; i++) { |
| 179 | + final Document doc = new Document(); |
| 180 | + addFieldsToDocument(doc, i, withDocValuesSkipper, random); |
| 181 | + indexWriter.addDocument(doc); |
| 182 | + } |
| 183 | + |
| 184 | + indexWriter.commit(); |
| 185 | + final DirectoryReader reader = DirectoryReader.open(indexWriter); |
| 186 | + indexWriter.close(); |
| 187 | + return new IndexSearcher(reader); |
| 188 | + } |
| 189 | + |
| 190 | + private void addFieldsToDocument(final Document doc, int docIndex, boolean withDocValuesSkipper, final Random random) { |
| 191 | + final int batchIndex = docIndex / batchSize; |
| 192 | + final String hostName = "host-" + batchIndex; |
| 193 | + final long timestampDelta = random.nextInt(0, timestampIncrementMillis); |
| 194 | + final long timestamp = BASE_TIMESTAMP + ((docIndex % batchSize) * timestampIncrementMillis) + timestampDelta; |
| 195 | + |
| 196 | + if (withDocValuesSkipper) { |
| 197 | + doc.add(SortedNumericDocValuesField.indexedField(TIMESTAMP_FIELD, timestamp)); // NOTE: doc values skipper on `@timestamp` |
| 198 | + doc.add(SortedDocValuesField.indexedField(HOSTNAME_FIELD, new BytesRef(hostName))); // NOTE: doc values skipper on `host.name` |
| 199 | + } else { |
| 200 | + doc.add(new LongPoint(TIMESTAMP_FIELD, timestamp)); // BKD tree on `@timestamp` |
| 201 | + doc.add(new SortedNumericDocValuesField(TIMESTAMP_FIELD, timestamp)); // NOTE: doc values without the doc values skipper on |
| 202 | + // `@timestamp` |
| 203 | + doc.add(new SortedDocValuesField(HOSTNAME_FIELD, new BytesRef(hostName))); // NOTE: doc values without the doc values skipper on |
| 204 | + // `host.name` |
| 205 | + } |
| 206 | + |
| 207 | + doc.add(new StringField(HOSTNAME_FIELD, hostName, Field.Store.NO)); |
| 208 | + } |
| 209 | + |
| 210 | + /** |
| 211 | + * Computes a dynamic timestamp upper bound based on the batch size, |
| 212 | + * timestamp increment, and user-specified fraction. |
| 213 | + * |
| 214 | + * @return The computed upper bound for the timestamp range query. |
| 215 | + */ |
| 216 | + private long rangeEndTimestamp() { |
| 217 | + return BASE_TIMESTAMP + ((long) (batchSize * timestampIncrementMillis * timestampRangeFraction)); |
| 218 | + } |
| 219 | + |
| 220 | + @Benchmark |
| 221 | + public void rangeQueryWithoutDocValuesSkipper(final Blackhole bh) throws IOException { |
| 222 | + bh.consume(rangeQuery(indexSearcherWithoutDocValuesSkipper, BASE_TIMESTAMP, rangeEndTimestamp(), true)); |
| 223 | + } |
| 224 | + |
| 225 | + @Benchmark |
| 226 | + public void rangeQueryWithDocValuesSkipper(final Blackhole bh) throws IOException { |
| 227 | + bh.consume(rangeQuery(indexSearcherWithDocValuesSkipper, BASE_TIMESTAMP, rangeEndTimestamp(), false)); |
| 228 | + } |
| 229 | + |
| 230 | + private long rangeQuery(final IndexSearcher searcher, long rangeStartTimestamp, long rangeEndTimestamp, boolean isIndexed) |
| 231 | + throws IOException { |
| 232 | + assert rangeEndTimestamp > rangeStartTimestamp; |
| 233 | + final Query rangeQuery = isIndexed |
| 234 | + ? new IndexOrDocValuesQuery( |
| 235 | + LongPoint.newRangeQuery(TIMESTAMP_FIELD, rangeStartTimestamp, rangeEndTimestamp), |
| 236 | + SortedNumericDocValuesField.newSlowRangeQuery(TIMESTAMP_FIELD, rangeStartTimestamp, rangeEndTimestamp) |
| 237 | + ) |
| 238 | + : SortedNumericDocValuesField.newSlowRangeQuery(TIMESTAMP_FIELD, rangeStartTimestamp, rangeEndTimestamp); |
| 239 | + final Query query = new IndexSortSortedNumericDocValuesRangeQuery( |
| 240 | + TIMESTAMP_FIELD, |
| 241 | + rangeStartTimestamp, |
| 242 | + rangeEndTimestamp, |
| 243 | + rangeQuery |
| 244 | + ); |
| 245 | + return searcher.search(query, numberOfDocuments, QUERY_SORT).totalHits.value(); |
| 246 | + } |
| 247 | + |
| 248 | + @TearDown(Level.Trial) |
| 249 | + public void tearDown() throws IOException { |
| 250 | + indexSearcherWithoutDocValuesSkipper.getIndexReader().close(); |
| 251 | + indexSearcherWithDocValuesSkipper.getIndexReader().close(); |
| 252 | + tempDirectoryWithoutDocValuesSkipper.close(); |
| 253 | + tempDirectoryWithDocValuesSkipper.close(); |
| 254 | + } |
| 255 | +} |
0 commit comments