diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/TSDBDocValuesSparseMergeBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/TSDBDocValuesSparseMergeBenchmark.java new file mode 100644 index 0000000000000..ddc4cc4d3c5d5 --- /dev/null +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/TSDBDocValuesSparseMergeBenchmark.java @@ -0,0 +1,188 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.benchmark.index.codec.tsdb; + +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.lucene101.Lucene101Codec; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.SortedDocValuesField; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.SortedNumericSortField; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.profile.AsyncProfiler; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +import java.io.IOException; +import java.nio.file.Files; +import java.util.Random; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; + +@BenchmarkMode(Mode.SampleTime) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +@State(Scope.Benchmark) +@Fork(1) +@Threads(1) +@Warmup(iterations = 0) +@Measurement(iterations = 1) +public class TSDBDocValuesSparseMergeBenchmark { + + @Param("20431204") + private int nDocs; + + @Param("1000") + private int deltaTime; + + @Param("42") + private int seed; + + private static final String TIMESTAMP_FIELD = "@timestamp"; + private static final String HOSTNAME_FIELD = "host.name"; + private static final long BASE_TIMESTAMP = 1704067200000L; + + private IndexWriter indexWriter; + private ExecutorService executorService; + + public static void main(String[] args) throws RunnerException { + final Options options = new OptionsBuilder().include(TSDBDocValuesSparseMergeBenchmark.class.getSimpleName()) + .addProfiler(AsyncProfiler.class) + .build(); + + new Runner(options).run(); + } + + @Setup(Level.Trial) + public void setup() throws IOException { + executorService = Executors.newSingleThreadExecutor(); + + final Directory tempDirectoryWithoutDocValuesSkipper = FSDirectory.open(Files.createTempDirectory("temp1-")); + indexWriter = createIndex(tempDirectoryWithoutDocValuesSkipper); + } + + private IndexWriter createIndex(final Directory directory) throws IOException { + + final IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer()); + // NOTE: index sort config matching LogsDB's sort order + config.setIndexSort( + new Sort( + new SortField(HOSTNAME_FIELD, SortField.Type.STRING, false), + new SortedNumericSortField(TIMESTAMP_FIELD, SortField.Type.LONG, true) + ) + ); + ES87TSDBDocValuesFormat docValuesFormat = new ES87TSDBDocValuesFormat(); + config.setCodec(new Lucene101Codec() { + + @Override + public DocValuesFormat getDocValuesFormatForField(String field) { + return docValuesFormat; + } + }); + + long counter1 = 0; + long counter2 = 10_000_000; + long[] gauge1Values = new long[] { 2, 4, 6, 8, 10, 12, 14, 16 }; + long[] gauge2Values = new long[] { -2, -4, -6, -8, -10, -12, -14, -16 }; + int numHosts = 1000; + String[] tags = new String[] { "tag_1", "tag_2", "tag_3", "tag_4", "tag_5", "tag_6", "tag_7", "tag_8" }; + + final Random random = new Random(seed); + IndexWriter indexWriter = new IndexWriter(directory, config); + for (int i = 0; i < nDocs; i++) { + final Document doc = new Document(); + + final int batchIndex = i / numHosts; + final String hostName = "host-" + batchIndex; + // Slightly vary the timestamp in each document + final long timestamp = BASE_TIMESTAMP + ((i % numHosts) * deltaTime) + random.nextInt(0, deltaTime); + + doc.add(new SortedDocValuesField(HOSTNAME_FIELD, new BytesRef(hostName))); + doc.add(new SortedNumericDocValuesField(TIMESTAMP_FIELD, timestamp)); + + if (i % 2 == 0) { + doc.add(new NumericDocValuesField("counter_1", counter1++)); + } + if (i % 2 == 1) { + doc.add(new SortedNumericDocValuesField("counter_2", counter2++)); + } + if (i % 2 == 0) { + doc.add(new SortedNumericDocValuesField("gauge_1", gauge1Values[i % gauge1Values.length])); + } + if (i % 2 == 1) { + doc.add(new SortedNumericDocValuesField("gauge_2", gauge2Values[i % gauge1Values.length])); + } + if (i % 2 == 0) { + int numTags = tags.length % (i + 1); + for (int j = 0; j < numTags; j++) { + doc.add(new SortedSetDocValuesField("tags", new BytesRef(tags[j]))); + } + } + if (i % 2 == 1) { + int randomIndex = i % tags.length; + doc.add(new SortedDocValuesField("other_tag", new BytesRef(tags[randomIndex]))); + } + + indexWriter.addDocument(doc); + } + indexWriter.commit(); + return indexWriter; + } + + @Benchmark + public void forceMerge() throws IOException { + forceMerge(indexWriter); + } + + private void forceMerge(final IndexWriter indexWriter) throws IOException { + indexWriter.forceMerge(1); + } + + @TearDown(Level.Trial) + public void tearDown() { + if (executorService != null) { + executorService.shutdown(); + try { + if (executorService.awaitTermination(30, TimeUnit.SECONDS) == false) { + executorService.shutdownNow(); + } + } catch (InterruptedException e) { + executorService.shutdownNow(); + Thread.currentThread().interrupt(); + } + } + } +} diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesConsumer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesConsumer.java index dc73428a07c7c..5bc2bec0233ee 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesConsumer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesConsumer.java @@ -35,6 +35,7 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.LongsRef; +import org.apache.lucene.util.RoaringDocIdSet; import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.compress.LZ4; import org.apache.lucene.util.packed.DirectMonotonicWriter; @@ -124,28 +125,11 @@ private long[] writeField(FieldInfo field, DocValuesProducer valuesProducer, lon final int count = values.docValueCount(); numValues += count; } - - if (numDocsWithValue == 0) { // meta[-2, 0]: No documents with values - meta.writeLong(-2); // docsWithFieldOffset - meta.writeLong(0L); // docsWithFieldLength - meta.writeShort((short) -1); // jumpTableEntryCount - meta.writeByte((byte) -1); // denseRankPower - } else if (numDocsWithValue == maxDoc) { // meta[-1, 0]: All documents have values - meta.writeLong(-1); // docsWithFieldOffset - meta.writeLong(0L); // docsWithFieldLength - meta.writeShort((short) -1); // jumpTableEntryCount - meta.writeByte((byte) -1); // denseRankPower - } else { // meta[data.offset, data.length]: IndexedDISI structure for documents with values - long offset = data.getFilePointer(); - meta.writeLong(offset); // docsWithFieldOffset - values = valuesProducer.getSortedNumeric(field); - final short jumpTableEntryCount = IndexedDISI.writeBitSet(values, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER); - meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength - meta.writeShort(jumpTableEntryCount); - meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER); - } meta.writeLong(numValues); + // TODO: write bwc tests + // TODO: write DISI to temp file and append it later to data part: + var docIdSetBuilder = new RoaringDocIdSet.Builder(maxDoc); if (numValues > 0) { // Special case for maxOrd of 1, signal -1 that no blocks will be written meta.writeInt(maxOrd != 1 ? ES87TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT : -1); @@ -166,6 +150,7 @@ private long[] writeField(FieldInfo field, DocValuesProducer valuesProducer, lon values = valuesProducer.getSortedNumeric(field); final int bitsPerOrd = maxOrd >= 0 ? PackedInts.bitsRequired(maxOrd - 1) : -1; for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { + docIdSetBuilder.add(doc); final int count = values.docValueCount(); for (int i = 0; i < count; ++i) { buffer[bufferSize++] = values.nextValue(); @@ -205,6 +190,35 @@ private long[] writeField(FieldInfo field, DocValuesProducer valuesProducer, lon meta.writeLong(valuesDataOffset); meta.writeLong(valuesDataLength); } + if (numDocsWithValue == 0) { // meta[-2, 0]: No documents with values + meta.writeLong(-2); // docsWithFieldOffset + meta.writeLong(0L); // docsWithFieldLength + meta.writeShort((short) -1); // jumpTableEntryCount + meta.writeByte((byte) -1); // denseRankPower + } else if (numDocsWithValue == maxDoc) { // meta[-1, 0]: All documents have values + meta.writeLong(-1); // docsWithFieldOffset + meta.writeLong(0L); // docsWithFieldLength + meta.writeShort((short) -1); // jumpTableEntryCount + meta.writeByte((byte) -1); // denseRankPower + } else { // meta[data.offset, data.length]: IndexedDISI structure for documents with values + long offset = data.getFilePointer(); + meta.writeLong(offset); // docsWithFieldOffset + final short jumpTableEntryCount; + if (maxOrd != 1) { + var bitSet = docIdSetBuilder.build(); + var iterator = bitSet.iterator(); + if (iterator == null) { + iterator = DocIdSetIterator.empty(); + } + jumpTableEntryCount = IndexedDISI.writeBitSet(iterator, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER); + } else { + values = valuesProducer.getSortedNumeric(field); + jumpTableEntryCount = IndexedDISI.writeBitSet(values, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER); + } + meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength + meta.writeShort(jumpTableEntryCount); + meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER); + } return new long[] { numDocsWithValue, numValues }; } diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesFormat.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesFormat.java index 496c41b42869a..5dbe3c222077e 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesFormat.java @@ -28,7 +28,8 @@ public class ES87TSDBDocValuesFormat extends org.apache.lucene.codecs.DocValuesF static final String META_CODEC = "ES87TSDBDocValuesMetadata"; static final String META_EXTENSION = "dvm"; static final int VERSION_START = 0; - static final int VERSION_CURRENT = VERSION_START; + static final int VERSION_DISI_CHANGE = 1; + static final int VERSION_CURRENT = VERSION_DISI_CHANGE; static final byte NUMERIC = 0; static final byte BINARY = 1; static final byte SORTED = 2; diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java index 8a8095ecf6d21..2aa183dc70246 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java @@ -89,7 +89,7 @@ public class ES87TSDBDocValuesProducer extends DocValuesProducer { state.segmentSuffix ); - readFields(in, state.fieldInfos); + readFields(in, version, state.fieldInfos); } catch (Throwable exception) { priorE = exception; @@ -861,7 +861,7 @@ public void close() throws IOException { data.close(); } - private void readFields(IndexInput meta, FieldInfos infos) throws IOException { + private void readFields(IndexInput meta, int version, FieldInfos infos) throws IOException { for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) { FieldInfo info = infos.fieldInfo(fieldNumber); if (info == null) { @@ -872,24 +872,24 @@ private void readFields(IndexInput meta, FieldInfos infos) throws IOException { skippers.put(info.name, readDocValueSkipperMeta(meta)); } if (type == ES87TSDBDocValuesFormat.NUMERIC) { - numerics.put(info.name, readNumeric(meta)); + numerics.put(info.name, readNumeric(meta, version)); } else if (type == ES87TSDBDocValuesFormat.BINARY) { binaries.put(info.name, readBinary(meta)); } else if (type == ES87TSDBDocValuesFormat.SORTED) { - sorted.put(info.name, readSorted(meta)); + sorted.put(info.name, readSorted(meta, version)); } else if (type == ES87TSDBDocValuesFormat.SORTED_SET) { - sortedSets.put(info.name, readSortedSet(meta)); + sortedSets.put(info.name, readSortedSet(meta, version)); } else if (type == ES87TSDBDocValuesFormat.SORTED_NUMERIC) { - sortedNumerics.put(info.name, readSortedNumeric(meta)); + sortedNumerics.put(info.name, readSortedNumeric(meta, version)); } else { throw new CorruptIndexException("invalid type: " + type, meta); } } } - private static NumericEntry readNumeric(IndexInput meta) throws IOException { + private static NumericEntry readNumeric(IndexInput meta, int version) throws IOException { NumericEntry entry = new NumericEntry(); - readNumeric(meta, entry); + readNumeric(meta, entry, version); return entry; } @@ -904,11 +904,13 @@ private static DocValuesSkipperEntry readDocValueSkipperMeta(IndexInput meta) th return new DocValuesSkipperEntry(offset, length, minValue, maxValue, docCount, maxDocID); } - private static void readNumeric(IndexInput meta, NumericEntry entry) throws IOException { - entry.docsWithFieldOffset = meta.readLong(); - entry.docsWithFieldLength = meta.readLong(); - entry.jumpTableEntryCount = meta.readShort(); - entry.denseRankPower = meta.readByte(); + private static void readNumeric(IndexInput meta, NumericEntry entry, int version) throws IOException { + if (version == ES87TSDBDocValuesFormat.VERSION_START) { + entry.docsWithFieldOffset = meta.readLong(); + entry.docsWithFieldLength = meta.readLong(); + entry.jumpTableEntryCount = meta.readShort(); + entry.denseRankPower = meta.readByte(); + } entry.numValues = meta.readLong(); if (entry.numValues > 0) { final int indexBlockShift = meta.readInt(); @@ -926,6 +928,12 @@ private static void readNumeric(IndexInput meta, NumericEntry entry) throws IOEx entry.valuesOffset = meta.readLong(); entry.valuesLength = meta.readLong(); } + if (version == ES87TSDBDocValuesFormat.VERSION_DISI_CHANGE) { + entry.docsWithFieldOffset = meta.readLong(); + entry.docsWithFieldLength = meta.readLong(); + entry.jumpTableEntryCount = meta.readShort(); + entry.denseRankPower = meta.readByte(); + } } private BinaryEntry readBinary(IndexInput meta) throws IOException { @@ -952,14 +960,14 @@ private BinaryEntry readBinary(IndexInput meta) throws IOException { return entry; } - private static SortedNumericEntry readSortedNumeric(IndexInput meta) throws IOException { + private static SortedNumericEntry readSortedNumeric(IndexInput meta, int version) throws IOException { SortedNumericEntry entry = new SortedNumericEntry(); - readSortedNumeric(meta, entry); + readSortedNumeric(meta, entry, version); return entry; } - private static SortedNumericEntry readSortedNumeric(IndexInput meta, SortedNumericEntry entry) throws IOException { - readNumeric(meta, entry); + private static SortedNumericEntry readSortedNumeric(IndexInput meta, SortedNumericEntry entry, int version) throws IOException { + readNumeric(meta, entry, version); entry.numDocsWithField = meta.readInt(); if (entry.numDocsWithField != entry.numValues) { entry.addressesOffset = meta.readLong(); @@ -970,21 +978,21 @@ private static SortedNumericEntry readSortedNumeric(IndexInput meta, SortedNumer return entry; } - private SortedEntry readSorted(IndexInput meta) throws IOException { + private static SortedEntry readSorted(IndexInput meta, int version) throws IOException { SortedEntry entry = new SortedEntry(); entry.ordsEntry = new NumericEntry(); - readNumeric(meta, entry.ordsEntry); + readNumeric(meta, entry.ordsEntry, version); entry.termsDictEntry = new TermsDictEntry(); readTermDict(meta, entry.termsDictEntry); return entry; } - private SortedSetEntry readSortedSet(IndexInput meta) throws IOException { + private static SortedSetEntry readSortedSet(IndexInput meta, int version) throws IOException { SortedSetEntry entry = new SortedSetEntry(); byte multiValued = meta.readByte(); switch (multiValued) { case 0: // singlevalued - entry.singleValueEntry = readSorted(meta); + entry.singleValueEntry = readSorted(meta, version); return entry; case 1: // multivalued break; @@ -992,7 +1000,7 @@ private SortedSetEntry readSortedSet(IndexInput meta) throws IOException { throw new CorruptIndexException("Invalid multiValued flag: " + multiValued, meta); } entry.ordsEntry = new SortedNumericEntry(); - readSortedNumeric(meta, entry.ordsEntry); + readSortedNumeric(meta, entry.ordsEntry, version); entry.termsDictEntry = new TermsDictEntry(); readTermDict(meta, entry.termsDictEntry); return entry; diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesFormatTests.java index 71ca8a37098f5..8c0893ef0e08c 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesFormatTests.java @@ -13,11 +13,13 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.document.StringField; import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.DocValues; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; @@ -27,6 +29,9 @@ import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.StoredFields; import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.SortedNumericSortField; import org.apache.lucene.store.Directory; import org.apache.lucene.tests.analysis.MockAnalyzer; import org.apache.lucene.tests.index.BaseDocValuesFormatTestCase; @@ -39,6 +44,7 @@ import java.util.Arrays; import java.util.HashMap; import java.util.List; +import java.util.Locale; import java.util.Map; import static org.hamcrest.Matchers.equalTo; @@ -246,4 +252,148 @@ public void testManyDocsWithManyValues() throws Exception { } } } + + public void testForceMergeSparseCase() throws Exception { + String timestampField = "@timestamp"; + String hostnameField = "host.name"; + long baseTimestamp = 1704067200000L; + + var config = new IndexWriterConfig(); + config.setIndexSort( + new Sort( + new SortField(hostnameField, SortField.Type.STRING, false), + new SortedNumericSortField(timestampField, SortField.Type.LONG, true) + ) + ); + config.setCodec(getCodec()); + try (var dir = newDirectory(); var iw = new IndexWriter(dir, config)) { + long counter1 = 0; + long counter2 = 10_000_000; + long[] gauge1Values = new long[] { 2, 4, 6, 8, 10, 12, 14, 16 }; + long[] gauge2Values = new long[] { -2, -4, -6, -8, -10, -12, -14, -16 }; + String[] tags = new String[] { "tag_1", "tag_2", "tag_3", "tag_4", "tag_5", "tag_6", "tag_7", "tag_8" }; + + int numDocs = 256 + random().nextInt(1024); + int numHosts = numDocs / 20; + for (int i = 0; i < numDocs; i++) { + var d = new Document(); + + int batchIndex = i / numHosts; + String hostName = String.format(Locale.ROOT, "host-%03d", batchIndex); + long timestamp = baseTimestamp + (1000L * i); + + d.add(new SortedDocValuesField(hostnameField, new BytesRef(hostName))); + // Index sorting doesn't work with NumericDocValuesField: + d.add(new SortedNumericDocValuesField(timestampField, timestamp)); + + if (random().nextBoolean()) { + d.add(new NumericDocValuesField("counter_1", counter1++)); + } + if (random().nextBoolean()) { + d.add(new SortedNumericDocValuesField("counter_2", counter2++)); + } + if (random().nextBoolean()) { + d.add(new SortedNumericDocValuesField("gauge_1", gauge1Values[i % gauge1Values.length])); + } + if (random().nextBoolean()) { + d.add(new SortedNumericDocValuesField("gauge_2", gauge2Values[i % gauge1Values.length])); + } + if (random().nextBoolean()) { + int numTags = 1 + random().nextInt(8); + for (int j = 0; j < numTags; j++) { + d.add(new SortedSetDocValuesField("tags", new BytesRef(tags[j]))); + } + } + if (random().nextBoolean()) { + int randomIndex = random().nextInt(tags.length); + d.add(new SortedDocValuesField("other_tag", new BytesRef(tags[randomIndex]))); + } + + iw.addDocument(d); + if (i % 100 == 0) { + iw.commit(); + } + } + iw.commit(); + + iw.forceMerge(1); + + // For asserting using binary search later on: + Arrays.sort(gauge2Values); + + try (var reader = DirectoryReader.open(iw)) { + assertEquals(1, reader.leaves().size()); + assertEquals(numDocs, reader.maxDoc()); + var leaf = reader.leaves().get(0).reader(); + var hostNameDV = leaf.getSortedDocValues(hostnameField); + assertNotNull(hostNameDV); + var timestampDV = DocValues.unwrapSingleton(leaf.getSortedNumericDocValues(timestampField)); + assertNotNull(timestampDV); + var counterOneDV = leaf.getNumericDocValues("counter_1"); + assertNotNull(counterOneDV); + var counterTwoDV = leaf.getSortedNumericDocValues("counter_2"); + assertNotNull(counterTwoDV); + var gaugeOneDV = leaf.getSortedNumericDocValues("gauge_1"); + assertNotNull(gaugeOneDV); + var gaugeTwoDV = leaf.getSortedNumericDocValues("gauge_2"); + assertNotNull(gaugeTwoDV); + var tagsDV = leaf.getSortedSetDocValues("tags"); + assertNotNull(tagsDV); + var otherTagDV = leaf.getSortedDocValues("other_tag"); + assertNotNull(otherTagDV); + for (int i = 0; i < numDocs; i++) { + assertEquals(i, hostNameDV.nextDoc()); + int batchIndex = i / numHosts; + assertEquals(batchIndex, hostNameDV.ordValue()); + String expectedHostName = String.format(Locale.ROOT, "host-%03d", batchIndex); + assertEquals(expectedHostName, hostNameDV.lookupOrd(hostNameDV.ordValue()).utf8ToString()); + + assertEquals(i, timestampDV.nextDoc()); + long timestamp = timestampDV.longValue(); + long lowerBound = baseTimestamp; + long upperBound = baseTimestamp + (1000L * numDocs); + assertTrue( + "unexpected timestamp [" + timestamp + "], expected between [" + lowerBound + "] and [" + upperBound + "]", + timestamp >= lowerBound && timestamp < upperBound + ); + + if (counterOneDV.advanceExact(i)) { + long counterOneValue = counterOneDV.longValue(); + assertTrue("unexpected counter [" + counterOneValue + "]", counterOneValue >= 0 && counterOneValue < counter1); + } + + if (counterTwoDV.advanceExact(i)) { + assertEquals(1, counterTwoDV.docValueCount()); + long counterTwoValue = counterTwoDV.nextValue(); + assertTrue("unexpected counter [" + counterTwoValue + "]", counterTwoValue > 0 && counterTwoValue <= counter2); + } + + if (gaugeOneDV.advanceExact(i)) { + assertEquals(1, gaugeOneDV.docValueCount()); + long gaugeOneValue = gaugeOneDV.nextValue(); + assertTrue("unexpected gauge [" + gaugeOneValue + "]", Arrays.binarySearch(gauge1Values, gaugeOneValue) >= 0); + } + + if (gaugeTwoDV.advanceExact(i)) { + assertEquals(1, gaugeTwoDV.docValueCount()); + long gaugeTwoValue = gaugeTwoDV.nextValue(); + assertTrue("unexpected gauge [" + gaugeTwoValue + "]", Arrays.binarySearch(gauge2Values, gaugeTwoValue) >= 0); + } + + if (tagsDV.advanceExact(i)) { + for (int j = 0; j < tagsDV.docValueCount(); j++) { + long ordinal = tagsDV.nextOrd(); + String actualTag = tagsDV.lookupOrd(ordinal).utf8ToString(); + assertTrue("unexpected tag [" + actualTag + "]", Arrays.binarySearch(tags, actualTag) >= 0); + } + } + if (otherTagDV.advanceExact(i)) { + int ordinal = otherTagDV.ordValue(); + String actualTag = otherTagDV.lookupOrd(ordinal).utf8ToString(); + assertTrue("unexpected tag [" + actualTag + "]", Arrays.binarySearch(tags, actualTag) >= 0); + } + } + } + } + } }