diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/TSDBDocValuesMergeBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/TSDBDocValuesMergeBenchmark.java
index 71164e35ad557..1637cd33ac0af 100644
--- a/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/TSDBDocValuesMergeBenchmark.java
+++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/TSDBDocValuesMergeBenchmark.java
@@ -258,7 +258,7 @@ private static IndexWriterConfig createIndexWriterConfig(boolean optimizedMergeE
         );
         config.setLeafSorter(DataStream.TIMESERIES_LEAF_READERS_SORTER);
         config.setMergePolicy(new LogByteSizeMergePolicy());
-        var docValuesFormat = new ES819TSDBDocValuesFormat(4096, optimizedMergeEnabled);
+        var docValuesFormat = new ES819TSDBDocValuesFormat(4096, 512, optimizedMergeEnabled);
         config.setCodec(new Elasticsearch900Lucene101Codec() {
 
             @Override
diff --git a/docs/changelog/133018.yaml b/docs/changelog/133018.yaml
new file mode 100644
index 0000000000000..d469f99f92c74
--- /dev/null
+++ b/docs/changelog/133018.yaml
@@ -0,0 +1,5 @@
+pr: 133018
+summary: Add ordinal range encode for tsid
+area: TSDB
+type: enhancement
+issues: []
diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java
index 3651be472051f..8aaef4329d6c4 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java
@@ -62,11 +62,14 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer {
     final int maxDoc;
     private byte[] termsDictBuffer;
     private final int skipIndexIntervalSize;
+    private final int minDocsPerOrdinalForOrdinalRangeEncoding;
     final boolean enableOptimizedMerge;
+    private final int primarySortFieldNumber;
 
     ES819TSDBDocValuesConsumer(
         SegmentWriteState state,
         int skipIndexIntervalSize,
+        int minDocsPerOrdinalForOrdinalRangeEncoding,
         boolean enableOptimizedMerge,
         String dataCodec,
         String dataExtension,
@@ -75,6 +78,8 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer {
     ) throws IOException {
         this.termsDictBuffer = new byte[1 << 14];
         this.dir = state.directory;
+        this.minDocsPerOrdinalForOrdinalRangeEncoding = minDocsPerOrdinalForOrdinalRangeEncoding;
+        this.primarySortFieldNumber = ES819TSDBDocValuesProducer.primarySortFieldNumber(state.segmentInfo, state.fieldInfos);
         this.context = state.context;
         boolean success = false;
         try {
@@ -124,6 +129,19 @@ public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOExcepti
         writeField(field, producer, -1, null);
     }
 
+    /**
+     * Decides whether the ordinals of {@code field} are written with ordinal range encoding instead of numeric blocks.
+     * The encoding stores one start doc per ordinal and reads a single value per document, so it is only used for the
+     * primary (ascending) index sort field, when every document with a value has exactly one value, and when the average
+     * number of documents per ordinal reaches {@code minDocsPerOrdinalForOrdinalRangeEncoding}.
+     */
+    private boolean shouldEncodeOrdinalRange(FieldInfo field, long maxOrd, int numDocsWithValue, long numValues) {
+        return maxDoc > 1
+            && field.number == primarySortFieldNumber
+            && numDocsWithValue == numValues
+            && (numDocsWithValue / maxOrd) >= minDocsPerOrdinalForOrdinalRangeEncoding;
+    }
+
     private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer, long maxOrd, OffsetsAccumulator offsetsAccumulator)
         throws IOException {
         int numDocsWithValue = 0;
@@ -149,19 +167,53 @@ private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer,
         try {
             if (numValues > 0) {
                 assert numDocsWithValue > 0;
-                // Special case for maxOrd of 1, signal -1 that no blocks will be written
-                meta.writeInt(maxOrd != 1 ? ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT : -1);
                 final ByteBuffersDataOutput indexOut = new ByteBuffersDataOutput();
-                final DirectMonotonicWriter indexWriter = DirectMonotonicWriter.getInstance(
-                    meta,
-                    new ByteBuffersIndexOutput(indexOut, "temp-dv-index", "temp-dv-index"),
-                    1L + ((numValues - 1) >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT),
-                    ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT
-                );
+                DirectMonotonicWriter indexWriter = null;
 
                 final long valuesDataOffset = data.getFilePointer();
-                // Special case for maxOrd of 1, skip writing the blocks
-                if (maxOrd != 1) {
+                if (maxOrd == 1) {
+                    // Special case for maxOrd of 1, signal -1 that no blocks will be written
+                    meta.writeInt(-1);
+                } else if (shouldEncodeOrdinalRange(field, maxOrd, numDocsWithValue, numValues)) {
+                    // When a field is sorted, use ordinal range encode for long runs of the same ordinal.
+                    meta.writeInt(-2);
+                    meta.writeVInt(Math.toIntExact(maxOrd));
+                    meta.writeByte((byte) ES819TSDBDocValuesFormat.ORDINAL_RANGE_ENCODING_BLOCK_SHIFT);
+                    values = valuesProducer.getSortedNumeric(field);
+                    if (enableOptimizedMerge && numDocsWithValue < maxDoc) {
+                        disiAccumulator = new DISIAccumulator(dir, context, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER);
+                    }
+                    DirectMonotonicWriter startDocs = DirectMonotonicWriter.getInstance(
+                        meta,
+                        data,
+                        maxOrd + 1,
+                        ES819TSDBDocValuesFormat.ORDINAL_RANGE_ENCODING_BLOCK_SHIFT
+                    );
+                    long lastOrd = 0;
+                    startDocs.add(0);
+                    for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
+                        if (disiAccumulator != null) {
+                            disiAccumulator.addDocId(doc);
+                        }
+                        if (offsetsAccumulator != null) {
+                            offsetsAccumulator.addDoc(1);
+                        }
+                        final long nextOrd = values.nextValue();
+                        if (nextOrd != lastOrd) {
+                            lastOrd = nextOrd;
+                            startDocs.add(doc);
+                        }
+                    }
+                    startDocs.add(maxDoc);
+                    startDocs.finish();
+                } else {
+                    meta.writeInt(ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT);
+                    indexWriter = DirectMonotonicWriter.getInstance(
+                        meta,
+                        new ByteBuffersIndexOutput(indexOut, "temp-dv-index", "temp-dv-index"),
+                        1L + ((numValues - 1) >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT),
+                        ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT
+                    );
                     final long[] buffer = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];
                     int bufferSize = 0;
                     final TSDBDocValuesEncoder encoder = new TSDBDocValuesEncoder(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
@@ -204,8 +256,7 @@ private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer,
                 }
 
                 final long valuesDataLength = data.getFilePointer() - valuesDataOffset;
-                if (maxOrd != 1) {
-                    // Special case for maxOrd of 1, indexWriter isn't really used, so no need to invoke finish() method.
+                if (indexWriter != null) {
                     indexWriter.finish();
                 }
                 final long indexDataOffset = data.getFilePointer();
diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java
index 1a937e75ad5f9..fbdef488b8318 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java
@@ -104,21 +104,40 @@ private static boolean getOptimizedMergeEnabledDefault() {
         return Boolean.parseBoolean(System.getProperty(OPTIMIZED_MERGE_ENABLED_NAME, Boolean.TRUE.toString()));
     }
 
+    /**
+     * The default minimum number of documents per ordinal required to use ordinal range encoding.
+     * If the average number of documents per ordinal is below this threshold, it is more efficient to encode doc values in blocks.
+     * A much smaller value may be used in tests to exercise ordinal range encoding more frequently.
+     */
+    public static final int ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL = 512;
+
+    /**
+     * The block shift used in DirectMonotonicWriter when encoding the start docs of each ordinal with ordinal range encoding.
+     */
+    public static final int ORDINAL_RANGE_ENCODING_BLOCK_SHIFT = 12;
+
     final int skipIndexIntervalSize;
+    final int minDocsPerOrdinalForRangeEncoding;
     private final boolean enableOptimizedMerge;
 
     /** Default constructor. */
     public ES819TSDBDocValuesFormat() {
-        this(DEFAULT_SKIP_INDEX_INTERVAL_SIZE, OPTIMIZED_MERGE_ENABLE_DEFAULT);
+        this(DEFAULT_SKIP_INDEX_INTERVAL_SIZE, ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL, OPTIMIZED_MERGE_ENABLE_DEFAULT);
     }
 
     /** Doc values fields format with specified skipIndexIntervalSize. */
-    public ES819TSDBDocValuesFormat(int skipIndexIntervalSize, boolean enableOptimizedMerge) {
+    public ES819TSDBDocValuesFormat(int skipIndexIntervalSize, int minDocsPerOrdinalForRangeEncoding, boolean enableOptimizedMerge) {
         super(CODEC_NAME);
         if (skipIndexIntervalSize < 2) {
             throw new IllegalArgumentException("skipIndexIntervalSize must be > 1, got [" + skipIndexIntervalSize + "]");
         }
+        if (minDocsPerOrdinalForRangeEncoding < 1) {
+            throw new IllegalArgumentException(
+                "minDocsPerOrdinalForRangeEncoding must be > 0, got [" + minDocsPerOrdinalForRangeEncoding + "]"
+            );
+        }
         this.skipIndexIntervalSize = skipIndexIntervalSize;
+        this.minDocsPerOrdinalForRangeEncoding = minDocsPerOrdinalForRangeEncoding;
         this.enableOptimizedMerge = enableOptimizedMerge;
     }
 
@@ -127,6 +146,7 @@ public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOExcept
         return new ES819TSDBDocValuesConsumer(
             state,
             skipIndexIntervalSize,
+            minDocsPerOrdinalForRangeEncoding,
             enableOptimizedMerge,
             DATA_CODEC,
             DATA_EXTENSION,
diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java
index 2ef0a5aaa1660..3ddeca1cd25a5 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java
@@ -24,6 +24,7 @@
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.index.SortedNumericDocValues;
@@ -31,6 +32,7 @@
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.internal.hppc.IntObjectHashMap;
 import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.SortField;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.DataInput;
@@ -53,7 +55,7 @@
 
 final class ES819TSDBDocValuesProducer extends DocValuesProducer {
     final IntObjectHashMap<NumericEntry> numerics;
-    private int primarySortFieldNumber = -1;
+    private final int primarySortFieldNumber;
     final IntObjectHashMap<BinaryEntry> binaries;
     final IntObjectHashMap<SortedEntry> sorted;
     final IntObjectHashMap<SortedSetEntry> sortedSets;
@@ -73,11 +75,13 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer {
         this.sortedNumerics = new IntObjectHashMap<>();
         this.skippers = new IntObjectHashMap<>();
         this.maxDoc = state.segmentInfo.maxDoc();
+        this.primarySortFieldNumber = primarySortFieldNumber(state.segmentInfo, state.fieldInfos);
         this.merging = false;
 
         // read in the entries from the metadata file.
         int version = -1;
         String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
+
         try (ChecksumIndexInput in = state.directory.openChecksumInput(metaName)) {
             Throwable priorE = null;
 
@@ -92,14 +96,7 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer {
                 );
 
                 readFields(in, state.fieldInfos);
-                final var indexSort = state.segmentInfo.getIndexSort();
-                if (indexSort != null && indexSort.getSort().length > 0) {
-                    var primarySortField = indexSort.getSort()[0];
-                    var sortField = state.fieldInfos.fieldInfo(primarySortField.getField());
-                    if (sortField != null) {
-                        primarySortFieldNumber = sortField.number;
-                    }
-                }
+
             } catch (Throwable exception) {
                 priorE = exception;
             } finally {
@@ -148,6 +145,7 @@ private ES819TSDBDocValuesProducer(
         IndexInput data,
         int maxDoc,
         int version,
+        int primarySortFieldNumber,
         boolean merging
     ) {
         this.numerics = numerics;
@@ -159,6 +157,7 @@ private ES819TSDBDocValuesProducer(
         this.data = data.clone();
         this.maxDoc = maxDoc;
         this.version = version;
+        this.primarySortFieldNumber = primarySortFieldNumber;
         this.merging = merging;
     }
 
@@ -174,6 +173,7 @@ public DocValuesProducer getMergeInstance() {
             data,
             maxDoc,
             version,
+            primarySortFieldNumber,
             true
         );
     }
@@ -467,6 +467,47 @@ BlockLoader.Block tryReadAHead(BlockLoader.BlockFactory factory, BlockLoader.Doc
     }
 
     abstract static class BaseDenseNumericValues extends NumericDocValues implements BlockLoader.OptionalColumnAtATimeReader {
+        private final int maxDoc;
+        protected int doc = -1;
+
+        BaseDenseNumericValues(int maxDoc) {
+            this.maxDoc = maxDoc;
+        }
+
+        @Override
+        public final int docID() {
+            return doc;
+        }
+
+        @Override
+        public final int nextDoc() throws IOException {
+            return advance(doc + 1);
+        }
+
+        @Override
+        public final int advance(int target) throws IOException {
+            if (target >= maxDoc) {
+                return doc = NO_MORE_DOCS;
+            }
+            return doc = target;
+        }
+
+        @Override
+        public final boolean advanceExact(int target) {
+            doc = target;
+            return true;
+        }
+
+        @Override
+        public final long cost() {
+            return maxDoc;
+        }
+
+        @Override
+        public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException {
+            return null;
+        }
+
         abstract long lookAheadValueAt(int targetDoc) throws IOException;
 
         BlockLoader.Block tryRead(BlockLoader.SingletonLongBuilder builder, BlockLoader.Docs docs, int offset) throws IOException {
@@ -474,6 +515,39 @@ BlockLoader.Block tryRead(BlockLoader.SingletonLongBuilder builder, BlockLoader.
         }
     }
 
+    abstract static class BaseSparseNumericValues extends NumericDocValues {
+        protected final IndexedDISI disi;
+
+        BaseSparseNumericValues(IndexedDISI disi) {
+            this.disi = disi;
+        }
+
+        @Override
+        public final int advance(int target) throws IOException {
+            return disi.advance(target);
+        }
+
+        @Override
+        public final boolean advanceExact(int target) throws IOException {
+            return disi.advanceExact(target);
+        }
+
+        @Override
+        public final int nextDoc() throws IOException {
+            return disi.nextDoc();
+        }
+
+        @Override
+        public final int docID() {
+            return disi.docID();
+        }
+
+        @Override
+        public final long cost() {
+            return disi.cost();
+        }
+    }
+
     abstract static class BaseSortedSetDocValues extends SortedSetDocValues {
 
         final SortedSetEntry entry;
@@ -939,6 +1013,24 @@ public void close() throws IOException {
         data.close();
     }
 
+    /**
+     * Returns the field number of the primary sort field for the given segment,
+     * if the field is sorted in ascending order. Returns {@code -1} if not found.
+     */
+    static int primarySortFieldNumber(SegmentInfo segmentInfo, FieldInfos fieldInfos) {
+        final var indexSort = segmentInfo.getIndexSort();
+        if (indexSort != null && indexSort.getSort().length > 0) {
+            SortField sortField = indexSort.getSort()[0];
+            if (sortField.getReverse() == false) {
+                FieldInfo fieldInfo = fieldInfos.fieldInfo(sortField.getField());
+                if (fieldInfo != null) {
+                    return fieldInfo.number;
+                }
+            }
+        }
+        return -1;
+    }
+
     private void readFields(IndexInput meta, FieldInfos infos) throws IOException {
         for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) {
             FieldInfo info = infos.fieldInfo(fieldNumber);
@@ -988,9 +1080,14 @@ private static void readNumeric(IndexInput meta, NumericEntry entry) throws IOEx
         entry.numDocsWithField = meta.readInt();
         if (entry.numValues > 0) {
             final int indexBlockShift = meta.readInt();
-            // Special case, -1 means there are no blocks, so no need to load the metadata for it
-            // -1 is written when there the cardinality of a field is exactly one.
-            if (indexBlockShift != -1) {
+            if (indexBlockShift == -1) {
+                // single ordinal, no block index
+            } else if (indexBlockShift == -2) {
+                // encoded ordinal range, no block index
+                final int numOrds = meta.readVInt();
+                final int blockShift = meta.readByte();
+                entry.sortedOrdinals = DirectMonotonicReader.loadMeta(meta, numOrds + 1, blockShift);
+            } else {
                 entry.indexMeta = DirectMonotonicReader.loadMeta(
                     meta,
                     1 + ((entry.numValues - 1) >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT),
@@ -1103,6 +1200,57 @@ private abstract static class NumericValues {
         abstract long advance(long index) throws IOException;
     }
 
+    /**
+     * Resolves the ordinal of a document for fields written with ordinal range encoding. {@code startDocs} holds
+     * {@code maxOrd + 1} non-decreasing doc ids: entry 0 is 0, entry {@code i} is the doc at which the run of ordinal
+     * {@code i} starts, and the last entry is the segment's max doc. The reader remembers the current range, so
+     * {@link #readValueAndAdvance(int)} expects docs in non-decreasing order.
+     */
+    static final class SortedOrdinalReader {
+        final long maxOrd;
+        final DirectMonotonicReader startDocs;
+        private long currentIndex = -1;
+        private long rangeEndExclusive = -1;
+
+        SortedOrdinalReader(long maxOrd, DirectMonotonicReader startDocs) {
+            this.maxOrd = maxOrd;
+            this.startDocs = startDocs;
+        }
+
+        /** Returns the ordinal of {@code doc} and remembers the end of its range for subsequent calls. */
+        long readValueAndAdvance(int doc) {
+            if (doc < rangeEndExclusive) {
+                return currentIndex;
+            }
+            // move to the next range
+            if (doc == rangeEndExclusive) {
+                currentIndex++;
+            } else {
+                currentIndex = searchRange(doc);
+            }
+            rangeEndExclusive = startDocs.get(currentIndex + 1);
+            return currentIndex;
+        }
+
+        private long searchRange(int doc) {
+            long index = startDocs.binarySearch(currentIndex + 1, maxOrd, doc);
+            if (index < 0) {
+                index = -2 - index;
+            }
+            assert index < maxOrd : "invalid range " + index + " for doc " + doc + " in maxOrd " + maxOrd;
+            return index;
+        }
+
+        /** Returns the ordinal of {@code targetDoc} without changing the current position. */
+        long lookAheadValue(int targetDoc) {
+            if (targetDoc < rangeEndExclusive) {
+                return currentIndex;
+            } else {
+                return searchRange(targetDoc);
+            }
+        }
+    }
+
     private NumericDocValues getNumeric(NumericEntry entry, long maxOrd) throws IOException {
         if (entry.docsWithFieldOffset == -2) {
             // empty
@@ -1113,56 +1261,17 @@ private NumericDocValues getNumeric(NumericEntry entry, long maxOrd) throws IOEx
             // Special case for maxOrd 1, no need to read blocks and use ordinal 0 as only value
             if (entry.docsWithFieldOffset == -1) {
                 // Special case when all docs have a value
-                return new BaseDenseNumericValues() {
-
-                    private final int maxDoc = ES819TSDBDocValuesProducer.this.maxDoc;
-                    private int doc = -1;
-
+                return new BaseDenseNumericValues(maxDoc) {
                     @Override
                     public long longValue() {
                         // Only one ordinal!
                         return 0L;
                     }
 
-                    @Override
-                    public int docID() {
-                        return doc;
-                    }
-
-                    @Override
-                    public int nextDoc() throws IOException {
-                        return advance(doc + 1);
-                    }
-
-                    @Override
-                    public int advance(int target) throws IOException {
-                        if (target >= maxDoc) {
-                            return doc = NO_MORE_DOCS;
-                        }
-                        return doc = target;
-                    }
-
-                    @Override
-                    public boolean advanceExact(int target) {
-                        doc = target;
-                        return true;
-                    }
-
-                    @Override
-                    public long cost() {
-                        return maxDoc;
-                    }
-
                     @Override
                     long lookAheadValueAt(int targetDoc) throws IOException {
                         return 0L; // Only one ordinal!
                     }
-
-                    @Override
-                    public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset)
-                        throws IOException {
-                        return null;
-                    }
                 };
             } else {
                 final IndexedDISI disi = new IndexedDISI(
@@ -1173,36 +1282,47 @@ public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.D
                     entry.denseRankPower,
                     entry.numValues
                 );
-                return new NumericDocValues() {
-
-                    @Override
-                    public int advance(int target) throws IOException {
-                        return disi.advance(target);
-                    }
-
+                return new BaseSparseNumericValues(disi) {
                     @Override
-                    public boolean advanceExact(int target) throws IOException {
-                        return disi.advanceExact(target);
-                    }
-
-                    @Override
-                    public int nextDoc() throws IOException {
-                        return disi.nextDoc();
+                    public long longValue() throws IOException {
+                        return 0L; // Only one ordinal!
                     }
-
+                };
+            }
+        } else if (entry.sortedOrdinals != null) {
+            final var ordinalsReader = new SortedOrdinalReader(
+                maxOrd,
+                DirectMonotonicReader.getInstance(
+                    entry.sortedOrdinals,
+                    data.randomAccessSlice(entry.valuesOffset, entry.valuesLength),
+                    true
+                )
+            );
+            if (entry.docsWithFieldOffset == -1) {
+                return new BaseDenseNumericValues(maxDoc) {
                     @Override
-                    public int docID() {
-                        return disi.docID();
+                    long lookAheadValueAt(int targetDoc) {
+                        return ordinalsReader.lookAheadValue(targetDoc);
                     }
 
                     @Override
-                    public long cost() {
-                        return disi.cost();
+                    public long longValue() {
+                        return ordinalsReader.readValueAndAdvance(doc);
                     }
-
+                };
+            } else {
+                final var disi = new IndexedDISI(
+                    data,
+                    entry.docsWithFieldOffset,
+                    entry.docsWithFieldLength,
+                    entry.jumpTableEntryCount,
+                    entry.denseRankPower,
+                    entry.numValues
+                );
+                return new BaseSparseNumericValues(disi) {
                     @Override
                     public long longValue() {
-                        return 0L;
+                        return ordinalsReader.readValueAndAdvance(disi.docID());
                     }
                 };
             }
@@ -1218,10 +1338,7 @@ public long longValue() {
         final int bitsPerOrd = maxOrd >= 0 ? PackedInts.bitsRequired(maxOrd - 1) : -1;
         if (entry.docsWithFieldOffset == -1) {
             // dense
-            return new BaseDenseNumericValues() {
-
-                private final int maxDoc = ES819TSDBDocValuesProducer.this.maxDoc;
-                private int doc = -1;
+            return new BaseDenseNumericValues(maxDoc) {
                 private final TSDBDocValuesEncoder decoder = new TSDBDocValuesEncoder(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
                 private long currentBlockIndex = -1;
                 private final long[] currentBlock = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];
@@ -1230,35 +1347,6 @@ public long longValue() {
                 private long[] lookaheadBlock;
                 private IndexInput lookaheadData = null;
 
-                @Override
-                public int docID() {
-                    return doc;
-                }
-
-                @Override
-                public int nextDoc() throws IOException {
-                    return advance(doc + 1);
-                }
-
-                @Override
-                public int advance(int target) throws IOException {
-                    if (target >= maxDoc) {
-                        return doc = NO_MORE_DOCS;
-                    }
-                    return doc = target;
-                }
-
-                @Override
-                public boolean advanceExact(int target) {
-                    doc = target;
-                    return true;
-                }
-
-                @Override
-                public long cost() {
-                    return maxDoc;
-                }
-
                 @Override
                 public long longValue() throws IOException {
                     final int index = doc;
@@ -1374,37 +1462,11 @@ static boolean isDense(int firstDocId, int lastDocId, int length) {
                 entry.denseRankPower,
                 entry.numValues
             );
-            return new NumericDocValues() {
-
+            return new BaseSparseNumericValues(disi) {
                 private final TSDBDocValuesEncoder decoder = new TSDBDocValuesEncoder(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
                 private long currentBlockIndex = -1;
                 private final long[] currentBlock = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];
 
-                @Override
-                public int advance(int target) throws IOException {
-                    return disi.advance(target);
-                }
-
-                @Override
-                public boolean advanceExact(int target) throws IOException {
-                    return disi.advanceExact(target);
-                }
-
-                @Override
-                public int nextDoc() throws IOException {
-                    return disi.nextDoc();
-                }
-
-                @Override
-                public int docID() {
-                    return disi.docID();
-                }
-
-                @Override
-                public long cost() {
-                    return disi.cost();
-                }
-
                 @Override
                 public long longValue() throws IOException {
                     final int index = disi.index();
@@ -1610,6 +1672,7 @@ static class NumericEntry {
         DirectMonotonicReader.Meta indexMeta;
         long valuesOffset;
         long valuesLength;
+        DirectMonotonicReader.Meta sortedOrdinals;
     }
 
     static class BinaryEntry {
diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java
index f0ce28f11a51a..1036d822c0a21 100644
--- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java
+++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java
@@ -58,7 +58,11 @@ public void testDuel() throws IOException {
             Codec codec = new Elasticsearch900Lucene101Codec() {
 
                 final DocValuesFormat docValuesFormat = randomBoolean()
-                    ? new ES819TSDBDocValuesFormat()
+                    ? new ES819TSDBDocValuesFormat(
+                        ESTestCase.randomIntBetween(2, 4096),
+                        ESTestCase.randomIntBetween(1, 512),
+                        random().nextBoolean()
+                    )
                     : new TestES87TSDBDocValuesFormat();
 
                 @Override
diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/TsdbDocValueBwcTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/TsdbDocValueBwcTests.java
index 9c41e7a80ed66..37a62b3605c2c 100644
--- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/TsdbDocValueBwcTests.java
+++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/TsdbDocValueBwcTests.java
@@ -13,6 +13,7 @@
 import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.DocValuesProducer;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.LongPoint;
 import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.SortedDocValuesField;
 import org.apache.lucene.document.SortedNumericDocValuesField;
@@ -21,10 +22,14 @@
 import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.LogByteSizeMergePolicy;
 import org.apache.lucene.index.MultiDocValues;
 import org.apache.lucene.index.NoMergePolicy;
+import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SegmentReader;
+import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.SortedNumericSortField;
@@ -45,6 +50,9 @@
 import java.util.Arrays;
 import java.util.Locale;
 import java.util.Map;
+import java.util.function.IntSupplier;
+
+import static org.hamcrest.Matchers.equalTo;
 
 public class TsdbDocValueBwcTests extends ESTestCase {
 
@@ -260,6 +268,105 @@ void testMixedIndex(Codec oldCodec, Codec newCodec) throws IOException, NoSuchFi
         }
     }
 
+    public void testEncodeOrdinalRange() throws IOException {
+        try (var dir = newDirectory()) {
+            int iters = between(5, 20);
+            for (int iter = 0; iter < iters; iter++) {
+                var config = new IndexWriterConfig();
+                String hostNameField = "host.name";
+                String hostIdField = "host.id";
+                config.setIndexSort(new Sort(new SortField(hostNameField, SortField.Type.STRING, false)));
+                int thresholdRange = random().nextInt(3);
+                IntSupplier nextOrdinalRangeThreshold = () -> {
+                    if (thresholdRange == 0) {
+                        return between(1, 5);
+                    } else if (thresholdRange == 1) {
+                        return between(5, 20);
+                    } else {
+                        return Integer.MAX_VALUE;
+                    }
+                };
+                config.setCodec(
+                    TestUtil.alwaysDocValuesFormat(
+                        new ES819TSDBDocValuesFormat(
+                            random().nextInt(16, 128),
+                            nextOrdinalRangeThreshold.getAsInt(),
+                            random().nextBoolean()
+                        )
+                    )
+                );
+                try (IndexWriter writer = new IndexWriter(dir, config)) {
+                    int numDocs = between(50, 500);
+                    for (int d = 0; d < numDocs; d++) {
+                        Document doc = new Document();
+                        int hostId = random().nextInt(100);
+                        if (random().nextInt(100) <= 10) {
+                            writer.deleteDocuments(LongPoint.newExactQuery(hostIdField, hostId));
+                        } else {
+                            String hostName = String.format(Locale.ROOT, "host-%02d", hostId);
+                            doc.add(new LongPoint(hostIdField, hostId));
+                            doc.add(new SortedDocValuesField(hostNameField, new BytesRef(hostName)));
+                            doc.add(new NumericDocValuesField(hostIdField, hostId));
+                            writer.addDocument(doc);
+                        }
+
+                        if (random().nextInt(100) <= 5) {
+                            Document dummy = new Document();
+                            dummy.add(new SortedDocValuesField("dummy", new BytesRef("dummy")));
+                            writer.addDocument(dummy);
+                        }
+                        if (random().nextInt(100) <= 10) {
+                            writer.flush();
+                        }
+                        if (random().nextInt(100) <= 5) {
+                            writer.forceMerge(between(1, 10));
+                        }
+                    }
+                }
+                try (DirectoryReader reader = DirectoryReader.open(dir)) {
+                    for (LeafReaderContext leaf : reader.leaves()) {
+                        // sequential
+                        NumericDocValues hostIdDv = leaf.reader().getNumericDocValues(hostIdField);
+                        SortedDocValues hostNameDv = leaf.reader().getSortedDocValues(hostNameField);
+                        if (hostIdDv == null) {
+                            assertNull(hostNameDv);
+                            continue;
+                        }
+                        {
+                            int docId;
+                            while ((docId = hostIdDv.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+                                assertTrue(hostNameDv.advanceExact(docId));
+                                String hostName = hostNameDv.lookupOrd(hostNameDv.ordValue()).utf8ToString();
+                                String expectedHostName = String.format(Locale.ROOT, "host-%02d", hostIdDv.longValue());
+                                assertThat(hostName, equalTo(expectedHostName));
+                            }
+                        }
+                        int checkIters = between(1, 20);
+                        int nextDoc = 0;
+                        for (int n = 0; n < checkIters; n++) {
+                            if (nextDoc >= leaf.reader().maxDoc()) {
+                                nextDoc = 0;
+                            }
+                            nextDoc = nextDoc + random().nextInt(leaf.reader().maxDoc() - nextDoc);
+                            if (hostIdDv.docID() == DocIdSetIterator.NO_MORE_DOCS || nextDoc < hostIdDv.docID()) {
+                                hostIdDv = leaf.reader().getNumericDocValues(hostIdField);
+                                hostNameDv = leaf.reader().getSortedDocValues(hostNameField);
+                            }
+                            if (hostIdDv.advanceExact(nextDoc)) {
+                                assertTrue(hostNameDv.advanceExact(nextDoc));
+                                String hostName = hostNameDv.lookupOrd(hostNameDv.ordValue()).utf8ToString();
+                                String expectedHostName = String.format(Locale.ROOT, "host-%02d", hostIdDv.longValue());
+                                assertThat(hostName, equalTo(expectedHostName));
+                            } else {
+                                assertFalse(hostNameDv.advanceExact(nextDoc));
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
     private IndexWriterConfig getTimeSeriesIndexWriterConfig(String hostnameField, String timestampField, Codec codec) {
         var config = new IndexWriterConfig();
         config.setIndexSort(
diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java
index d21f9b0af7e3c..27a421a37239a 100644
--- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java
+++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java
@@ -65,7 +65,11 @@ public class ES819TSDBDocValuesFormatTests extends ES87TSDBDocValuesFormatTests
 
     private final Codec codec = new Elasticsearch900Lucene101Codec() {
 
-        final ES819TSDBDocValuesFormat docValuesFormat = new ES819TSDBDocValuesFormat();
+        final ES819TSDBDocValuesFormat docValuesFormat = new ES819TSDBDocValuesFormat(
+            ESTestCase.randomIntBetween(2, 4096),
+            ESTestCase.randomIntBetween(1, 512),
+            random().nextBoolean()
+        );
 
         @Override
         public DocValuesFormat getDocValuesFormatForField(String field) {
diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatVariableSkipIntervalTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatVariableSkipIntervalTests.java
index d158236ecc7ac..247b75f2977b5 100644
--- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatVariableSkipIntervalTests.java
+++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatVariableSkipIntervalTests.java
@@ -18,13 +18,15 @@ public class ES819TSDBDocValuesFormatVariableSkipIntervalTests extends ES87TSDBD
     @Override
     protected Codec getCodec() {
         // small interval size to test with many intervals
-        return TestUtil.alwaysDocValuesFormat(new ES819TSDBDocValuesFormat(random().nextInt(4, 16), random().nextBoolean()));
+        return TestUtil.alwaysDocValuesFormat(
+            new ES819TSDBDocValuesFormat(random().nextInt(4, 16), random().nextInt(1, 32), random().nextBoolean())
+        );
     }
 
     public void testSkipIndexIntervalSize() {
         IllegalArgumentException ex = expectThrows(
             IllegalArgumentException.class,
-            () -> new ES819TSDBDocValuesFormat(random().nextInt(Integer.MIN_VALUE, 2), random().nextBoolean())
+            () -> new ES819TSDBDocValuesFormat(random().nextInt(Integer.MIN_VALUE, 2), random().nextInt(1, 32), random().nextBoolean())
         );
         assertTrue(ex.getMessage().contains("skipIndexIntervalSize must be > 1"));
     }