From a5e4ee0c6d54b6f73e8ffd9bb25cfe9693134efc Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Fri, 15 Aug 2025 21:18:38 -0700 Subject: [PATCH 01/13] Add ordinal range encode for tsid --- .../es819/ES819TSDBDocValuesConsumer.java | 71 +++- .../tsdb/es819/ES819TSDBDocValuesFormat.java | 7 +- .../es819/ES819TSDBDocValuesProducer.java | 302 ++++++++++-------- .../codec/tsdb/DocValuesCodecDuelTests.java | 6 +- .../es819/ES819TSDBDocValuesFormatTests.java | 6 +- ...ValuesFormatVariableSkipIntervalTests.java | 6 +- 6 files changed, 248 insertions(+), 150 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java index 3651be472051f..06f2d0b512b5e 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java @@ -62,11 +62,14 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer { final int maxDoc; private byte[] termsDictBuffer; private final int skipIndexIntervalSize; + private final int minDocsPerOrdinalForOrdinalRangeEncoding; final boolean enableOptimizedMerge; + private int primarySortField = -1; ES819TSDBDocValuesConsumer( SegmentWriteState state, int skipIndexIntervalSize, + int minDocsPerOrdinalForOrdinalRangeEncoding, boolean enableOptimizedMerge, String dataCodec, String dataExtension, @@ -75,6 +78,7 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer { ) throws IOException { this.termsDictBuffer = new byte[1 << 14]; this.dir = state.directory; + this.minDocsPerOrdinalForOrdinalRangeEncoding = minDocsPerOrdinalForOrdinalRangeEncoding; this.context = state.context; boolean success = false; try { @@ -99,6 +103,13 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer { maxDoc = state.segmentInfo.maxDoc(); this.skipIndexIntervalSize = skipIndexIntervalSize; this.enableOptimizedMerge = enableOptimizedMerge; + final var indexSort = state.segmentInfo.getIndexSort(); + if (indexSort != null && indexSort.getSort().length > 0 && indexSort.getSort()[0].getReverse() == false) { + var sortField = state.fieldInfos.fieldInfo(indexSort.getSort()[0].getField()); + if (sortField != null) { + primarySortField = sortField.number; + } + } success = true; } finally { if (success == false) { @@ -124,6 +135,10 @@ public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOExcepti writeField(field, producer, -1, null); } + private boolean shouldEncodeOrdinalRange(FieldInfo field, long maxOrd, int numDocsWithValue) { + return maxDoc > 1 && field.number == primarySortField && (numDocsWithValue / maxOrd) >= minDocsPerOrdinalForOrdinalRangeEncoding; + } + private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer, long maxOrd, OffsetsAccumulator offsetsAccumulator) throws IOException { int numDocsWithValue = 0; @@ -149,19 +164,52 @@ private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer, try { if (numValues > 0) { assert numDocsWithValue > 0; - // Special case for maxOrd of 1, signal -1 that no blocks will be written - meta.writeInt(maxOrd != 1 ? ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT : -1); final ByteBuffersDataOutput indexOut = new ByteBuffersDataOutput(); - final DirectMonotonicWriter indexWriter = DirectMonotonicWriter.getInstance( - meta, - new ByteBuffersIndexOutput(indexOut, "temp-dv-index", "temp-dv-index"), - 1L + ((numValues - 1) >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT), - ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT - ); + DirectMonotonicWriter indexWriter = null; final long valuesDataOffset = data.getFilePointer(); - // Special case for maxOrd of 1, skip writing the blocks - if (maxOrd != 1) { + if (maxOrd == 1) { + // Special case for maxOrd of 1, signal -1 that no blocks will be written + meta.writeInt(-1); + } else if (shouldEncodeOrdinalRange(field, maxOrd, numDocsWithValue)) { + // When a field is sorted, use ordinal range encode for long runs of the same ordinal. + meta.writeInt(-2); + meta.writeVInt(Math.toIntExact(maxOrd)); + values = valuesProducer.getSortedNumeric(field); + if (enableOptimizedMerge && numDocsWithValue < maxDoc) { + disiAccumulator = new DISIAccumulator(dir, context, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER); + } + DirectMonotonicWriter startDocs = DirectMonotonicWriter.getInstance( + meta, + data, + maxOrd + 1, + ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT + ); + long lastOrd = 0; + startDocs.add(0); + for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { + if (disiAccumulator != null) { + disiAccumulator.addDocId(doc); + } + if (offsetsAccumulator != null) { + offsetsAccumulator.addDoc(1); + } + final long nextOrd = values.nextValue(); + if (nextOrd != lastOrd) { + lastOrd = nextOrd; + startDocs.add(doc); + } + } + startDocs.add(maxDoc); + startDocs.finish(); + } else { + indexWriter = DirectMonotonicWriter.getInstance( + meta, + new ByteBuffersIndexOutput(indexOut, "temp-dv-index", "temp-dv-index"), + 1L + ((numValues - 1) >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT), + ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT + ); + meta.writeInt(DIRECT_MONOTONIC_BLOCK_SHIFT); final long[] buffer = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE]; int bufferSize = 0; final TSDBDocValuesEncoder encoder = new TSDBDocValuesEncoder(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE); @@ -204,8 +252,7 @@ private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer, } final long valuesDataLength = data.getFilePointer() - valuesDataOffset; - if (maxOrd != 1) { - // Special case for maxOrd of 1, indexWriter isn't really used, so no need to invoke finish() method. + if (indexWriter != null) { indexWriter.finish(); } final long indexDataOffset = data.getFilePointer(); diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java index 1a937e75ad5f9..ccdc10786f65f 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java @@ -105,20 +105,22 @@ private static boolean getOptimizedMergeEnabledDefault() { } final int skipIndexIntervalSize; + final int minDocsPerOrdinalForOrdinalRangeEncoding; private final boolean enableOptimizedMerge; /** Default constructor. */ public ES819TSDBDocValuesFormat() { - this(DEFAULT_SKIP_INDEX_INTERVAL_SIZE, OPTIMIZED_MERGE_ENABLE_DEFAULT); + this(DEFAULT_SKIP_INDEX_INTERVAL_SIZE, NUMERIC_BLOCK_SIZE, OPTIMIZED_MERGE_ENABLE_DEFAULT); } /** Doc values fields format with specified skipIndexIntervalSize. */ - public ES819TSDBDocValuesFormat(int skipIndexIntervalSize, boolean enableOptimizedMerge) { + public ES819TSDBDocValuesFormat(int skipIndexIntervalSize, int minDocsPerOrdinalForOrdinalRangeEncoding, boolean enableOptimizedMerge) { super(CODEC_NAME); if (skipIndexIntervalSize < 2) { throw new IllegalArgumentException("skipIndexIntervalSize must be > 1, got [" + skipIndexIntervalSize + "]"); } this.skipIndexIntervalSize = skipIndexIntervalSize; + this.minDocsPerOrdinalForOrdinalRangeEncoding = minDocsPerOrdinalForOrdinalRangeEncoding; this.enableOptimizedMerge = enableOptimizedMerge; } @@ -127,6 +129,7 @@ public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOExcept return new ES819TSDBDocValuesConsumer( state, skipIndexIntervalSize, + minDocsPerOrdinalForOrdinalRangeEncoding, enableOptimizedMerge, DATA_CODEC, DATA_EXTENSION, diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java index ff875b4ef1c8a..bf77378805226 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java @@ -47,6 +47,7 @@ import java.io.IOException; +import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT; import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.SKIP_INDEX_JUMP_LENGTH_PER_LEVEL; import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.SKIP_INDEX_MAX_LEVEL; import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.TERMS_DICT_BLOCK_LZ4_SHIFT; @@ -82,6 +83,14 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer { Throwable priorE = null; try { + final var indexSort = state.segmentInfo.getIndexSort(); + if (indexSort != null && indexSort.getSort().length > 0) { + var primarySortField = indexSort.getSort()[0]; + var sortField = state.fieldInfos.fieldInfo(primarySortField.getField()); + if (sortField != null) { + primarySortFieldNumber = sortField.number; + } + } version = CodecUtil.checkIndexHeader( in, metaCodec, @@ -92,14 +101,7 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer { ); readFields(in, state.fieldInfos); - final var indexSort = state.segmentInfo.getIndexSort(); - if (indexSort != null && indexSort.getSort().length > 0) { - var primarySortField = indexSort.getSort()[0]; - var sortField = state.fieldInfos.fieldInfo(primarySortField.getField()); - if (sortField != null) { - primarySortFieldNumber = sortField.number; - } - } + } catch (Throwable exception) { priorE = exception; } finally { @@ -441,9 +443,83 @@ public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.D } abstract static class BaseDenseNumericValues extends NumericDocValues implements BlockLoader.OptionalColumnAtATimeReader { + private final int maxDoc; + protected int doc = -1; + + BaseDenseNumericValues(int maxDoc) { + this.maxDoc = maxDoc; + } + + @Override + public final int docID() { + return doc; + } + + @Override + public final int nextDoc() throws IOException { + return advance(doc + 1); + } + + @Override + public final int advance(int target) throws IOException { + if (target >= maxDoc) { + return doc = NO_MORE_DOCS; + } + return doc = target; + } + + @Override + public final boolean advanceExact(int target) { + doc = target; + return true; + } + + @Override + public final long cost() { + return maxDoc; + } + + @Override + public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException { + return null; + } + abstract long lookAheadValueAt(int targetDoc) throws IOException; } + abstract static class BaseNumericValuesWithDISI extends NumericDocValues { + protected final IndexedDISI disi; + + BaseNumericValuesWithDISI(IndexedDISI disi) { + this.disi = disi; + } + + @Override + public final int advance(int target) throws IOException { + return disi.advance(target); + } + + @Override + public final boolean advanceExact(int target) throws IOException { + return disi.advanceExact(target); + } + + @Override + public final int nextDoc() throws IOException { + return disi.nextDoc(); + } + + @Override + public final int docID() { + return disi.docID(); + } + + @Override + public final long cost() { + return disi.cost(); + } + } + abstract static class BaseSortedSetDocValues extends SortedSetDocValues { final SortedSetEntry entry; @@ -958,9 +1034,13 @@ private static void readNumeric(IndexInput meta, NumericEntry entry) throws IOEx entry.numDocsWithField = meta.readInt(); if (entry.numValues > 0) { final int indexBlockShift = meta.readInt(); - // Special case, -1 means there are no blocks, so no need to load the metadata for it - // -1 is written when there the cardinality of a field is exactly one. - if (indexBlockShift != -1) { + if (indexBlockShift == -1) { + // single ordinal, no block index + } else if (indexBlockShift == -2) { + // encoded ordinal range, no block index + final int numOrds = meta.readVInt(); + entry.sortedOrdinals = DirectMonotonicReader.loadMeta(meta, numOrds + 1, DIRECT_MONOTONIC_BLOCK_SHIFT); + } else { entry.indexMeta = DirectMonotonicReader.loadMeta( meta, 1 + ((entry.numValues - 1) >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT), @@ -1073,6 +1153,49 @@ private abstract static class NumericValues { abstract long advance(long index) throws IOException; } + static final class SortedOrdinalReader { + final long maxOrd; + final DirectMonotonicReader startDocs; + private long currentIndex = -1; + private long rangeEndExclusive = -1; + + SortedOrdinalReader(long maxOrd, DirectMonotonicReader startDocs) { + this.maxOrd = Math.toIntExact(maxOrd); + this.startDocs = startDocs; + } + + long readValueAndAdvance(int doc) { + if (doc < rangeEndExclusive) { + return currentIndex; + } + // move to the next range + if (doc == rangeEndExclusive) { + currentIndex++; + } else { + currentIndex = searchRange(doc); + } + rangeEndExclusive = startDocs.get(currentIndex + 1); + return currentIndex; + } + + private long searchRange(int doc) { + long index = startDocs.binarySearch(currentIndex + 1, maxOrd, doc); + if (index < 0) { + index = -2 - index; + } + assert index < maxOrd : "invalid range " + index + " for doc " + doc + " in maxOrd " + maxOrd; + return index; + } + + long lookAheadValue(int targetDoc) { + if (targetDoc < rangeEndExclusive) { + return currentIndex; + } else { + return searchRange(targetDoc); + } + } + } + private NumericDocValues getNumeric(NumericEntry entry, long maxOrd) throws IOException { if (entry.docsWithFieldOffset == -2) { // empty @@ -1083,56 +1206,17 @@ private NumericDocValues getNumeric(NumericEntry entry, long maxOrd) throws IOEx // Special case for maxOrd 1, no need to read blocks and use ordinal 0 as only value if (entry.docsWithFieldOffset == -1) { // Special case when all docs have a value - return new BaseDenseNumericValues() { - - private final int maxDoc = ES819TSDBDocValuesProducer.this.maxDoc; - private int doc = -1; - + return new BaseDenseNumericValues(maxDoc) { @Override public long longValue() { // Only one ordinal! return 0L; } - @Override - public int docID() { - return doc; - } - - @Override - public int nextDoc() throws IOException { - return advance(doc + 1); - } - - @Override - public int advance(int target) throws IOException { - if (target >= maxDoc) { - return doc = NO_MORE_DOCS; - } - return doc = target; - } - - @Override - public boolean advanceExact(int target) { - doc = target; - return true; - } - - @Override - public long cost() { - return maxDoc; - } - @Override long lookAheadValueAt(int targetDoc) throws IOException { return 0L; // Only one ordinal! } - - @Override - public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) - throws IOException { - return null; - } }; } else { final IndexedDISI disi = new IndexedDISI( @@ -1143,36 +1227,47 @@ public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.D entry.denseRankPower, entry.numValues ); - return new NumericDocValues() { - - @Override - public int advance(int target) throws IOException { - return disi.advance(target); - } - - @Override - public boolean advanceExact(int target) throws IOException { - return disi.advanceExact(target); - } - + return new BaseNumericValuesWithDISI(disi) { @Override - public int nextDoc() throws IOException { - return disi.nextDoc(); + public long longValue() throws IOException { + return 0L; // Only one ordinal! } - + }; + } + } else if (entry.sortedOrdinals != null) { + final var ordinalsReader = new SortedOrdinalReader( + maxOrd, + DirectMonotonicReader.getInstance( + entry.sortedOrdinals, + data.randomAccessSlice(entry.valuesOffset, entry.valuesLength), + false + ) + ); + if (entry.docsWithFieldOffset == -1) { + return new BaseDenseNumericValues(maxDoc) { @Override - public int docID() { - return disi.docID(); + long lookAheadValueAt(int targetDoc) { + return ordinalsReader.lookAheadValue(targetDoc); } @Override - public long cost() { - return disi.cost(); + public long longValue() { + return ordinalsReader.readValueAndAdvance(doc); } - + }; + } else { + final var disi = new IndexedDISI( + data, + entry.docsWithFieldOffset, + entry.docsWithFieldLength, + entry.jumpTableEntryCount, + entry.denseRankPower, + entry.numValues + ); + return new BaseNumericValuesWithDISI(disi) { @Override public long longValue() { - return 0L; + return ordinalsReader.readValueAndAdvance(disi.docID()); } }; } @@ -1188,10 +1283,7 @@ public long longValue() { final int bitsPerOrd = maxOrd >= 0 ? PackedInts.bitsRequired(maxOrd - 1) : -1; if (entry.docsWithFieldOffset == -1) { // dense - return new BaseDenseNumericValues() { - - private final int maxDoc = ES819TSDBDocValuesProducer.this.maxDoc; - private int doc = -1; + return new BaseDenseNumericValues(maxDoc) { private final TSDBDocValuesEncoder decoder = new TSDBDocValuesEncoder(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE); private long currentBlockIndex = -1; private final long[] currentBlock = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE]; @@ -1200,35 +1292,6 @@ public long longValue() { private long[] lookaheadBlock; private IndexInput lookaheadData = null; - @Override - public int docID() { - return doc; - } - - @Override - public int nextDoc() throws IOException { - return advance(doc + 1); - } - - @Override - public int advance(int target) throws IOException { - if (target >= maxDoc) { - return doc = NO_MORE_DOCS; - } - return doc = target; - } - - @Override - public boolean advanceExact(int target) { - doc = target; - return true; - } - - @Override - public long cost() { - return maxDoc; - } - @Override public long longValue() throws IOException { final int index = doc; @@ -1336,37 +1399,11 @@ static boolean isDense(int firstDocId, int lastDocId, int length) { entry.denseRankPower, entry.numValues ); - return new NumericDocValues() { - + return new BaseNumericValuesWithDISI(disi) { private final TSDBDocValuesEncoder decoder = new TSDBDocValuesEncoder(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE); private long currentBlockIndex = -1; private final long[] currentBlock = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE]; - @Override - public int advance(int target) throws IOException { - return disi.advance(target); - } - - @Override - public boolean advanceExact(int target) throws IOException { - return disi.advanceExact(target); - } - - @Override - public int nextDoc() throws IOException { - return disi.nextDoc(); - } - - @Override - public int docID() { - return disi.docID(); - } - - @Override - public long cost() { - return disi.cost(); - } - @Override public long longValue() throws IOException { final int index = disi.index(); @@ -1572,6 +1609,7 @@ static class NumericEntry { DirectMonotonicReader.Meta indexMeta; long valuesOffset; long valuesLength; + DirectMonotonicReader.Meta sortedOrdinals; } static class BinaryEntry { diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java index f0ce28f11a51a..1036d822c0a21 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java @@ -58,7 +58,11 @@ public void testDuel() throws IOException { Codec codec = new Elasticsearch900Lucene101Codec() { final DocValuesFormat docValuesFormat = randomBoolean() - ? new ES819TSDBDocValuesFormat() + ? new ES819TSDBDocValuesFormat( + ESTestCase.randomIntBetween(1, 4096), + ESTestCase.randomIntBetween(1, 512), + random().nextBoolean() + ) : new TestES87TSDBDocValuesFormat(); @Override diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java index 28a7f08bb8d27..03927cdba1452 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java @@ -62,7 +62,11 @@ public class ES819TSDBDocValuesFormatTests extends ES87TSDBDocValuesFormatTests private final Codec codec = new Elasticsearch900Lucene101Codec() { - final ES819TSDBDocValuesFormat docValuesFormat = new ES819TSDBDocValuesFormat(); + final ES819TSDBDocValuesFormat docValuesFormat = new ES819TSDBDocValuesFormat( + ESTestCase.randomIntBetween(1, 4096), + ESTestCase.randomIntBetween(1, 512), + random().nextBoolean() + ); @Override public DocValuesFormat getDocValuesFormatForField(String field) { diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatVariableSkipIntervalTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatVariableSkipIntervalTests.java index d158236ecc7ac..247b75f2977b5 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatVariableSkipIntervalTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatVariableSkipIntervalTests.java @@ -18,13 +18,15 @@ public class ES819TSDBDocValuesFormatVariableSkipIntervalTests extends ES87TSDBD @Override protected Codec getCodec() { // small interval size to test with many intervals - return TestUtil.alwaysDocValuesFormat(new ES819TSDBDocValuesFormat(random().nextInt(4, 16), random().nextBoolean())); + return TestUtil.alwaysDocValuesFormat( + new ES819TSDBDocValuesFormat(random().nextInt(4, 16), random().nextInt(1, 32), random().nextBoolean()) + ); } public void testSkipIndexIntervalSize() { IllegalArgumentException ex = expectThrows( IllegalArgumentException.class, - () -> new ES819TSDBDocValuesFormat(random().nextInt(Integer.MIN_VALUE, 2), random().nextBoolean()) + () -> new ES819TSDBDocValuesFormat(random().nextInt(Integer.MIN_VALUE, 2), random().nextInt(1, 32), random().nextBoolean()) ); assertTrue(ex.getMessage().contains("skipIndexIntervalSize must be > 1")); } From 08813b6cc5b7138e3a6dcb252fdb9afaf9b32d76 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Sun, 17 Aug 2025 16:28:33 -0700 Subject: [PATCH 02/13] Increase the threshold --- .../index/codec/tsdb/TSDBDocValuesMergeBenchmark.java | 2 +- .../index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/TSDBDocValuesMergeBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/TSDBDocValuesMergeBenchmark.java index 71164e35ad557..1637cd33ac0af 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/TSDBDocValuesMergeBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/TSDBDocValuesMergeBenchmark.java @@ -258,7 +258,7 @@ private static IndexWriterConfig createIndexWriterConfig(boolean optimizedMergeE ); config.setLeafSorter(DataStream.TIMESERIES_LEAF_READERS_SORTER); config.setMergePolicy(new LogByteSizeMergePolicy()); - var docValuesFormat = new ES819TSDBDocValuesFormat(4096, optimizedMergeEnabled); + var docValuesFormat = new ES819TSDBDocValuesFormat(4096, 512, optimizedMergeEnabled); config.setCodec(new Elasticsearch900Lucene101Codec() { @Override diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java index ccdc10786f65f..598ad41edb807 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java @@ -33,6 +33,7 @@ public class ES819TSDBDocValuesFormat extends org.apache.lucene.codecs.DocValues static final int NUMERIC_BLOCK_SHIFT = 7; public static final int NUMERIC_BLOCK_SIZE = 1 << NUMERIC_BLOCK_SHIFT; + public static final int MIN_DOC_PER_ORDINAL_FOR_ORDINAL_RANGE_ENCODING = 512; static final int NUMERIC_BLOCK_MASK = NUMERIC_BLOCK_SIZE - 1; static final int DIRECT_MONOTONIC_BLOCK_SHIFT = 16; static final String CODEC_NAME = "ES819TSDB"; @@ -110,7 +111,7 @@ private static boolean getOptimizedMergeEnabledDefault() { /** Default constructor. */ public ES819TSDBDocValuesFormat() { - this(DEFAULT_SKIP_INDEX_INTERVAL_SIZE, NUMERIC_BLOCK_SIZE, OPTIMIZED_MERGE_ENABLE_DEFAULT); + this(DEFAULT_SKIP_INDEX_INTERVAL_SIZE, MIN_DOC_PER_ORDINAL_FOR_ORDINAL_RANGE_ENCODING, OPTIMIZED_MERGE_ENABLE_DEFAULT); } /** Doc values fields format with specified skipIndexIntervalSize. */ From 40c30dac53062b7cbd99b8acc3fc25319205441e Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Sun, 17 Aug 2025 19:11:32 -0700 Subject: [PATCH 03/13] blockshift --- .../index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java | 3 ++- .../index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java | 5 +++-- .../index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java | 4 ++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java index 06f2d0b512b5e..c035abb6bbe43 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java @@ -175,6 +175,7 @@ private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer, // When a field is sorted, use ordinal range encode for long runs of the same ordinal. meta.writeInt(-2); meta.writeVInt(Math.toIntExact(maxOrd)); + meta.writeByte((byte) ES819TSDBDocValuesFormat.ORDINAL_RANGE_ENCODING_BLOCK_SHIFT); values = valuesProducer.getSortedNumeric(field); if (enableOptimizedMerge && numDocsWithValue < maxDoc) { disiAccumulator = new DISIAccumulator(dir, context, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER); @@ -183,7 +184,7 @@ private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer, meta, data, maxOrd + 1, - ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT + ES819TSDBDocValuesFormat.ORDINAL_RANGE_ENCODING_BLOCK_SHIFT ); long lastOrd = 0; startDocs.add(0); diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java index 598ad41edb807..27afdd2fb29e1 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java @@ -33,9 +33,10 @@ public class ES819TSDBDocValuesFormat extends org.apache.lucene.codecs.DocValues static final int NUMERIC_BLOCK_SHIFT = 7; public static final int NUMERIC_BLOCK_SIZE = 1 << NUMERIC_BLOCK_SHIFT; - public static final int MIN_DOC_PER_ORDINAL_FOR_ORDINAL_RANGE_ENCODING = 512; static final int NUMERIC_BLOCK_MASK = NUMERIC_BLOCK_SIZE - 1; static final int DIRECT_MONOTONIC_BLOCK_SHIFT = 16; + public static final int ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL = 512; + public static final int ORDINAL_RANGE_ENCODING_BLOCK_SHIFT = 12; static final String CODEC_NAME = "ES819TSDB"; static final String DATA_CODEC = "ES819TSDBDocValuesData"; static final String DATA_EXTENSION = "dvd"; @@ -111,7 +112,7 @@ private static boolean getOptimizedMergeEnabledDefault() { /** Default constructor. */ public ES819TSDBDocValuesFormat() { - this(DEFAULT_SKIP_INDEX_INTERVAL_SIZE, MIN_DOC_PER_ORDINAL_FOR_ORDINAL_RANGE_ENCODING, OPTIMIZED_MERGE_ENABLE_DEFAULT); + this(DEFAULT_SKIP_INDEX_INTERVAL_SIZE, ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL, OPTIMIZED_MERGE_ENABLE_DEFAULT); } /** Doc values fields format with specified skipIndexIntervalSize. */ diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java index 12dec1f9c00d1..2e9b7877af782 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java @@ -47,7 +47,6 @@ import java.io.IOException; -import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT; import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.SKIP_INDEX_JUMP_LENGTH_PER_LEVEL; import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.SKIP_INDEX_MAX_LEVEL; import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.TERMS_DICT_BLOCK_LZ4_SHIFT; @@ -1069,7 +1068,8 @@ private static void readNumeric(IndexInput meta, NumericEntry entry) throws IOEx } else if (indexBlockShift == -2) { // encoded ordinal range, no block index final int numOrds = meta.readVInt(); - entry.sortedOrdinals = DirectMonotonicReader.loadMeta(meta, numOrds + 1, DIRECT_MONOTONIC_BLOCK_SHIFT); + final int blockShift = meta.readByte(); + entry.sortedOrdinals = DirectMonotonicReader.loadMeta(meta, numOrds + 1, blockShift); } else { entry.indexMeta = DirectMonotonicReader.loadMeta( meta, From 148094528c03c3cd1266bd3be8f6bda6d5aeae34 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Sun, 17 Aug 2025 19:40:53 -0700 Subject: [PATCH 04/13] enable merging --- .../index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java index 2e9b7877af782..710cf061356b0 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java @@ -1270,7 +1270,7 @@ public long longValue() throws IOException { DirectMonotonicReader.getInstance( entry.sortedOrdinals, data.randomAccessSlice(entry.valuesOffset, entry.valuesLength), - false + true ) ); if (entry.docsWithFieldOffset == -1) { From f1824d970754a35fba5ca3275bf372902b8f9776 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Mon, 18 Aug 2025 09:26:24 -0700 Subject: [PATCH 05/13] renaming --- .../codec/tsdb/es819/ES819TSDBDocValuesProducer.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java index 710cf061356b0..6565127deb46b 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java @@ -516,10 +516,10 @@ BlockLoader.Block tryRead(BlockLoader.SingletonLongBuilder builder, BlockLoader. } } - abstract static class BaseNumericValuesWithDISI extends NumericDocValues { + abstract static class BaseSparseNumericValues extends NumericDocValues { protected final IndexedDISI disi; - BaseNumericValuesWithDISI(IndexedDISI disi) { + BaseSparseNumericValues(IndexedDISI disi) { this.disi = disi; } @@ -1257,7 +1257,7 @@ long lookAheadValueAt(int targetDoc) throws IOException { entry.denseRankPower, entry.numValues ); - return new BaseNumericValuesWithDISI(disi) { + return new BaseSparseNumericValues(disi) { @Override public long longValue() throws IOException { return 0L; // Only one ordinal! @@ -1294,7 +1294,7 @@ public long longValue() { entry.denseRankPower, entry.numValues ); - return new BaseNumericValuesWithDISI(disi) { + return new BaseSparseNumericValues(disi) { @Override public long longValue() { return ordinalsReader.readValueAndAdvance(disi.docID()); @@ -1437,7 +1437,7 @@ static boolean isDense(int firstDocId, int lastDocId, int length) { entry.denseRankPower, entry.numValues ); - return new BaseNumericValuesWithDISI(disi) { + return new BaseSparseNumericValues(disi) { private final TSDBDocValuesEncoder decoder = new TSDBDocValuesEncoder(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE); private long currentBlockIndex = -1; private final long[] currentBlock = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE]; From d9bdf5eaddf9e5a679eed6e25878aaf086e1261b Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Mon, 18 Aug 2025 09:36:20 -0700 Subject: [PATCH 06/13] primarySortFieldNumber --- .../es819/ES819TSDBDocValuesConsumer.java | 14 +++----- .../es819/ES819TSDBDocValuesProducer.java | 35 ++++++++++++++----- 2 files changed, 31 insertions(+), 18 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java index c035abb6bbe43..8aaef4329d6c4 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java @@ -64,7 +64,7 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer { private final int skipIndexIntervalSize; private final int minDocsPerOrdinalForOrdinalRangeEncoding; final boolean enableOptimizedMerge; - private int primarySortField = -1; + private final int primarySortFieldNumber; ES819TSDBDocValuesConsumer( SegmentWriteState state, @@ -79,6 +79,7 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer { this.termsDictBuffer = new byte[1 << 14]; this.dir = state.directory; this.minDocsPerOrdinalForOrdinalRangeEncoding = minDocsPerOrdinalForOrdinalRangeEncoding; + this.primarySortFieldNumber = ES819TSDBDocValuesProducer.primarySortFieldNumber(state.segmentInfo, state.fieldInfos); this.context = state.context; boolean success = false; try { @@ -103,13 +104,6 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer { maxDoc = state.segmentInfo.maxDoc(); this.skipIndexIntervalSize = skipIndexIntervalSize; this.enableOptimizedMerge = enableOptimizedMerge; - final var indexSort = state.segmentInfo.getIndexSort(); - if (indexSort != null && indexSort.getSort().length > 0 && indexSort.getSort()[0].getReverse() == false) { - var sortField = state.fieldInfos.fieldInfo(indexSort.getSort()[0].getField()); - if (sortField != null) { - primarySortField = sortField.number; - } - } success = true; } finally { if (success == false) { @@ -136,7 +130,9 @@ public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOExcepti } private boolean shouldEncodeOrdinalRange(FieldInfo field, long maxOrd, int numDocsWithValue) { - return maxDoc > 1 && field.number == primarySortField && (numDocsWithValue / maxOrd) >= minDocsPerOrdinalForOrdinalRangeEncoding; + return maxDoc > 1 + && field.number == primarySortFieldNumber + && (numDocsWithValue / maxOrd) >= minDocsPerOrdinalForOrdinalRangeEncoding; } private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer, long maxOrd, OffsetsAccumulator offsetsAccumulator) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java index 6565127deb46b..5c33f75fc3a99 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java @@ -24,6 +24,7 @@ import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedNumericDocValues; @@ -31,6 +32,7 @@ import org.apache.lucene.index.TermsEnum; import org.apache.lucene.internal.hppc.IntObjectHashMap; import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.SortField; import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.DataInput; @@ -53,7 +55,7 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer { final IntObjectHashMap numerics; - private int primarySortFieldNumber = -1; + private final int primarySortFieldNumber; final IntObjectHashMap binaries; final IntObjectHashMap sorted; final IntObjectHashMap sortedSets; @@ -73,23 +75,17 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer { this.sortedNumerics = new IntObjectHashMap<>(); this.skippers = new IntObjectHashMap<>(); this.maxDoc = state.segmentInfo.maxDoc(); + this.primarySortFieldNumber = primarySortFieldNumber(state.segmentInfo, state.fieldInfos); this.merging = false; // read in the entries from the metadata file. int version = -1; String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); + try (ChecksumIndexInput in = state.directory.openChecksumInput(metaName)) { Throwable priorE = null; try { - final var indexSort = state.segmentInfo.getIndexSort(); - if (indexSort != null && indexSort.getSort().length > 0) { - var primarySortField = indexSort.getSort()[0]; - var sortField = state.fieldInfos.fieldInfo(primarySortField.getField()); - if (sortField != null) { - primarySortFieldNumber = sortField.number; - } - } version = CodecUtil.checkIndexHeader( in, metaCodec, @@ -149,6 +145,7 @@ private ES819TSDBDocValuesProducer( IndexInput data, int maxDoc, int version, + int primarySortFieldNumber, boolean merging ) { this.numerics = numerics; @@ -160,6 +157,7 @@ private ES819TSDBDocValuesProducer( this.data = data.clone(); this.maxDoc = maxDoc; this.version = version; + this.primarySortFieldNumber = primarySortFieldNumber; this.merging = merging; } @@ -175,6 +173,7 @@ public DocValuesProducer getMergeInstance() { data, maxDoc, version, + primarySortFieldNumber, true ); } @@ -1014,6 +1013,24 @@ public void close() throws IOException { data.close(); } + /** + * Returns the field number of the primary sort field for the given segment, + * if the field is sorted in ascending order. Returns {@code -1} if not found. + */ + static int primarySortFieldNumber(SegmentInfo segmentInfo, FieldInfos fieldInfos) { + final var indexSort = segmentInfo.getIndexSort(); + if (indexSort != null || indexSort.getSort().length > 0) { + SortField sortField = indexSort.getSort()[0]; + if (sortField.getReverse() == false) { + FieldInfo fieldInfo = fieldInfos.fieldInfo(sortField.getField()); + if (fieldInfo != null) { + return fieldInfo.number; + } + } + } + return -1; + } + private void readFields(IndexInput meta, FieldInfos infos) throws IOException { for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) { FieldInfo info = infos.fieldInfo(fieldNumber); From 10c91f955c44ba9f02540d17a410d0a82d1872e7 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Mon, 18 Aug 2025 09:52:01 -0700 Subject: [PATCH 07/13] javadoc --- .../codec/tsdb/es819/ES819TSDBDocValuesFormat.java | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java index 27afdd2fb29e1..911a5832461e8 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java @@ -35,8 +35,6 @@ public class ES819TSDBDocValuesFormat extends org.apache.lucene.codecs.DocValues public static final int NUMERIC_BLOCK_SIZE = 1 << NUMERIC_BLOCK_SHIFT; static final int NUMERIC_BLOCK_MASK = NUMERIC_BLOCK_SIZE - 1; static final int DIRECT_MONOTONIC_BLOCK_SHIFT = 16; - public static final int ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL = 512; - public static final int ORDINAL_RANGE_ENCODING_BLOCK_SHIFT = 12; static final String CODEC_NAME = "ES819TSDB"; static final String DATA_CODEC = "ES819TSDBDocValuesData"; static final String DATA_EXTENSION = "dvd"; @@ -106,6 +104,18 @@ private static boolean getOptimizedMergeEnabledDefault() { return Boolean.parseBoolean(System.getProperty(OPTIMIZED_MERGE_ENABLED_NAME, Boolean.TRUE.toString())); } + /** + * The default minimum number of documents per ordinal required to use ordinal range encoding. + * If the average number of documents per ordinal is below this threshold, it is more efficient to encode doc values in blocks. + * A much smaller value may be used in tests to exercise ordinal range encoding more frequently. + */ + public static final int ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL = 512; + + /** + * The block shift used in DirectMonotonicWriter when encoding the start docs of each ordinal with ordinal range encoding. + */ + public static final int ORDINAL_RANGE_ENCODING_BLOCK_SHIFT = 12; + final int skipIndexIntervalSize; final int minDocsPerOrdinalForOrdinalRangeEncoding; private final boolean enableOptimizedMerge; From 90a6567bc68a29f7403c590e9ed4d88bb0198f24 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Mon, 18 Aug 2025 09:58:27 -0700 Subject: [PATCH 08/13] naming --- .../index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java index 911a5832461e8..fbdef488b8318 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java @@ -117,7 +117,7 @@ private static boolean getOptimizedMergeEnabledDefault() { public static final int ORDINAL_RANGE_ENCODING_BLOCK_SHIFT = 12; final int skipIndexIntervalSize; - final int minDocsPerOrdinalForOrdinalRangeEncoding; + final int minDocsPerOrdinalForRangeEncoding; private final boolean enableOptimizedMerge; /** Default constructor. */ @@ -126,13 +126,13 @@ public ES819TSDBDocValuesFormat() { } /** Doc values fields format with specified skipIndexIntervalSize. */ - public ES819TSDBDocValuesFormat(int skipIndexIntervalSize, int minDocsPerOrdinalForOrdinalRangeEncoding, boolean enableOptimizedMerge) { + public ES819TSDBDocValuesFormat(int skipIndexIntervalSize, int minDocsPerOrdinalForRangeEncoding, boolean enableOptimizedMerge) { super(CODEC_NAME); if (skipIndexIntervalSize < 2) { throw new IllegalArgumentException("skipIndexIntervalSize must be > 1, got [" + skipIndexIntervalSize + "]"); } this.skipIndexIntervalSize = skipIndexIntervalSize; - this.minDocsPerOrdinalForOrdinalRangeEncoding = minDocsPerOrdinalForOrdinalRangeEncoding; + this.minDocsPerOrdinalForRangeEncoding = minDocsPerOrdinalForRangeEncoding; this.enableOptimizedMerge = enableOptimizedMerge; } @@ -141,7 +141,7 @@ public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOExcept return new ES819TSDBDocValuesConsumer( state, skipIndexIntervalSize, - minDocsPerOrdinalForOrdinalRangeEncoding, + minDocsPerOrdinalForRangeEncoding, enableOptimizedMerge, DATA_CODEC, DATA_EXTENSION, From 3a0148a1e9246e1e40b79cd5661084417d6850ca Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Mon, 18 Aug 2025 11:09:58 -0700 Subject: [PATCH 09/13] oops --- .../index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java index 3c8e8b0003ae4..50f2367641220 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java @@ -1019,7 +1019,7 @@ public void close() throws IOException { */ static int primarySortFieldNumber(SegmentInfo segmentInfo, FieldInfos fieldInfos) { final var indexSort = segmentInfo.getIndexSort(); - if (indexSort != null || indexSort.getSort().length > 0) { + if (indexSort != null && indexSort.getSort().length > 0) { SortField sortField = indexSort.getSort()[0]; if (sortField.getReverse() == false) { FieldInfo fieldInfo = fieldInfos.fieldInfo(sortField.getField()); From 6bdfd698fe18e306dec9552d95827934bcd3cd34 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Mon, 18 Aug 2025 22:24:42 -0700 Subject: [PATCH 10/13] Add tests --- .../codec/tsdb/TsdbDocValueBwcTests.java | 96 +++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/TsdbDocValueBwcTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/TsdbDocValueBwcTests.java index 9c41e7a80ed66..a96011b2bcfe2 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/TsdbDocValueBwcTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/TsdbDocValueBwcTests.java @@ -21,10 +21,14 @@ import org.apache.lucene.index.DocValues; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LogByteSizeMergePolicy; import org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.NoMergePolicy; +import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SegmentReader; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortedNumericSortField; @@ -45,6 +49,9 @@ import java.util.Arrays; import java.util.Locale; import java.util.Map; +import java.util.function.IntSupplier; + +import static org.hamcrest.Matchers.equalTo; public class TsdbDocValueBwcTests extends ESTestCase { @@ -260,6 +267,95 @@ void testMixedIndex(Codec oldCodec, Codec newCodec) throws IOException, NoSuchFi } } + public void testEncodeOrdinalRange() throws IOException { + try (var dir = newDirectory()) { + int iters = between(5, 20); + for (int iter = 0; iter < iters; iter++) { + var config = new IndexWriterConfig(); + String hostNameField = "host.name"; + String hostIdField = "host.id"; + config.setIndexSort(new Sort(new SortField(hostNameField, SortField.Type.STRING, false))); + int thresholdRange = random().nextInt(3); + IntSupplier nextOrdinalRangeThreshold = () -> { + if (thresholdRange == 0) { + return between(1, 5); + } else if (thresholdRange == 1) { + return between(5, 20); + } else { + return Integer.MAX_VALUE; + } + }; + config.setCodec( + TestUtil.alwaysDocValuesFormat( + new ES819TSDBDocValuesFormat( + random().nextInt(16, 128), + nextOrdinalRangeThreshold.getAsInt(), + random().nextBoolean() + ) + ) + ); + try (IndexWriter writer = new IndexWriter(dir, config)) { + int numDocs = between(50, 500); + for (int d = 0; d < numDocs; d++) { + Document doc = new Document(); + int hostId = random().nextInt(100); + String hostName = String.format(Locale.ROOT, "host-%02d", hostId); + doc.add(new SortedDocValuesField(hostNameField, new BytesRef(hostName))); + doc.add(new NumericDocValuesField(hostIdField, hostId)); + writer.addDocument(doc); + if (random().nextInt(100) <= 5) { + Document dummy = new Document(); + dummy.add(new SortedDocValuesField("dummy", new BytesRef("dummy"))); + writer.addDocument(dummy); + } + if (random().nextInt(100) <= 10) { + writer.flush(); + } + if (random().nextInt(100) <= 5) { + writer.forceMerge(between(1, 10)); + } + } + } + try (DirectoryReader reader = DirectoryReader.open(dir)) { + for (LeafReaderContext leaf : reader.leaves()) { + // sequential + NumericDocValues hostIdDv = leaf.reader().getNumericDocValues(hostIdField); + SortedDocValues hostNameDv = leaf.reader().getSortedDocValues(hostNameField); + { + int docId; + while ((docId = hostIdDv.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + assertTrue(hostNameDv.advanceExact(docId)); + String hostName = hostNameDv.lookupOrd(hostNameDv.ordValue()).utf8ToString(); + String expectedHostName = String.format(Locale.ROOT, "host-%02d", hostIdDv.longValue()); + assertThat(hostName, equalTo(expectedHostName)); + } + } + int checkIters = between(1, 20); + int nextDoc = 0; + for (int n = 0; n < checkIters; n++) { + if (nextDoc >= leaf.reader().maxDoc()) { + nextDoc = 0; + } + nextDoc = nextDoc + random().nextInt(leaf.reader().maxDoc() - nextDoc); + if (hostIdDv.docID() == DocIdSetIterator.NO_MORE_DOCS || nextDoc > hostIdDv.docID()) { + hostIdDv = leaf.reader().getNumericDocValues(hostIdField); + hostNameDv = leaf.reader().getSortedDocValues(hostNameField); + } + if (hostIdDv.advanceExact(nextDoc)) { + assertTrue(hostNameDv.advanceExact(nextDoc)); + String hostName = hostNameDv.lookupOrd(hostNameDv.ordValue()).utf8ToString(); + String expectedHostName = String.format(Locale.ROOT, "host-%02d", hostIdDv.longValue()); + assertThat(hostName, equalTo(expectedHostName)); + } else { + assertFalse(hostNameDv.advanceExact(nextDoc)); + } + } + } + } + } + } + } + private IndexWriterConfig getTimeSeriesIndexWriterConfig(String hostnameField, String timestampField, Codec codec) { var config = new IndexWriterConfig(); config.setIndexSort( From 8dc9641bbe80d42f55ec837490b4647c828a6740 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Mon, 18 Aug 2025 22:26:22 -0700 Subject: [PATCH 11/13] remove toIntExact --- .../index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java index 50f2367641220..3ddeca1cd25a5 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java @@ -1207,7 +1207,7 @@ static final class SortedOrdinalReader { private long rangeEndExclusive = -1; SortedOrdinalReader(long maxOrd, DirectMonotonicReader startDocs) { - this.maxOrd = Math.toIntExact(maxOrd); + this.maxOrd = maxOrd; this.startDocs = startDocs; } From 2023dc01a48013b283e900b98ca07546b4cb87cc Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Mon, 18 Aug 2025 22:36:42 -0700 Subject: [PATCH 12/13] deleted docs --- .../codec/tsdb/TsdbDocValueBwcTests.java | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/TsdbDocValueBwcTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/TsdbDocValueBwcTests.java index a96011b2bcfe2..37a62b3605c2c 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/TsdbDocValueBwcTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/TsdbDocValueBwcTests.java @@ -13,6 +13,7 @@ import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.document.Document; +import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.SortedNumericDocValuesField; @@ -299,10 +300,16 @@ public void testEncodeOrdinalRange() throws IOException { for (int d = 0; d < numDocs; d++) { Document doc = new Document(); int hostId = random().nextInt(100); - String hostName = String.format(Locale.ROOT, "host-%02d", hostId); - doc.add(new SortedDocValuesField(hostNameField, new BytesRef(hostName))); - doc.add(new NumericDocValuesField(hostIdField, hostId)); - writer.addDocument(doc); + if (random().nextInt(100) <= 10) { + writer.deleteDocuments(LongPoint.newExactQuery(hostIdField, hostId)); + } else { + String hostName = String.format(Locale.ROOT, "host-%02d", hostId); + doc.add(new LongPoint("host.id", hostId)); + doc.add(new SortedDocValuesField(hostNameField, new BytesRef(hostName))); + doc.add(new NumericDocValuesField(hostIdField, hostId)); + writer.addDocument(doc); + } + if (random().nextInt(100) <= 5) { Document dummy = new Document(); dummy.add(new SortedDocValuesField("dummy", new BytesRef("dummy"))); @@ -321,6 +328,10 @@ public void testEncodeOrdinalRange() throws IOException { // sequential NumericDocValues hostIdDv = leaf.reader().getNumericDocValues(hostIdField); SortedDocValues hostNameDv = leaf.reader().getSortedDocValues(hostNameField); + if (hostIdDv == null) { + assertNull(hostNameDv); + continue; + } { int docId; while ((docId = hostIdDv.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { From 030196850939225993dcc826ca8b68d0364014dc Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Mon, 18 Aug 2025 22:54:40 -0700 Subject: [PATCH 13/13] Update docs/changelog/133018.yaml --- docs/changelog/133018.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/133018.yaml diff --git a/docs/changelog/133018.yaml b/docs/changelog/133018.yaml new file mode 100644 index 0000000000000..d469f99f92c74 --- /dev/null +++ b/docs/changelog/133018.yaml @@ -0,0 +1,5 @@ +pr: 133018 +summary: Add ordinal range encode for tsid +area: TSDB +type: enhancement +issues: []