Skip to content

Commit 3d48dd5

Browse files
authored
Add ordinal range encode for tsid (#133018)
When a keyword field is the primary sort field, documents sharing the same ordinal form contiguous runs, so we store the starting document of each ordinal instead of blocks of ordinals. By default, this encoding is used only when the average number of documents per ordinal is at least 512; below that threshold, storing block values may be more efficient and safer. Reading a large range of documents—a common pattern in ES|QL—can be more efficient with this approach.
1 parent e469514 commit 3d48dd5

File tree

9 files changed

+388
-152
lines changed

9 files changed

+388
-152
lines changed

benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/TSDBDocValuesMergeBenchmark.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ private static IndexWriterConfig createIndexWriterConfig(boolean optimizedMergeE
258258
);
259259
config.setLeafSorter(DataStream.TIMESERIES_LEAF_READERS_SORTER);
260260
config.setMergePolicy(new LogByteSizeMergePolicy());
261-
var docValuesFormat = new ES819TSDBDocValuesFormat(4096, optimizedMergeEnabled);
261+
var docValuesFormat = new ES819TSDBDocValuesFormat(4096, 512, optimizedMergeEnabled);
262262
config.setCodec(new Elasticsearch900Lucene101Codec() {
263263

264264
@Override

docs/changelog/133018.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 133018
2+
summary: Add ordinal range encode for tsid
3+
area: TSDB
4+
type: enhancement
5+
issues: []

server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java

Lines changed: 56 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,14 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer {
6262
final int maxDoc;
6363
private byte[] termsDictBuffer;
6464
private final int skipIndexIntervalSize;
65+
private final int minDocsPerOrdinalForOrdinalRangeEncoding;
6566
final boolean enableOptimizedMerge;
67+
private final int primarySortFieldNumber;
6668

6769
ES819TSDBDocValuesConsumer(
6870
SegmentWriteState state,
6971
int skipIndexIntervalSize,
72+
int minDocsPerOrdinalForOrdinalRangeEncoding,
7073
boolean enableOptimizedMerge,
7174
String dataCodec,
7275
String dataExtension,
@@ -75,6 +78,8 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer {
7578
) throws IOException {
7679
this.termsDictBuffer = new byte[1 << 14];
7780
this.dir = state.directory;
81+
this.minDocsPerOrdinalForOrdinalRangeEncoding = minDocsPerOrdinalForOrdinalRangeEncoding;
82+
this.primarySortFieldNumber = ES819TSDBDocValuesProducer.primarySortFieldNumber(state.segmentInfo, state.fieldInfos);
7883
this.context = state.context;
7984
boolean success = false;
8085
try {
@@ -124,6 +129,12 @@ public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOExcepti
124129
writeField(field, producer, -1, null);
125130
}
126131

132+
private boolean shouldEncodeOrdinalRange(FieldInfo field, long maxOrd, int numDocsWithValue) {
133+
return maxDoc > 1
134+
&& field.number == primarySortFieldNumber
135+
&& (numDocsWithValue / maxOrd) >= minDocsPerOrdinalForOrdinalRangeEncoding;
136+
}
137+
127138
private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer, long maxOrd, OffsetsAccumulator offsetsAccumulator)
128139
throws IOException {
129140
int numDocsWithValue = 0;
@@ -149,19 +160,53 @@ private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer,
149160
try {
150161
if (numValues > 0) {
151162
assert numDocsWithValue > 0;
152-
// Special case for maxOrd of 1, signal -1 that no blocks will be written
153-
meta.writeInt(maxOrd != 1 ? ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT : -1);
154163
final ByteBuffersDataOutput indexOut = new ByteBuffersDataOutput();
155-
final DirectMonotonicWriter indexWriter = DirectMonotonicWriter.getInstance(
156-
meta,
157-
new ByteBuffersIndexOutput(indexOut, "temp-dv-index", "temp-dv-index"),
158-
1L + ((numValues - 1) >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT),
159-
ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT
160-
);
164+
DirectMonotonicWriter indexWriter = null;
161165

162166
final long valuesDataOffset = data.getFilePointer();
163-
// Special case for maxOrd of 1, skip writing the blocks
164-
if (maxOrd != 1) {
167+
if (maxOrd == 1) {
168+
// Special case for maxOrd of 1, signal -1 that no blocks will be written
169+
meta.writeInt(-1);
170+
} else if (shouldEncodeOrdinalRange(field, maxOrd, numDocsWithValue)) {
171+
// When a field is sorted, use ordinal range encode for long runs of the same ordinal.
172+
meta.writeInt(-2);
173+
meta.writeVInt(Math.toIntExact(maxOrd));
174+
meta.writeByte((byte) ES819TSDBDocValuesFormat.ORDINAL_RANGE_ENCODING_BLOCK_SHIFT);
175+
values = valuesProducer.getSortedNumeric(field);
176+
if (enableOptimizedMerge && numDocsWithValue < maxDoc) {
177+
disiAccumulator = new DISIAccumulator(dir, context, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER);
178+
}
179+
DirectMonotonicWriter startDocs = DirectMonotonicWriter.getInstance(
180+
meta,
181+
data,
182+
maxOrd + 1,
183+
ES819TSDBDocValuesFormat.ORDINAL_RANGE_ENCODING_BLOCK_SHIFT
184+
);
185+
long lastOrd = 0;
186+
startDocs.add(0);
187+
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
188+
if (disiAccumulator != null) {
189+
disiAccumulator.addDocId(doc);
190+
}
191+
if (offsetsAccumulator != null) {
192+
offsetsAccumulator.addDoc(1);
193+
}
194+
final long nextOrd = values.nextValue();
195+
if (nextOrd != lastOrd) {
196+
lastOrd = nextOrd;
197+
startDocs.add(doc);
198+
}
199+
}
200+
startDocs.add(maxDoc);
201+
startDocs.finish();
202+
} else {
203+
indexWriter = DirectMonotonicWriter.getInstance(
204+
meta,
205+
new ByteBuffersIndexOutput(indexOut, "temp-dv-index", "temp-dv-index"),
206+
1L + ((numValues - 1) >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT),
207+
ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT
208+
);
209+
meta.writeInt(DIRECT_MONOTONIC_BLOCK_SHIFT);
165210
final long[] buffer = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];
166211
int bufferSize = 0;
167212
final TSDBDocValuesEncoder encoder = new TSDBDocValuesEncoder(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
@@ -204,8 +249,7 @@ private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer,
204249
}
205250

206251
final long valuesDataLength = data.getFilePointer() - valuesDataOffset;
207-
if (maxOrd != 1) {
208-
// Special case for maxOrd of 1, indexWriter isn't really used, so no need to invoke finish() method.
252+
if (indexWriter != null) {
209253
indexWriter.finish();
210254
}
211255
final long indexDataOffset = data.getFilePointer();

server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,21 +104,35 @@ private static boolean getOptimizedMergeEnabledDefault() {
104104
return Boolean.parseBoolean(System.getProperty(OPTIMIZED_MERGE_ENABLED_NAME, Boolean.TRUE.toString()));
105105
}
106106

107+
/**
108+
* The default minimum number of documents per ordinal required to use ordinal range encoding.
109+
* If the average number of documents per ordinal is below this threshold, it is more efficient to encode doc values in blocks.
110+
* A much smaller value may be used in tests to exercise ordinal range encoding more frequently.
111+
*/
112+
public static final int ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL = 512;
113+
114+
/**
115+
* The block shift used in DirectMonotonicWriter when encoding the start docs of each ordinal with ordinal range encoding.
116+
*/
117+
public static final int ORDINAL_RANGE_ENCODING_BLOCK_SHIFT = 12;
118+
107119
final int skipIndexIntervalSize;
120+
final int minDocsPerOrdinalForRangeEncoding;
108121
private final boolean enableOptimizedMerge;
109122

110123
/** Default constructor. */
111124
public ES819TSDBDocValuesFormat() {
112-
this(DEFAULT_SKIP_INDEX_INTERVAL_SIZE, OPTIMIZED_MERGE_ENABLE_DEFAULT);
125+
this(DEFAULT_SKIP_INDEX_INTERVAL_SIZE, ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL, OPTIMIZED_MERGE_ENABLE_DEFAULT);
113126
}
114127

115128
/** Doc values fields format with specified skipIndexIntervalSize. */
116-
public ES819TSDBDocValuesFormat(int skipIndexIntervalSize, boolean enableOptimizedMerge) {
129+
public ES819TSDBDocValuesFormat(int skipIndexIntervalSize, int minDocsPerOrdinalForRangeEncoding, boolean enableOptimizedMerge) {
117130
super(CODEC_NAME);
118131
if (skipIndexIntervalSize < 2) {
119132
throw new IllegalArgumentException("skipIndexIntervalSize must be > 1, got [" + skipIndexIntervalSize + "]");
120133
}
121134
this.skipIndexIntervalSize = skipIndexIntervalSize;
135+
this.minDocsPerOrdinalForRangeEncoding = minDocsPerOrdinalForRangeEncoding;
122136
this.enableOptimizedMerge = enableOptimizedMerge;
123137
}
124138

@@ -127,6 +141,7 @@ public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOExcept
127141
return new ES819TSDBDocValuesConsumer(
128142
state,
129143
skipIndexIntervalSize,
144+
minDocsPerOrdinalForRangeEncoding,
130145
enableOptimizedMerge,
131146
DATA_CODEC,
132147
DATA_EXTENSION,

0 commit comments

Comments
 (0)