Skip to content

Commit 30f327e

Browse files
kkrik-es and elasticsearchmachine authored
Introduce large block doc values format (#138182)
* Add accessors for block size constants in tsdb codec
* add large block format
* update codec name
* sync
* use new version
* [CI] Auto commit changes from spotless
* fix
* fix and tests
* assert
* refactor
* fix
---------
Co-authored-by: elasticsearchmachine <[email protected]>
1 parent 7bb054f commit 30f327e

File tree

8 files changed

+139
-75
lines changed

8 files changed

+139
-75
lines changed

server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer {
7373
private final int minDocsPerOrdinalForOrdinalRangeEncoding;
7474
final boolean enableOptimizedMerge;
7575
private final int primarySortFieldNumber;
76+
private final int numericBlockShift;
77+
private final int numericBlockSize;
7678
final SegmentWriteState state;
7779
final BinaryDVCompressionMode binaryDVCompressionMode;
7880
private final boolean enablePerBlockCompression; // only false for testing
@@ -84,6 +86,7 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer {
8486
int skipIndexIntervalSize,
8587
int minDocsPerOrdinalForOrdinalRangeEncoding,
8688
boolean enableOptimizedMerge,
89+
int numericBlockShift,
8790
String dataCodec,
8891
String dataExtension,
8992
String metaCodec,
@@ -97,6 +100,9 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer {
97100
this.minDocsPerOrdinalForOrdinalRangeEncoding = minDocsPerOrdinalForOrdinalRangeEncoding;
98101
this.primarySortFieldNumber = ES819TSDBDocValuesProducer.primarySortFieldNumber(state.segmentInfo, state.fieldInfos);
99102
this.context = state.context;
103+
this.numericBlockShift = numericBlockShift;
104+
this.numericBlockSize = 1 << numericBlockShift;
105+
100106
boolean success = false;
101107
try {
102108
final String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
@@ -108,6 +114,7 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer {
108114
state.segmentInfo.getId(),
109115
state.segmentSuffix
110116
);
117+
111118
String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
112119
meta = state.directory.createOutput(metaName, state.context);
113120
CodecUtil.writeIndexHeader(
@@ -117,6 +124,8 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer {
117124
state.segmentInfo.getId(),
118125
state.segmentSuffix
119126
);
127+
meta.writeByte((byte) numericBlockShift);
128+
120129
maxDoc = state.segmentInfo.maxDoc();
121130
this.skipIndexIntervalSize = skipIndexIntervalSize;
122131
this.enableOptimizedMerge = enableOptimizedMerge;
@@ -218,13 +227,13 @@ private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer,
218227
indexWriter = DirectMonotonicWriter.getInstance(
219228
meta,
220229
new ByteBuffersIndexOutput(indexOut, "temp-dv-index", "temp-dv-index"),
221-
1L + ((numValues - 1) >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT),
230+
1L + ((numValues - 1) >>> numericBlockShift),
222231
ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT
223232
);
224233
meta.writeInt(DIRECT_MONOTONIC_BLOCK_SHIFT);
225-
final long[] buffer = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];
234+
final long[] buffer = new long[numericBlockSize];
226235
int bufferSize = 0;
227-
final TSDBDocValuesEncoder encoder = new TSDBDocValuesEncoder(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
236+
final TSDBDocValuesEncoder encoder = new TSDBDocValuesEncoder(numericBlockSize);
228237
values = valuesProducer.getSortedNumeric(field);
229238
final int bitsPerOrd = maxOrd >= 0 ? PackedInts.bitsRequired(maxOrd - 1) : -1;
230239
if (valuesProducer.mergeStats.supported() && numDocsWithValue < maxDoc) {
@@ -240,7 +249,7 @@ private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer,
240249
}
241250
for (int i = 0; i < count; ++i) {
242251
buffer[bufferSize++] = values.nextValue();
243-
if (bufferSize == ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE) {
252+
if (bufferSize == numericBlockSize) {
244253
indexWriter.add(data.getFilePointer() - valuesDataOffset);
245254
if (maxOrd >= 0) {
246255
encoder.encodeOrdinals(buffer, data, bitsPerOrd);
@@ -254,7 +263,7 @@ private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer,
254263
if (bufferSize > 0) {
255264
indexWriter.add(data.getFilePointer() - valuesDataOffset);
256265
// Fill unused slots in the block with zeroes rather than junk
257-
Arrays.fill(buffer, bufferSize, ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE, 0L);
266+
Arrays.fill(buffer, bufferSize, numericBlockSize, 0L);
258267
if (maxOrd >= 0) {
259268
encoder.encodeOrdinals(buffer, data, bitsPerOrd);
260269
} else {

server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,7 @@
3737
public class ES819TSDBDocValuesFormat extends org.apache.lucene.codecs.DocValuesFormat {
3838

3939
static final int NUMERIC_BLOCK_SHIFT = 7;
40-
public static final int NUMERIC_BLOCK_SIZE = 1 << NUMERIC_BLOCK_SHIFT;
41-
static final int NUMERIC_BLOCK_MASK = NUMERIC_BLOCK_SIZE - 1;
40+
static final int NUMERIC_LARGE_BLOCK_SHIFT = 9;
4241
static final int DIRECT_MONOTONIC_BLOCK_SHIFT = 16;
4342
static final String CODEC_NAME = "ES819TSDB";
4443
static final String DATA_CODEC = "ES819TSDBDocValuesData";
@@ -53,7 +52,8 @@ public class ES819TSDBDocValuesFormat extends org.apache.lucene.codecs.DocValues
5352

5453
static final int VERSION_START = 0;
5554
static final int VERSION_BINARY_DV_COMPRESSION = 1;
56-
static final int VERSION_CURRENT = VERSION_BINARY_DV_COMPRESSION;
55+
static final int VERSION_NUMERIC_LARGE_BLOCKS = 2;
56+
static final int VERSION_CURRENT = VERSION_NUMERIC_LARGE_BLOCKS;
5757

5858
static final int TERMS_DICT_BLOCK_LZ4_SHIFT = 6;
5959
static final int TERMS_DICT_BLOCK_LZ4_SIZE = 1 << TERMS_DICT_BLOCK_LZ4_SHIFT;
@@ -130,14 +130,18 @@ private static boolean getOptimizedMergeEnabledDefault() {
130130
*/
131131
public static final int ORDINAL_RANGE_ENCODING_BLOCK_SHIFT = 12;
132132

133+
final int numericBlockShift;
133134
final int skipIndexIntervalSize;
134135
final int minDocsPerOrdinalForRangeEncoding;
135136
final boolean enableOptimizedMerge;
136137
final BinaryDVCompressionMode binaryDVCompressionMode;
137138
final boolean enablePerBlockCompression;
138139

139-
/** Default constructor. */
140140
public ES819TSDBDocValuesFormat() {
141+
this(NUMERIC_BLOCK_SHIFT);
142+
}
143+
144+
public ES819TSDBDocValuesFormat(int numericBlockShift) {
141145
this(
142146
DEFAULT_SKIP_INDEX_INTERVAL_SIZE,
143147
ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL,
@@ -153,7 +157,8 @@ public ES819TSDBDocValuesFormat(BinaryDVCompressionMode binaryDVCompressionMode)
153157
ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL,
154158
OPTIMIZED_MERGE_ENABLE_DEFAULT,
155159
binaryDVCompressionMode,
156-
true
160+
true,
161+
NUMERIC_BLOCK_SHIFT
157162
);
158163
}
159164

@@ -163,7 +168,8 @@ public ES819TSDBDocValuesFormat(BinaryDVCompressionMode binaryDVCompressionMode,
163168
ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL,
164169
OPTIMIZED_MERGE_ENABLE_DEFAULT,
165170
binaryDVCompressionMode,
166-
enablePerBlockCompression
171+
enablePerBlockCompression,
172+
NUMERIC_BLOCK_SHIFT
167173
);
168174
}
169175

@@ -174,8 +180,27 @@ public ES819TSDBDocValuesFormat(
174180
boolean enableOptimizedMerge,
175181
BinaryDVCompressionMode binaryDVCompressionMode,
176182
final boolean enablePerBlockCompression
183+
) {
184+
this(
185+
skipIndexIntervalSize,
186+
minDocsPerOrdinalForRangeEncoding,
187+
enableOptimizedMerge,
188+
binaryDVCompressionMode,
189+
enablePerBlockCompression,
190+
NUMERIC_BLOCK_SHIFT
191+
);
192+
}
193+
194+
public ES819TSDBDocValuesFormat(
195+
int skipIndexIntervalSize,
196+
int minDocsPerOrdinalForRangeEncoding,
197+
boolean enableOptimizedMerge,
198+
BinaryDVCompressionMode binaryDVCompressionMode,
199+
final boolean enablePerBlockCompression,
200+
final int numericBlockShift
177201
) {
178202
super(CODEC_NAME);
203+
assert numericBlockShift == NUMERIC_BLOCK_SHIFT || numericBlockShift == NUMERIC_LARGE_BLOCK_SHIFT : numericBlockShift;
179204
if (skipIndexIntervalSize < 2) {
180205
throw new IllegalArgumentException("skipIndexIntervalSize must be > 1, got [" + skipIndexIntervalSize + "]");
181206
}
@@ -184,6 +209,7 @@ public ES819TSDBDocValuesFormat(
184209
this.enableOptimizedMerge = enableOptimizedMerge;
185210
this.binaryDVCompressionMode = binaryDVCompressionMode;
186211
this.enablePerBlockCompression = enablePerBlockCompression;
212+
this.numericBlockShift = numericBlockShift;
187213
}
188214

189215
@Override
@@ -195,6 +221,7 @@ public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOExcept
195221
skipIndexIntervalSize,
196222
minDocsPerOrdinalForRangeEncoding,
197223
enableOptimizedMerge,
224+
numericBlockShift,
198225
DATA_CODEC,
199226
DATA_EXTENSION,
200227
META_CODEC,

0 commit comments

Comments
 (0)