Skip to content

Commit 52d6839

Browse files
authored
Prepare tsdb doc values format for merging optimizations. (#125933)
This commit makes the following changes: - The numDocsWithField field is moved from SortedNumericEntry to NumericEntry, making this statistic always available. - The jump table is now stored after the values in ES87TSDBDocValuesConsumer#writeField(...); until now it was stored before the values. This will later allow us to iterate over the SortedNumericDocValues only once, which matters when merging because each iteration is expensive: a merge sort is executed on the fly. Together, these changes enable all the optimizations listed in #125403.
1 parent 40dd91b commit 52d6839

16 files changed

+2774
-13
lines changed

server/src/main/java/module-info.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -443,7 +443,10 @@
443443
org.elasticsearch.index.codec.bloomfilter.ES85BloomFilterPostingsFormat,
444444
org.elasticsearch.index.codec.bloomfilter.ES87BloomFilterPostingsFormat,
445445
org.elasticsearch.index.codec.postings.ES812PostingsFormat;
446-
provides org.apache.lucene.codecs.DocValuesFormat with org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat;
446+
provides org.apache.lucene.codecs.DocValuesFormat
447+
with
448+
org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat,
449+
org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat;
447450
provides org.apache.lucene.codecs.KnnVectorsFormat
448451
with
449452
org.elasticsearch.index.codec.vectors.ES813FlatVectorFormat,

server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
import org.elasticsearch.index.IndexSettings;
2020
import org.elasticsearch.index.codec.bloomfilter.ES87BloomFilterPostingsFormat;
2121
import org.elasticsearch.index.codec.postings.ES812PostingsFormat;
22-
import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat;
22+
import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat;
2323
import org.elasticsearch.index.mapper.CompletionFieldMapper;
2424
import org.elasticsearch.index.mapper.IdFieldMapper;
2525
import org.elasticsearch.index.mapper.Mapper;
@@ -34,7 +34,7 @@ public class PerFieldFormatSupplier {
3434

3535
private static final DocValuesFormat docValuesFormat = new Lucene90DocValuesFormat();
3636
private static final KnnVectorsFormat knnVectorsFormat = new Lucene99HnswVectorsFormat();
37-
private static final ES87TSDBDocValuesFormat tsdbDocValuesFormat = new ES87TSDBDocValuesFormat();
37+
private static final ES819TSDBDocValuesFormat tsdbDocValuesFormat = new ES819TSDBDocValuesFormat();
3838
private static final ES812PostingsFormat es812PostingsFormat = new ES812PostingsFormat();
3939
private static final PostingsFormat completionPostingsFormat = PostingsFormat.forName("Completion101");
4040

server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesFormat.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ public class ES87TSDBDocValuesFormat extends org.apache.lucene.codecs.DocValuesF
7575
}
7676
}
7777

78-
private final int skipIndexIntervalSize;
78+
final int skipIndexIntervalSize;
7979

8080
/** Default constructor. */
8181
public ES87TSDBDocValuesFormat() {
@@ -93,7 +93,7 @@ public ES87TSDBDocValuesFormat(int skipIndexIntervalSize) {
9393

9494
@Override
9595
public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
96-
return new ES87TSDBDocValuesConsumer(state, skipIndexIntervalSize, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION);
96+
throw new UnsupportedOperationException("writing es87 doc values is no longer supported");
9797
}
9898

9999
@Override

server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
import static org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat.SKIP_INDEX_MAX_LEVEL;
5050
import static org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat.TERMS_DICT_BLOCK_LZ4_SHIFT;
5151

52-
public class ES87TSDBDocValuesProducer extends DocValuesProducer {
52+
final class ES87TSDBDocValuesProducer extends DocValuesProducer {
5353
private final IntObjectHashMap<NumericEntry> numerics;
5454
private final IntObjectHashMap<BinaryEntry> binaries;
5555
private final IntObjectHashMap<SortedEntry> sorted;

server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBDocValuesEncoder.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ private void forEncode(int token, int tokenBits, long[] in, DataOutput out) thro
175175
/**
176176
* Encode the given longs using a combination of delta-coding, GCD factorization and bit packing.
177177
*/
178-
void encode(long[] in, DataOutput out) throws IOException {
178+
public void encode(long[] in, DataOutput out) throws IOException {
179179
assert in.length == numericBlockSize;
180180

181181
deltaEncode(0, 0, in, out);
@@ -193,7 +193,7 @@ void encode(long[] in, DataOutput out) throws IOException {
193193
* <li>3: cycle</li>
194194
* </ul>
195195
*/
196-
void encodeOrdinals(long[] in, DataOutput out, int bitsPerOrd) throws IOException {
196+
public void encodeOrdinals(long[] in, DataOutput out, int bitsPerOrd) throws IOException {
197197
assert in.length == numericBlockSize;
198198
int numRuns = 1;
199199
long firstValue = in[0];
@@ -260,7 +260,7 @@ void encodeOrdinals(long[] in, DataOutput out, int bitsPerOrd) throws IOExceptio
260260
}
261261
}
262262

263-
void decodeOrdinals(DataInput in, long[] out, int bitsPerOrd) throws IOException {
263+
public void decodeOrdinals(DataInput in, long[] out, int bitsPerOrd) throws IOException {
264264
assert out.length == numericBlockSize : out.length;
265265

266266
long v1 = in.readVLong();
@@ -294,7 +294,7 @@ void decodeOrdinals(DataInput in, long[] out, int bitsPerOrd) throws IOException
294294
}
295295

296296
/** Decode longs that have been encoded with {@link #encode}. */
297-
void decode(DataInput in, long[] out) throws IOException {
297+
public void decode(DataInput in, long[] out) throws IOException {
298298
assert out.length == numericBlockSize : out.length;
299299

300300
final int token = in.readVInt();

0 commit comments

Comments
 (0)