Skip to content

Commit 834f4df

Browse files
authored
[8.x] Prepare tsdb doc values format for merging optimizations. (#126261)
Backport of #125933 to the 8.x branch. This commit contains the following changes: - The numDocsWithField field is moved from SortedNumericEntry to NumericEntry, making this statistic always available. - The jump table is now stored after the values in ES87TSDBDocValuesConsumer#writeField(...); currently it is stored before the values. This will later allow us to iterate over the SortedNumericDocValues only once. Iterating is expensive during merging, because a merge sort is executed on the fly. This change enables all the optimizations that are listed in #125403.
1 parent cf5292e commit 834f4df

14 files changed

+2408
-12
lines changed

server/src/main/java/module-info.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -447,7 +447,10 @@
447447
org.elasticsearch.index.codec.bloomfilter.ES85BloomFilterPostingsFormat,
448448
org.elasticsearch.index.codec.bloomfilter.ES87BloomFilterPostingsFormat,
449449
org.elasticsearch.index.codec.postings.ES812PostingsFormat;
450-
provides org.apache.lucene.codecs.DocValuesFormat with org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat;
450+
provides org.apache.lucene.codecs.DocValuesFormat
451+
with
452+
org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat,
453+
org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat;
451454
provides org.apache.lucene.codecs.KnnVectorsFormat
452455
with
453456
org.elasticsearch.index.codec.vectors.ES813FlatVectorFormat,

server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
import org.elasticsearch.index.IndexSettings;
2020
import org.elasticsearch.index.codec.bloomfilter.ES87BloomFilterPostingsFormat;
2121
import org.elasticsearch.index.codec.postings.ES812PostingsFormat;
22-
import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat;
22+
import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat;
2323
import org.elasticsearch.index.mapper.IdFieldMapper;
2424
import org.elasticsearch.index.mapper.Mapper;
2525
import org.elasticsearch.index.mapper.MapperService;
@@ -31,18 +31,17 @@
3131
*/
3232
public class PerFieldFormatSupplier {
3333

34+
private static final ES819TSDBDocValuesFormat tsdbDocValuesFormat = new ES819TSDBDocValuesFormat();
3435
private final MapperService mapperService;
3536
private final DocValuesFormat docValuesFormat = new Lucene90DocValuesFormat();
3637
private final KnnVectorsFormat knnVectorsFormat = new Lucene99HnswVectorsFormat();
3738
private final ES87BloomFilterPostingsFormat bloomFilterPostingsFormat;
38-
private final ES87TSDBDocValuesFormat tsdbDocValuesFormat;
3939

4040
private final ES812PostingsFormat es812PostingsFormat;
4141

4242
public PerFieldFormatSupplier(MapperService mapperService, BigArrays bigArrays) {
4343
this.mapperService = mapperService;
4444
this.bloomFilterPostingsFormat = new ES87BloomFilterPostingsFormat(bigArrays, this::internalGetPostingsFormatForField);
45-
this.tsdbDocValuesFormat = new ES87TSDBDocValuesFormat();
4645
this.es812PostingsFormat = new ES812PostingsFormat();
4746
}
4847

server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesFormat.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ public ES87TSDBDocValuesFormat() {
4949

5050
@Override
5151
public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
52-
return new ES87TSDBDocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION);
52+
throw new UnsupportedOperationException("writing es87 doc values is no longer supported");
5353
}
5454

5555
@Override

server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444

4545
import static org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat.TERMS_DICT_BLOCK_LZ4_SHIFT;
4646

47-
public class ES87TSDBDocValuesProducer extends DocValuesProducer {
47+
final class ES87TSDBDocValuesProducer extends DocValuesProducer {
4848
private final IntObjectHashMap<NumericEntry> numerics;
4949
private final IntObjectHashMap<BinaryEntry> binaries;
5050
private final IntObjectHashMap<SortedEntry> sorted;

server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBDocValuesEncoder.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ private void forEncode(int token, int tokenBits, long[] in, DataOutput out) thro
175175
/**
176176
* Encode the given longs using a combination of delta-coding, GCD factorization and bit packing.
177177
*/
178-
void encode(long[] in, DataOutput out) throws IOException {
178+
public void encode(long[] in, DataOutput out) throws IOException {
179179
assert in.length == numericBlockSize;
180180

181181
deltaEncode(0, 0, in, out);
@@ -193,7 +193,7 @@ void encode(long[] in, DataOutput out) throws IOException {
193193
* <li>3: cycle</li>
194194
* </ul>
195195
*/
196-
void encodeOrdinals(long[] in, DataOutput out, int bitsPerOrd) throws IOException {
196+
public void encodeOrdinals(long[] in, DataOutput out, int bitsPerOrd) throws IOException {
197197
assert in.length == numericBlockSize;
198198
int numRuns = 1;
199199
long firstValue = in[0];
@@ -260,7 +260,7 @@ void encodeOrdinals(long[] in, DataOutput out, int bitsPerOrd) throws IOExceptio
260260
}
261261
}
262262

263-
void decodeOrdinals(DataInput in, long[] out, int bitsPerOrd) throws IOException {
263+
public void decodeOrdinals(DataInput in, long[] out, int bitsPerOrd) throws IOException {
264264
assert out.length == numericBlockSize : out.length;
265265

266266
long v1 = in.readVLong();
@@ -294,7 +294,7 @@ void decodeOrdinals(DataInput in, long[] out, int bitsPerOrd) throws IOException
294294
}
295295

296296
/** Decode longs that have been encoded with {@link #encode}. */
297-
void decode(DataInput in, long[] out) throws IOException {
297+
public void decode(DataInput in, long[] out) throws IOException {
298298
assert out.length == numericBlockSize : out.length;
299299

300300
final int token = in.readVInt();

0 commit comments

Comments
 (0)