Skip to content

Commit 66107f1

Browse files
authored
Push compute engine value loading for longs down to tsdb codec. (#132622)
This is the first of many changes that push loading of field values down to the es819 doc values codec for logsdb/tsdb, when the field supports it. This change first targets reading field values in bulk mode at codec level when the doc values type is numeric doc values or sorted doc values, there is only one value per document, and the field is dense (all documents have a value). Multivalued and sparse fields are more complex to support bulk reading for, but it is possible. With this change, the following field types will support bulk read mode at codec level under the described conditions: long, date, geo_point, point and unsigned_long. Other number types like integer, short, double, float and scaled_float will be supported in a followup; they are similar to long based fields, but require an additional conversion step to either an int or float vector. This change originates from #132460 (which adds bulk reading to `@timestamp`, `_tsid` and dimension fields) and is basically the timestamp support part of it. In another followup, support for single valued, dense sorted (set) doc values will be added for fields like `_tsid`. Relates to #128445
1 parent 7ab9bb6 commit 66107f1

File tree

17 files changed

+930
-12
lines changed

17 files changed

+930
-12
lines changed

docs/changelog/132622.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 132622
2+
summary: Push down compute engine value loading of long-based singleton numeric doc values to the es819 tsdb doc values codec.
3+
area: "Codec"
4+
type: enhancement
5+
issues: []

server/src/main/java/org/elasticsearch/index/codec/tsdb/DocValuesForUtil.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
import java.io.IOException;
1818

19-
public class DocValuesForUtil {
19+
public final class DocValuesForUtil {
2020
private static final int BITS_IN_FOUR_BYTES = 4 * Byte.SIZE;
2121
private static final int BITS_IN_FIVE_BYTES = 5 * Byte.SIZE;
2222
private static final int BITS_IN_SIX_BYTES = 6 * Byte.SIZE;

server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBDocValuesEncoder.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
*
5555
* Of course, decoding follows the opposite order with respect to encoding.
5656
*/
57-
public class TSDBDocValuesEncoder {
57+
public final class TSDBDocValuesEncoder {
5858
private final DocValuesForUtil forUtil;
5959
private final int numericBlockSize;
6060

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.index.codec.tsdb.es819;
11+
12+
import org.apache.lucene.index.NumericDocValues;
13+
import org.elasticsearch.index.mapper.BlockLoader;
14+
15+
import java.io.IOException;
16+
17+
/**
18+
* An es819 doc values specialization that allows bulk loading of values that is optimized in the context of compute engine.
19+
*/
20+
public abstract class BulkNumericDocValues extends NumericDocValues {
21+
22+
/**
23+
* Reads the values of all documents in {@code docs}.
24+
*/
25+
public abstract BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException;
26+
27+
}

server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
import org.apache.lucene.util.packed.PackedInts;
4444
import org.elasticsearch.core.IOUtils;
4545
import org.elasticsearch.index.codec.tsdb.TSDBDocValuesEncoder;
46+
import org.elasticsearch.index.mapper.BlockLoader;
4647

4748
import java.io.IOException;
4849

@@ -1140,7 +1141,7 @@ public long longValue() {
11401141
final int bitsPerOrd = maxOrd >= 0 ? PackedInts.bitsRequired(maxOrd - 1) : -1;
11411142
if (entry.docsWithFieldOffset == -1) {
11421143
// dense
1143-
return new NumericDocValues() {
1144+
return new BulkNumericDocValues() {
11441145

11451146
private final int maxDoc = ES819TSDBDocValuesProducer.this.maxDoc;
11461147
private int doc = -1;
@@ -1197,6 +1198,53 @@ public long longValue() throws IOException {
11971198
}
11981199
return currentBlock[blockInIndex];
11991200
}
1201+
1202+
@Override
1203+
public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException {
1204+
assert maxOrd == -1 : "unexpected maxOrd[" + maxOrd + "]";
1205+
final int docsCount = docs.count();
1206+
doc = docs.get(docsCount - 1);
1207+
try (BlockLoader.SingletonLongBuilder builder = factory.singletonLongs(docs.count() - offset)) {
1208+
for (int i = offset; i < docsCount;) {
1209+
int index = docs.get(i);
1210+
final int blockIndex = index >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT;
1211+
final int blockInIndex = index & ES819TSDBDocValuesFormat.NUMERIC_BLOCK_MASK;
1212+
if (blockIndex != currentBlockIndex) {
1213+
assert blockIndex > currentBlockIndex : blockIndex + " < " + currentBlockIndex;
1214+
// no need to seek if the loading block is the next block
1215+
if (currentBlockIndex + 1 != blockIndex) {
1216+
valuesData.seek(indexReader.get(blockIndex));
1217+
}
1218+
currentBlockIndex = blockIndex;
1219+
decoder.decode(valuesData, currentBlock);
1220+
}
1221+
1222+
// Try to append more than just one value:
1223+
// Instead of iterating over docs to find the max length, take an optimistic approach to avoid as
1224+
// many comparisons as there are remaining docs and instead do at most 7 comparisons:
1225+
int length = 1;
1226+
int remainingBlockLength = Math.min(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE - blockInIndex, docsCount - i);
1227+
for (int newLength = remainingBlockLength; newLength > 1; newLength = newLength >> 1) {
1228+
int lastIndex = i + newLength - 1;
1229+
if (isDense(index, docs.get(lastIndex), newLength)) {
1230+
length = newLength;
1231+
break;
1232+
}
1233+
}
1234+
builder.appendLongs(currentBlock, blockInIndex, length);
1235+
i += length;
1236+
}
1237+
return builder.build();
1238+
}
1239+
}
1240+
1241+
static boolean isDense(int firstDocId, int lastDocId, int length) {
1242+
// This does not detect duplicate docids (e.g [1, 1, 2, 4] would be detected as dense),
1243+
// this can happen with enrich or lookup. However this codec isn't used for enrich / lookup.
1244+
// This codec is only used in the context of logsdb and tsdb, so this is fine here.
1245+
return lastDocId - firstDocId == length - 1;
1246+
}
1247+
12001248
};
12011249
} else {
12021250
final IndexedDISI disi = new IndexedDISI(

server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import org.apache.lucene.util.BytesRef;
2222
import org.elasticsearch.common.io.stream.ByteArrayStreamInput;
2323
import org.elasticsearch.index.IndexVersion;
24+
import org.elasticsearch.index.codec.tsdb.es819.BulkNumericDocValues;
2425
import org.elasticsearch.index.mapper.BlockLoader.BlockFactory;
2526
import org.elasticsearch.index.mapper.BlockLoader.BooleanBuilder;
2627
import org.elasticsearch.index.mapper.BlockLoader.Builder;
@@ -84,6 +85,7 @@ public boolean supportsOrdinals() {
8485
public SortedSetDocValues ordinals(LeafReaderContext context) throws IOException {
8586
throw new UnsupportedOperationException();
8687
}
88+
8789
}
8890

8991
public static class LongsBlockLoader extends DocValuesBlockLoader {
@@ -116,15 +118,18 @@ public AllReader reader(LeafReaderContext context) throws IOException {
116118
}
117119
}
118120

119-
private static class SingletonLongs extends BlockDocValuesReader {
120-
private final NumericDocValues numericDocValues;
121+
static class SingletonLongs extends BlockDocValuesReader {
122+
final NumericDocValues numericDocValues;
121123

122124
SingletonLongs(NumericDocValues numericDocValues) {
123125
this.numericDocValues = numericDocValues;
124126
}
125127

126128
@Override
127129
public BlockLoader.Block read(BlockFactory factory, Docs docs, int offset) throws IOException {
130+
if (numericDocValues instanceof BulkNumericDocValues bulkDv) {
131+
return bulkDv.read(factory, docs, offset);
132+
}
128133
try (BlockLoader.LongBuilder builder = factory.longsFromDocValues(docs.count() - offset)) {
129134
int lastDoc = -1;
130135
for (int i = offset; i < docs.count(); i++) {
@@ -164,7 +169,7 @@ public String toString() {
164169
}
165170
}
166171

167-
private static class Longs extends BlockDocValuesReader {
172+
static class Longs extends BlockDocValuesReader {
168173
private final SortedNumericDocValues numericDocValues;
169174
private int docID = -1;
170175

server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,17 @@ interface BlockFactory {
400400
*/
401401
LongBuilder longs(int expectedCount);
402402

403+
/**
404+
* Build a specialized builder for singleton dense long based fields with the following constraints:
405+
* <ul>
406+
* <li>Only one value per document can be collected</li>
407+
* <li>No more than expectedCount values can be collected</li>
408+
* </ul>
409+
*
410+
* @param expectedCount The maximum number of values to be collected.
411+
*/
412+
SingletonLongBuilder singletonLongs(int expectedCount);
413+
403414
/**
404415
* Build a builder to load only {@code null}s.
405416
*/
@@ -498,6 +509,16 @@ interface IntBuilder extends Builder {
498509
IntBuilder appendInt(int value);
499510
}
500511

512+
/**
513+
* Specialized builder for collecting dense arrays of long values.
514+
*/
515+
interface SingletonLongBuilder extends Builder {
516+
517+
SingletonLongBuilder appendLong(long value);
518+
519+
SingletonLongBuilder appendLongs(long[] values, int from, int length);
520+
}
521+
501522
interface LongBuilder extends Builder {
502523
/**
503524
* Appends a long to the current entry.

0 commit comments

Comments
 (0)