Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
dd4c80d
Push compute engine value loading for longs down to tsdb codec.
martijnvg Aug 9, 2025
d8ea150
Remove SingletonBulkLongBuilder#appendLong(...) method and
martijnvg Aug 10, 2025
0d73a27
[CI] Auto commit changes from spotless
Aug 10, 2025
b32cf93
Simplified es819 bulk read logic, as the complexity for full dense…
martijnvg Aug 10, 2025
173f113
adjust benchmark
martijnvg Aug 10, 2025
5d84f19
Update docs/changelog/132622.yaml
martijnvg Aug 11, 2025
e6014ec
Merge remote-tracking branch 'es/main' into compute_engine_improve_si…
martijnvg Aug 11, 2025
150e06a
fixed changelog
martijnvg Aug 11, 2025
ffd2de6
Remove BulkReader interface and use BlockLoader.ColumnAtATimeReader i…
martijnvg Aug 12, 2025
be0c77c
No need to pass down indexMode
martijnvg Aug 12, 2025
a5c877a
[CI] Auto commit changes from spotless
Aug 12, 2025
1c15d39
Renamed SingletonBulkLongsBuilder to SingletonLongBuilder
martijnvg Aug 12, 2025
054b12e
Simplify BulkNumericDocValues and LongsBlockLoader even more.
martijnvg Aug 12, 2025
f097f4a
Use SingletonLongBuilder also when we're not using es819 doc value co…
martijnvg Aug 12, 2025
c26e575
Merge remote-tracking branch 'es/main' into compute_engine_improve_si…
martijnvg Aug 12, 2025
cc3614b
iter
martijnvg Aug 12, 2025
8636124
iter2
martijnvg Aug 12, 2025
212ec5d
Revert "Use SingletonLongBuilder also when we're not using es819 doc …
martijnvg Aug 12, 2025
6ca5c66
improve computing remainingBlockLength
martijnvg Aug 12, 2025
2ef68f4
remove unneeded catch clauses.
martijnvg Aug 12, 2025
115e4e6
remove unneeded condition now that computing remainingBlockLength has…
martijnvg Aug 12, 2025
dcbcaf2
iter summary
martijnvg Aug 12, 2025
5e207e1
remove unused field
martijnvg Aug 12, 2025
206703b
iter changelog
martijnvg Aug 12, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.breaker.NoopCircuitBreaker;
import org.elasticsearch.common.logging.LogConfigurator;
import org.elasticsearch.common.lucene.Lucene;
Expand Down Expand Up @@ -272,6 +273,13 @@ private static BlockLoader numericBlockLoader(WhereAndBaseName w, NumberFieldMap
case STORED:
throw new UnsupportedOperationException();
}
var settings = Settings.builder()
.put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current())
.put("index.number_of_shards", 1)
.put("index.number_of_replicas", 0)
.build();
var indexMetadata = new IndexMetadata.Builder("index").settings(settings).build();
var indexSettings = new IndexSettings(indexMetadata, settings);
return new NumberFieldMapper.NumberFieldType(
w.name,
numberType,
Expand Down Expand Up @@ -299,7 +307,7 @@ public MappedFieldType.FieldExtractPreference fieldExtractPreference() {

@Override
public IndexSettings indexSettings() {
throw new UnsupportedOperationException();
return indexSettings;
}

@Override
Expand Down
5 changes: 5 additions & 0 deletions docs/changelog/132622.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 132622
summary: Add bulk loading of dense singleton number doc values to tsdb codec and push compute engine value loading for longs down to tsdb codec
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the summary doesn't match the PR title?

area: "Codec"
type: enhancement
issues: []
17 changes: 17 additions & 0 deletions server/src/main/java/org/elasticsearch/index/IndexMode.java
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,11 @@ public void validateSourceFieldMapper(SourceFieldMapper sourceFieldMapper) {
public SourceFieldMapper.Mode defaultSourceMode() {
return SourceFieldMapper.Mode.SYNTHETIC;
}

@Override
public boolean supportOptimizedDocValueLoading() {
return true;
}
},
LOGSDB("logsdb") {
@Override
Expand Down Expand Up @@ -320,6 +325,11 @@ public SourceFieldMapper.Mode defaultSourceMode() {
public String getDefaultCodec() {
return CodecService.BEST_COMPRESSION_CODEC;
}

@Override
public boolean supportOptimizedDocValueLoading() {
return true;
}
},
LOOKUP("lookup") {
@Override
Expand Down Expand Up @@ -564,6 +574,13 @@ public boolean useDefaultPostingsFormat() {
return false;
}

/**
* @return Whether the index mode uses a doc value codec that may support optimized doc value loading for one or more fields.
*/
public boolean supportOptimizedDocValueLoading() {
return false;
}

/**
* Parse a string into an {@link IndexMode}.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

import java.io.IOException;

public class DocValuesForUtil {
public final class DocValuesForUtil {
private static final int BITS_IN_FOUR_BYTES = 4 * Byte.SIZE;
private static final int BITS_IN_FIVE_BYTES = 5 * Byte.SIZE;
private static final int BITS_IN_SIX_BYTES = 6 * Byte.SIZE;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
*
* Of course, decoding follows the opposite order with respect to encoding.
*/
public class TSDBDocValuesEncoder {
public final class TSDBDocValuesEncoder {
private final DocValuesForUtil forUtil;
private final int numericBlockSize;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.codec.tsdb.es819;

import org.apache.lucene.index.NumericDocValues;

/**
 * An es819 doc values specialization that allows retrieving a {@link BulkReader} for bulk
 * loading of doc values into the compute engine's block builders.
 */
public abstract class BulkNumericDocValues extends NumericDocValues {

/**
 * @return a bulk reader instance, or <code>null</code> if the field or the implementation doesn't support bulk loading.
 */
public abstract BulkReader getBulkReader();

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.codec.tsdb.es819;

import org.elasticsearch.index.mapper.BlockLoader;

import java.io.IOException;

/**
 * Low-level abstraction for es819 doc values that allows bulk reading of doc values into the compute engine's block builders.
 */
public interface BulkReader {

/**
 * Appends values for the specified docs, starting at the given offset, into the provided builder.
 *
 * @param builder the builder that long values are appended to
 * @param docs    the docids whose values should be read
 * @param offset  the position in {@code docs} to start reading from
 * @throws IOException if reading the underlying doc values data fails
 */
void bulkRead(BlockLoader.SingletonBulkLongBuilder builder, BlockLoader.Docs docs, int offset) throws IOException;

/**
 * @return the docid this bulk reader is currently positioned on.
 */
int docID();

}
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
import org.apache.lucene.util.packed.PackedInts;
import org.elasticsearch.core.IOUtils;
import org.elasticsearch.index.codec.tsdb.TSDBDocValuesEncoder;
import org.elasticsearch.index.mapper.BlockLoader;

import java.io.IOException;

Expand Down Expand Up @@ -1140,13 +1141,14 @@ public long longValue() {
final int bitsPerOrd = maxOrd >= 0 ? PackedInts.bitsRequired(maxOrd - 1) : -1;
if (entry.docsWithFieldOffset == -1) {
// dense
return new NumericDocValues() {
return new BulkNumericDocValues() {

private final int maxDoc = ES819TSDBDocValuesProducer.this.maxDoc;
private int doc = -1;
private final TSDBDocValuesEncoder decoder = new TSDBDocValuesEncoder(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
private long currentBlockIndex = -1;
private final long[] currentBlock = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];
private BulkReader bulkReader;

@Override
public int docID() {
Expand All @@ -1155,11 +1157,13 @@ public int docID() {

@Override
public int nextDoc() throws IOException {
assert bulkReader == null : "can't use this method if bulk loader has been initialized";
return advance(doc + 1);
}

@Override
public int advance(int target) throws IOException {
assert bulkReader == null : "can't use this method if bulk loader has been initialized";
if (target >= maxDoc) {
return doc = NO_MORE_DOCS;
}
Expand All @@ -1168,6 +1172,7 @@ public int advance(int target) throws IOException {

@Override
public boolean advanceExact(int target) {
assert bulkReader == null : "can't use this method if bulk loader has been initialized";
doc = target;
return true;
}
Expand All @@ -1179,6 +1184,7 @@ public long cost() {

@Override
public long longValue() throws IOException {
assert bulkReader == null : "can't use this method if bulk loader has been initialized";
final int index = doc;
final int blockIndex = index >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT;
final int blockInIndex = index & ES819TSDBDocValuesFormat.NUMERIC_BLOCK_MASK;
Expand All @@ -1197,6 +1203,61 @@ public long longValue() throws IOException {
}
return currentBlock[blockInIndex];
}

@Override
public BulkReader getBulkReader() {
if (bulkReader == null) {
bulkReader = new BulkReader() {

@Override
public void bulkRead(BlockLoader.SingletonBulkLongBuilder builder, BlockLoader.Docs docs, int offset)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be more consistent to implement BlockLoader.Block read(BlockFactory factory, Docs docs, int offset) throws IOException instead.

throws IOException {
assert maxOrd == -1 : "unexpected maxOrd[" + maxOrd + "]";
final int docsCount = docs.count();
doc = docs.get(docsCount - 1);
for (int i = offset; i < docsCount;) {
int index = docs.get(i);
final int blockIndex = index >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT;
final int blockInIndex = index & ES819TSDBDocValuesFormat.NUMERIC_BLOCK_MASK;
if (blockIndex != currentBlockIndex) {
assert blockIndex > currentBlockIndex : blockIndex + " < " + currentBlockIndex;
// no need to seek if the loading block is the next block
if (currentBlockIndex + 1 != blockIndex) {
valuesData.seek(indexReader.get(blockIndex));
}
currentBlockIndex = blockIndex;
decoder.decode(valuesData, currentBlock);
}

// Try to append more than just one value:
// Instead of iterating over docs and find the max length, take an optimistic approach to avoid as
// many comparisons as there are remaining docs and instead do at most 7 comparisons:
int length = 1;
int remainingBlockLength = ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE - blockInIndex;
for (int newLength = remainingBlockLength; newLength > 1; newLength = newLength >> 1) {
int lastIndex = i + newLength - 1;
if (lastIndex < docsCount && isDense(index, docs.get(lastIndex), newLength)) {
length = newLength;
break;
}
}
builder.appendLongs(currentBlock, blockInIndex, length);
i += length;
}
}

static boolean isDense(int firstDocId, int lastDocId, int length) {
return lastDocId - firstDocId == length - 1;
}

@Override
public int docID() {
return doc;
}
};
}
return bulkReader;
}
};
} else {
final IndexedDISI disi = new IndexedDISI(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.io.stream.ByteArrayStreamInput;
import org.elasticsearch.index.IndexMode;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.codec.tsdb.es819.BulkNumericDocValues;
import org.elasticsearch.index.codec.tsdb.es819.BulkReader;
import org.elasticsearch.index.mapper.BlockLoader.BlockFactory;
import org.elasticsearch.index.mapper.BlockLoader.BooleanBuilder;
import org.elasticsearch.index.mapper.BlockLoader.Builder;
Expand Down Expand Up @@ -62,6 +65,12 @@ public abstract static class DocValuesBlockLoader implements BlockLoader {

@Override
public final ColumnAtATimeReader columnAtATimeReader(LeafReaderContext context) throws IOException {
if (supportsOptimizedColumnAtTimeReader()) {
var optimizedColumnAtTimeReader = optimizedColumnAtTimeReader(context);
if (optimizedColumnAtTimeReader != null) {
return optimizedColumnAtTimeReader;
}
}
return reader(context);
}

Expand All @@ -84,13 +93,23 @@ public boolean supportsOrdinals() {
public SortedSetDocValues ordinals(LeafReaderContext context) throws IOException {
throw new UnsupportedOperationException();
}

protected boolean supportsOptimizedColumnAtTimeReader() {
return false;
}

protected ColumnAtATimeReader optimizedColumnAtTimeReader(LeafReaderContext context) throws IOException {
throw new UnsupportedOperationException();
}
}

public static class LongsBlockLoader extends DocValuesBlockLoader {
private final String fieldName;
private final IndexMode indexMode;

public LongsBlockLoader(String fieldName) {
public LongsBlockLoader(String fieldName, IndexMode indexMode) {
this.fieldName = fieldName;
this.indexMode = indexMode;
}

@Override
Expand All @@ -114,9 +133,57 @@ public AllReader reader(LeafReaderContext context) throws IOException {
}
return new ConstantNullsReader();
}

@Override
protected boolean supportsOptimizedColumnAtTimeReader() {
return indexMode.supportOptimizedDocValueLoading();
}

/**
 * Attempts to create an optimized column-at-a-time reader for this field.
 *
 * @param context the leaf reader context to load doc values from
 * @return a bulk-reader-backed {@link ColumnAtATimeReader} if the field's doc values support
 *         bulk loading, otherwise {@code null} so the caller falls back to the default reader
 * @throws IOException if reading the doc values fails
 */
@Override
protected ColumnAtATimeReader optimizedColumnAtTimeReader(LeafReaderContext context) throws IOException {
    NumericDocValues singleton = context.reader().getNumericDocValues(fieldName);
    if (singleton == null) {
        // The field may be indexed as multi-valued; unwrap it if it's effectively single-valued.
        SortedNumericDocValues docValues = context.reader().getSortedNumericDocValues(fieldName);
        singleton = DocValues.unwrapSingleton(docValues);
    }

    if (singleton instanceof BulkNumericDocValues bulkDv) {
        return new BulkSingletonLong(bulkDv.getBulkReader());
    }

    return null;
}
}

/**
 * A {@link BlockLoader.ColumnAtATimeReader} that delegates to a {@link BulkReader} to load
 * singleton long doc values an entire column at a time.
 */
static final class BulkSingletonLong implements BlockLoader.ColumnAtATimeReader {
    private final BulkReader reader;
    // Thread that created this instance; reuse is only safe from that same thread.
    private final Thread owner;

    BulkSingletonLong(BulkReader bulkReader) {
        this.reader = bulkReader;
        this.owner = Thread.currentThread();
    }

    @Override
    public BlockLoader.Block read(BlockFactory factory, Docs docs, int offset) throws IOException {
        int count = docs.count() - offset;
        try (BlockLoader.SingletonBulkLongBuilder builder = factory.singletonLongs(count)) {
            reader.bulkRead(builder, docs, offset);
            return builder.build();
        }
    }

    @Override
    public boolean canReuse(int startingDocID) {
        boolean sameThread = owner == Thread.currentThread();
        return sameThread && reader.docID() <= startingDocID;
    }

    @Override
    public String toString() {
        return "BlockDocValuesReader.BulkSingletonLong";
    }
}

private static class SingletonLongs extends BlockDocValuesReader {
static class SingletonLongs extends BlockDocValuesReader {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we enable the optimization in BlockLoader.Block read(BlockFactory factory, Docs docs, int offset) of this class only?

public BlockLoader.Block read(BlockFactory factory, Docs docs, int offset) throws IOException {
    if (numericDocValues instanceof ... r) {
        return r.read(factory, docs, offset);
    }
    ...
}

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that should work as well.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It took me a while, but this worked out and it is now even simpler! 054b12e

private final NumericDocValues numericDocValues;

SingletonLongs(NumericDocValues numericDocValues) {
Expand Down Expand Up @@ -164,7 +231,7 @@ public String toString() {
}
}

private static class Longs extends BlockDocValuesReader {
static class Longs extends BlockDocValuesReader {
private final SortedNumericDocValues numericDocValues;
private int docID = -1;

Expand Down
Loading