@@ -223,7 +223,8 @@ private static BlockLoader blockLoader(String name) {
Lucene.KEYWORD_ANALYZER,
Lucene.KEYWORD_ANALYZER,
new KeywordFieldMapper.Builder(name, IndexVersion.current()).docValues(ft.docValuesType() != DocValuesType.NONE),
syntheticSource
syntheticSource,
false
).blockLoader(new MappedFieldType.BlockLoaderContext() {
@Override
public String indexName() {
@@ -298,7 +298,8 @@ public void testBlockLoaderDoesNotUseSyntheticSourceDelegateWhenIgnoreAboveIsSet
mock(NamedAnalyzer.class),
mock(NamedAnalyzer.class),
builder,
true
true,
false
);

MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType(
@@ -346,7 +347,8 @@ public void testBlockLoaderDoesNotUseSyntheticSourceDelegateWhenIgnoreAboveIsSet
mock(NamedAnalyzer.class),
mock(NamedAnalyzer.class),
builder,
true
true,
false
);

MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType(
@@ -244,6 +244,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
if (IndexSettings.DOC_VALUES_SKIPPER) {
settings.add(IndexSettings.USE_DOC_VALUES_SKIPPER);
}
settings.add(IndexSettings.USE_BINARY_DOC_VALUES);
settings.add(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING);
BUILT_IN_INDEX_SETTINGS = Collections.unmodifiableSet(settings);
};
@@ -614,6 +614,10 @@ public ByteArray newByteArray(long size, boolean clearOnResize) {
}
}

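/**
 * Wraps an existing {@code byte[]} in a {@link ByteArray} view without copying the data.
 */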
public ByteArray newByteArrayWrapper(byte[] bytes) {
return validate(new ByteArrayWrapper(this, bytes, bytes.length, null, false));
}

/**
* Allocate a new {@link ByteArray} initialized with zeros.
* @param size the initial length of the array
@@ -76,7 +76,7 @@ public BytesRefArray(StreamInput in, BigArrays bigArrays) throws IOException {
}
}

private BytesRefArray(LongArray startOffsets, ByteArray bytes, long size, BigArrays bigArrays) {
public BytesRefArray(LongArray startOffsets, ByteArray bytes, long size, BigArrays bigArrays) {
this.bytes = bytes;
this.startOffsets = startOffsets;
this.size = size;
@@ -675,6 +675,13 @@ public boolean isES87TSDBCodecEnabled() {
Property.Final
);

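/**
 * Index-scoped, final boolean setting (default {@code false}) controlling whether an index's mappings use binary doc values.
 */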
public static final Setting<Boolean> USE_BINARY_DOC_VALUES = Setting.boolSetting(
"index.mapping.use_binary_doc_values",
false,
Property.IndexScope,
Property.Final
);

/**
* The {@link IndexMode "mode"} of the index.
*/
@@ -208,6 +208,28 @@ public BytesRef binaryValue() throws IOException {
bytesSlice.readBytes((long) doc * length, bytes.bytes, 0, length);
return bytes;
}

@Override
public BlockLoader.Block tryRead(
BlockLoader.BlockFactory factory,
BlockLoader.Docs docs,
int offset,
boolean nullsFiltered,
BlockDocValuesReader.ToDouble toDouble,
boolean toInt
) throws IOException {
int count = docs.count() - offset;
try (var builder = factory.bytesRefs(count)) {
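// Fixed-length values: doc N's bytes start at N * length, so each requested doc's slice can be read directly.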
int docId = -1;
for (int i = offset; i < docs.count(); i++) {
docId = docs.get(i);
bytesSlice.readBytes((long) docId * length, bytes.bytes, 0, length);
builder.appendBytesRef(bytes);
}
this.doc = docId;
return builder.build();
}
}
};
} else {
// variable length
@@ -223,6 +245,53 @@ public BytesRef binaryValue() throws IOException {
bytesSlice.readBytes(startOffset, bytes.bytes, 0, bytes.length);
return bytes;
}

@Override
public BlockLoader.Block tryRead(
BlockLoader.BlockFactory factory,
BlockLoader.Docs docs,
int offset,
boolean nullsFiltered,
BlockDocValuesReader.ToDouble toDouble,
boolean toInt
) throws IOException {
int count = docs.count() - offset;
int firstDocId = docs.get(offset);
doc = docs.get(docs.count() - 1);

if (isDense(firstDocId, doc, count)) {
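// Dense case: the requested doc ids are consecutive, so their values form one contiguous byte range that can be fetched with a single readBytes call.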
try (var builder = factory.singletonBytesRefs(count)) {
long[] offsets = new long[count + 1];

// Compute each value's end offset relative to the first doc's start offset; these index into the contiguous byte range fetched below:
int j = 1;
long startOffset = addresses.get(firstDocId);
for (int i = offset; i < docs.count(); i++) {
int docId = docs.get(i);
long nextOffset = addresses.get(docId + 1) - startOffset;
offsets[j++] = nextOffset;
}

int lastDocId = docs.get(docs.count() - 1);
int length = Math.toIntExact(addresses.get(lastDocId + 1L) - startOffset);
byte[] bytes = new byte[length];
bytesSlice.readBytes(startOffset, bytes, 0, length);
builder.appendBytesRefs(bytes, offsets);
return builder.build();
}
Review comment on lines +262 to +281 (Member Author): This is almost 2x faster than the else branch. Tested on my local machine.

} else {
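// Sparse case: the requested doc ids have gaps, so read each value's slice individually.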
try (var builder = factory.bytesRefs(count)) {
for (int i = offset; i < docs.count(); i++) {
int docId = docs.get(i);
long startOffset = addresses.get(docId);
bytes.length = (int) (addresses.get(docId + 1L) - startOffset);
bytesSlice.readBytes(startOffset, bytes.bytes, 0, bytes.length);
builder.appendBytesRef(bytes);
}
return builder.build();
}
}
}
};
}
} else {
@@ -267,7 +336,7 @@ public BytesRef binaryValue() throws IOException {
}
}

private abstract static class DenseBinaryDocValues extends BinaryDocValues {
private abstract static class DenseBinaryDocValues extends BinaryDocValues implements BlockLoader.OptionalColumnAtATimeReader {

final int maxDoc;
int doc = -1;
@@ -1481,13 +1550,6 @@ long lookAheadValueAt(int targetDoc) throws IOException {
return lookaheadBlock[valueIndex];
}

static boolean isDense(int firstDocId, int lastDocId, int length) {
// This does not detect duplicate docids (e.g [1, 1, 2, 4] would be detected as dense),
// this can happen with enrich or lookup. However this codec isn't used for enrich / lookup.
// This codec is only used in the context of logsdb and tsdb, so this is fine here.
return lastDocId - firstDocId == length - 1;
}

};
} else {
final IndexedDISI disi = new IndexedDISI(
@@ -1602,6 +1664,13 @@ public BlockLoader.Block tryRead(
}
}

static boolean isDense(int firstDocId, int lastDocId, int length) {
// This does not detect duplicate docids (e.g [1, 1, 2, 4] would be detected as dense),
// this can happen with enrich or lookup. However this codec isn't used for enrich / lookup.
// This codec is only used in the context of logsdb and tsdb, so this is fine here.
return lastDocId - firstDocId == length - 1;
}

private NumericDocValues getRangeEncodedNumericDocValues(NumericEntry entry, long maxOrd) throws IOException {
final var ordinalsReader = new SortedOrdinalReader(
maxOrd,
@@ -925,6 +925,28 @@ public AllReader reader(LeafReaderContext context) throws IOException {
}
}

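/**
 * Block loader that reads {@link BytesRef} values straight from a field's {@link BinaryDocValues},
 * falling back to a {@link ConstantNullsReader} when a segment has no binary doc values for the field.
 */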
public static class BytesRefsFromBinaryBlockLoader extends DocValuesBlockLoader {
private final String fieldName;

public BytesRefsFromBinaryBlockLoader(String fieldName) {
this.fieldName = fieldName;
}

@Override
public Builder builder(BlockFactory factory, int expectedCount) {
return factory.bytesRefs(expectedCount);
}

@Override
public AllReader reader(LeafReaderContext context) throws IOException {
BinaryDocValues docValues = context.reader().getBinaryDocValues(fieldName);
if (docValues == null) {
return new ConstantNullsReader();
}
return new BytesRefsFromBinary(docValues);
}
}

abstract static class AbstractBytesRefsFromBinary extends BlockDocValuesReader {
protected final BinaryDocValues docValues;

@@ -1010,6 +1032,17 @@ public BytesRefsFromBinary(BinaryDocValues docValues) {
super(docValues);
}

@Override
public BlockLoader.Block read(BlockFactory factory, Docs docs, int offset, boolean nullsFiltered) throws IOException {
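// Prefer the codec's bulk column-at-a-time read when the underlying doc values support it; otherwise fall back to the row-by-row path below.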
if (docValues instanceof BlockLoader.OptionalColumnAtATimeReader direct) {
BlockLoader.Block block = direct.tryRead(factory, docs, offset, nullsFiltered, null, false);
if (block != null) {
return block;
}
}
return super.read(factory, docs, offset, nullsFiltered);
}

@Override
void read(int doc, BytesRefBuilder builder) throws IOException {
if (false == docValues.advanceExact(doc)) {
@@ -398,6 +398,8 @@ interface BlockFactory {
*/
BytesRefBuilder bytesRefs(int expectedCount);

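/**
 * Build a builder to load dense, single-valued {@link BytesRef}s backed by a single shared byte array.
 */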
SingletonBytesRefBuilder singletonBytesRefs(int expectedCount);

/**
* Build a builder to load doubles as loaded from doc values.
* Doc values load doubles in sorted order.
@@ -546,6 +548,12 @@ interface BytesRefBuilder extends Builder {
BytesRefBuilder appendBytesRef(BytesRef value);
}

interface SingletonBytesRefBuilder extends Builder {

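/**
 * Appends all values in one call: {@code bytes} holds the concatenated values and {@code offsets} has one extra
 * entry, with value {@code i} spanning {@code offsets[i]} (inclusive) to {@code offsets[i + 1]} (exclusive).
 */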
SingletonBytesRefBuilder appendBytesRefs(byte[] bytes, long[] offsets) throws IOException;

}

interface FloatBuilder extends Builder {
/**
* Appends a float to the current entry.