Merged

Commits
74 commits
74880a0
Copy binary compression from LUCENE-9211
parkertimmins Oct 23, 2025
a973713
Initial version of blocks with variable number of values
parkertimmins Oct 23, 2025
3fc95dc
Fix issue with index output unclosed
parkertimmins Oct 23, 2025
c302cc2
Changes docRanges to single limit per block, plus start of 0
parkertimmins Oct 23, 2025
99748c8
Factor block address and block doc offset to accumulator class
parkertimmins Oct 23, 2025
fa2ea11
Rename offset accumulator
parkertimmins Oct 24, 2025
b67dd58
Change lz4 to zstd
parkertimmins Oct 24, 2025
638dbbc
Fix direct monotonic reader size
parkertimmins Oct 24, 2025
fdf3428
Fix docRangeLen bug, use for non-logsdb wildcards
parkertimmins Oct 24, 2025
36b3e10
Change offset encoding from zstd to numeric
parkertimmins Oct 24, 2025
eeded36
[CI] Auto commit changes from spotless
Oct 24, 2025
2d8e6dc
Fix missing compression in es819 format
parkertimmins Oct 25, 2025
efa270f
Merge branch 'main' into parker/compressed-binary-doc-values
parkertimmins Oct 25, 2025
c4d67e5
Store offsets rather than lengths
parkertimmins Oct 25, 2025
06a2035
[CI] Auto commit changes from spotless
Oct 25, 2025
7ccb18d
Remove forbidden APIs
parkertimmins Oct 25, 2025
a57e0d4
[CI] Auto commit changes from spotless
Oct 25, 2025
f156e55
Binary search to find block containing docNum
parkertimmins Oct 27, 2025
91e5842
[CI] Auto commit changes from spotless
Oct 27, 2025
401a041
do not mmap temp offset files
parkertimmins Oct 27, 2025
ad55bc3
feedback
parkertimmins Oct 27, 2025
4d4e153
[CI] Auto commit changes from spotless
Oct 27, 2025
f1ff182
Move zstd (de)compressor to separate class
parkertimmins Oct 27, 2025
9d2f237
Combine doAddCompressedBinary and doAddUncompressedBinary
parkertimmins Oct 27, 2025
2269f9c
[CI] Auto commit changes from spotless
Oct 27, 2025
1c4e9dc
feedback
parkertimmins Oct 28, 2025
3ddb649
Add WildcardRollingUpgradeIT
parkertimmins Oct 28, 2025
dbcd1c6
need new compressor/decompressor for new block writer
parkertimmins Oct 29, 2025
5537d8c
[CI] Auto commit changes from spotless
Oct 29, 2025
d7fce75
Cleanup binaryWriter interface
parkertimmins Oct 29, 2025
bb8361c
Revert "[CI] Auto commit changes from spotless"
parkertimmins Oct 29, 2025
aa3d44f
Revert "Add WildcardRollingUpgradeIT"
parkertimmins Oct 29, 2025
2c1f143
[CI] Auto commit changes from spotless
Oct 29, 2025
636c150
Update code lookup to support other compressors
parkertimmins Oct 29, 2025
09898ff
feedback
parkertimmins Oct 29, 2025
8b8b50b
Update bwc tests
parkertimmins Oct 29, 2025
8a82c23
cleanup
parkertimmins Oct 29, 2025
cef255f
Merge branch 'main' into parker/compressed-binary-doc-values
parkertimmins Oct 29, 2025
718ffc6
Fix test broken from merge
parkertimmins Oct 29, 2025
ebda5b0
Update docs/changelog/137139.yaml
parkertimmins Oct 30, 2025
9fc23f1
Move block address and doc_range accumulators into BlockMetadataAccum…
parkertimmins Oct 30, 2025
49e5425
Merge branch 'main' into parker/compressed-binary-doc-values
parkertimmins Oct 30, 2025
80525bf
Unit tests that require multiple doc value blocks
parkertimmins Oct 31, 2025
b1d4b17
Test values near the size of a block
parkertimmins Oct 31, 2025
e332619
Self close BlockMetadataAcc if throw during construction
parkertimmins Oct 31, 2025
60ebfaa
Merge branch 'main' into parker/compressed-binary-doc-values
parkertimmins Nov 3, 2025
1209e78
Update tsdb doc_values bwc test to mention version 1
parkertimmins Nov 3, 2025
80c14a3
Update docs/changelog/137139.yaml
parkertimmins Nov 3, 2025
602c203
Disable compression for geo_shape type
parkertimmins Nov 4, 2025
d6293d9
Test that wildcard uses ES819 docs encoding and geo_shape does not
parkertimmins Nov 4, 2025
982386e
[CI] Auto commit changes from spotless
Nov 4, 2025
e61b8c2
Add feature flag for binary dv compression
parkertimmins Nov 6, 2025
a225b98
Merge branch 'main' into parker/compressed-binary-doc-values
parkertimmins Nov 7, 2025
f6fd5bd
Merge branch 'main' into parker/compressed-binary-doc-values
parkertimmins Nov 7, 2025
5fe2c80
Add block count threshold in addition to size threshold
parkertimmins Nov 7, 2025
51b21ae
[CI] Auto commit changes from spotless
Nov 7, 2025
07eeb5a
Add test for very small binary values
parkertimmins Nov 7, 2025
d56d12f
Merge branch 'main' into parker/compressed-binary-doc-values
parkertimmins Nov 14, 2025
980df97
Use groupVarInt instead of TSDB encoder
parkertimmins Nov 14, 2025
21a98ac
Don't test bulk loading if compressed, as not implemented
parkertimmins Nov 14, 2025
2239732
[CI] Auto commit changes from spotless
Nov 14, 2025
15823e8
Fix broken merge
parkertimmins Nov 14, 2025
25dcb56
Merge branch 'main' into parker/compressed-binary-doc-values
parkertimmins Nov 14, 2025
200e14c
Revert to using TSDBDocValueEncoder for offsets
parkertimmins Nov 15, 2025
5ca24b4
Better naming and minor optimization
parkertimmins Nov 15, 2025
7f8fa16
Don't need to grow offsets array
parkertimmins Nov 15, 2025
91c23ee
And back to GroupedVarInt, this time with better delta decoding
parkertimmins Nov 17, 2025
92c8050
Add header to control whether block is compressed or uncompressed
parkertimmins Nov 17, 2025
016352a
Handle isCompressed in ES819DocValuesProducer, add bwc tests
parkertimmins Nov 17, 2025
8a2af81
Merge branch 'main' into parker/compressed-binary-doc-values
parkertimmins Nov 17, 2025
026406b
[CI] Auto commit changes from spotless
Nov 17, 2025
d27bb8b
Skip bulk loading tests if compressed
parkertimmins Nov 17, 2025
db68af6
review feedback
parkertimmins Nov 18, 2025
50d9a26
Merge branch 'main' into parker/compressed-binary-doc-values
parkertimmins Nov 19, 2025
Files changed
@@ -27,6 +27,7 @@
import org.elasticsearch.cluster.metadata.DataStream;
import org.elasticsearch.common.logging.LogConfigurator;
import org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec;
import org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode;
import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
@@ -257,7 +258,13 @@ private static IndexWriterConfig createIndexWriterConfig(boolean optimizedMergeEnabled) {
);
config.setLeafSorter(DataStream.TIMESERIES_LEAF_READERS_SORTER);
config.setMergePolicy(new LogByteSizeMergePolicy());
var docValuesFormat = new ES819TSDBDocValuesFormat(4096, 512, optimizedMergeEnabled);
var docValuesFormat = new ES819TSDBDocValuesFormat(
4096,
512,
optimizedMergeEnabled,
BinaryDVCompressionMode.COMPRESSED_ZSTD_LEVEL_1,
true
);
config.setCodec(new Elasticsearch92Lucene103Codec() {
@Override
public DocValuesFormat getDocValuesFormatForField(String field) {
27 changes: 27 additions & 0 deletions docs/changelog/137139.yaml
@@ -0,0 +1,27 @@
pr: 137139
summary: Add binary doc value compression with variable doc count blocks
area: Mapping
type: feature
issues: []
highlight:
title: Add binary doc value compression with variable doc count blocks
  body: |-
    Add compression for binary doc values using Zstd and blocks with a
    variable number of values.

    Block-wise LZ4 was previously added to Lucene in
    [LUCENE-9211](https://issues.apache.org/jira/browse/LUCENE-9211). This
    was subsequently removed in
    [LUCENE-9378](https://issues.apache.org/jira/browse/LUCENE-9378) due to
    query performance issues.

    We investigated adding the original Lucene implementation to ES in
    https://github.com/elastic/elasticsearch/pull/112416 and
    https://github.com/elastic/elasticsearch/pull/105301. This approach
    stores a constant number of values per block (specifically 32 values).
    This is nice because it makes it very easy to map a given value index
    (e.g. docId for dense values) to the block containing it with
    `blockId = docId / 32`. Unfortunately, if values are very large we
    cannot reduce the number of values per block, and (de)compressing a
    block could cause an OOM. Also, since this is a concern, we have to
    keep the number of values per block lower than ideal.

    This PR instead stores a variable number of documents per block. It
    stores a minimum of 1 document per block and stops adding values when
    the size of a block exceeds a threshold. Like the previous version, it
    stores an array of addresses for the start of each block. Additionally,
    it stores a parallel array with the value index at the start of each
    block. When looking up a given value index, if it is not in the current
    block, we binary search the array of value-index starts to find the
    blockId containing the value, then look up the address of the block.
notable: true
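To make the lookup described in the changelog concrete, here is a minimal sketch of the binary search over per-block first-value indexes. The names `blockValueStarts` and `findBlock` are illustrative, not from the PR; the actual format stores this array via DirectMonotonicWriter and reads it back through DirectMonotonicReader.

```java
// Illustrative sketch only: find the block containing a value index by binary
// searching the parallel array of per-block "first value index" entries.
// blockValueStarts[i] is the value index at the start of block i; block 0
// always starts at value index 0.
public class BlockLookupSketch {
    static int findBlock(long[] blockValueStarts, long valueIndex) {
        int lo = 0, hi = blockValueStarts.length - 1;
        while (lo <= hi) {
            int mid = (lo + hi) >>> 1;
            if (blockValueStarts[mid] <= valueIndex) {
                lo = mid + 1; // candidate block; keep looking for a later one
            } else {
                hi = mid - 1;
            }
        }
        return hi; // last block whose first value index is <= valueIndex
    }

    public static void main(String[] args) {
        long[] starts = { 0, 5, 9 }; // blocks hold values [0,5), [5,9), [9,...)
        assert findBlock(starts, 0) == 0;
        assert findBlock(starts, 7) == 1;
        assert findBlock(starts, 9) == 2;
    }
}
```

Once the blockId is known, the parallel block-address array gives the file offset at which to read (and, if needed, decompress) that block.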
@@ -41,6 +41,7 @@
public class PerFieldFormatSupplier {

private static final Set<String> INCLUDE_META_FIELDS;
private static final Set<String> EXCLUDE_MAPPER_TYPES;

static {
// TODO: should we just allow all fields to use tsdb doc values codec?
@@ -53,6 +54,7 @@ public class PerFieldFormatSupplier {
// Don't include the _recovery_source_size and _recovery_source fields, since their values can be trimmed away in
// RecoverySourcePruneMergePolicy, which leads to inconsistencies between merge stats and actual values.
INCLUDE_META_FIELDS = Collections.unmodifiableSet(includeMetaField);
EXCLUDE_MAPPER_TYPES = Set.of("geo_shape");
}

private static final DocValuesFormat docValuesFormat = new Lucene90DocValuesFormat();
@@ -145,6 +147,10 @@ boolean useTSDBDocValuesFormat(final String field) {
return false;
}

if (excludeMapperTypes(field)) {
return false;
}

return mapperService != null
&& mapperService.getIndexSettings().useTimeSeriesDocValuesFormat()
&& mapperService.getIndexSettings().isES87TSDBCodecEnabled();
@@ -154,4 +160,29 @@ private boolean excludeFields(String fieldName) {
return fieldName.startsWith("_") && INCLUDE_META_FIELDS.contains(fieldName) == false;
}

private boolean excludeMapperTypes(String fieldName) {
    var typeName = getMapperType(fieldName);
    if (typeName == null) {
        return false;
    }
    return EXCLUDE_MAPPER_TYPES.contains(typeName);
}

private boolean isTimeSeriesModeIndex() {
return mapperService != null && IndexMode.TIME_SERIES == mapperService.getIndexSettings().getMode();
}

private boolean isLogsModeIndex() {
return mapperService != null && IndexMode.LOGSDB == mapperService.getIndexSettings().getMode();
}

String getMapperType(final String field) {
if (mapperService != null) {
Mapper mapper = mapperService.mappingLookup().getMapper(field);
if (mapper != null) {
return mapper.typeName();
}
}
return null;
}
}
@@ -0,0 +1,65 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.codec.tsdb;

import org.apache.lucene.codecs.compressing.CompressionMode;
import org.elasticsearch.index.codec.zstd.ZstdCompressionMode;

public enum BinaryDVCompressionMode {

NO_COMPRESS((byte) 0, null),
COMPRESSED_ZSTD_LEVEL_1((byte) 1, new ZstdCompressionMode(1));

public final byte code;
private final CompressionMode compressionMode;

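// Lookup table indexed by the on-disk code byte; assumes codes are dense and start at 0.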
private static final BinaryDVCompressionMode[] values = new BinaryDVCompressionMode[values().length];
static {
for (BinaryDVCompressionMode mode : values()) {
values[mode.code] = mode;
}
}

BinaryDVCompressionMode(byte code, CompressionMode compressionMode) {
this.code = code;
this.compressionMode = compressionMode;
}

public static BinaryDVCompressionMode fromMode(byte code) {
if (code < 0 || code >= values.length) {
throw new IllegalStateException("unknown compression mode [" + code + "]");
}
return values[code];
}

public CompressionMode compressionMode() {
if (compressionMode == null) {
throw new UnsupportedOperationException("BinaryDVCompressionMode [" + code + "] does not support compression");
}
return compressionMode;
}

public record BlockHeader(boolean isCompressed) {
static final byte IS_COMPRESSED = 0x1;

public static BlockHeader fromByte(byte header) {
boolean isCompressed = (header & IS_COMPRESSED) != 0;
return new BlockHeader(isCompressed);
}

public byte toByte() {
byte header = 0;
if (isCompressed) {
header |= IS_COMPRESSED;
}
return header;
}
}
}
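A short, hypothetical usage sketch of the enum above (not taken from the PR): round-tripping the on-disk mode code and the one-byte per-block header. Per the commit history ("Add header to control whether block is compressed or uncompressed"), the per-block header lets an individual block be stored uncompressed even when the field's mode is a compressed one.

```java
import org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode;

public class BinaryDVCompressionModeExample {
    public static void main(String[] args) {
        // Round-trip the on-disk mode code.
        BinaryDVCompressionMode mode = BinaryDVCompressionMode.fromMode((byte) 1);
        assert mode == BinaryDVCompressionMode.COMPRESSED_ZSTD_LEVEL_1;

        // Round-trip the one-byte per-block header; the single bit records
        // whether this particular block was actually compressed.
        byte header = new BinaryDVCompressionMode.BlockHeader(true).toByte();
        assert BinaryDVCompressionMode.BlockHeader.fromByte(header).isCompressed();
    }
}
```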
@@ -0,0 +1,142 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.codec.tsdb.es819;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.packed.DirectMonotonicWriter;
import org.elasticsearch.core.IOUtils;

import java.io.Closeable;
import java.io.IOException;

public final class BlockMetadataAccumulator implements Closeable {

private final DelayedOffsetAccumulator blockAddressAcc;
private final DelayedOffsetAccumulator blockDocRangeAcc;

BlockMetadataAccumulator(Directory dir, IOContext context, IndexOutput data, long addressesStart) throws IOException {
boolean success = false;
try {
blockDocRangeAcc = new DelayedOffsetAccumulator(dir, context, data, "block-doc-ranges", 0);
blockAddressAcc = new DelayedOffsetAccumulator(dir, context, data, "block-addresses", addressesStart);
success = true;
} finally {
if (success == false) {
IOUtils.closeWhileHandlingException(this); // self-close because constructor caller can't
}
}
}

public void addDoc(long numDocsInBlock, long blockLenInBytes) throws IOException {
blockDocRangeAcc.addDoc(numDocsInBlock);
blockAddressAcc.addDoc(blockLenInBytes);
}

public void build(IndexOutput meta, IndexOutput data) throws IOException {
long dataAddressesStart = data.getFilePointer();
blockAddressAcc.build(meta, data);
long dataDocRangeStart = data.getFilePointer();
long addressesLength = dataDocRangeStart - dataAddressesStart;
meta.writeLong(addressesLength);

meta.writeLong(dataDocRangeStart);
blockDocRangeAcc.build(meta, data);
long docRangesLen = data.getFilePointer() - dataDocRangeStart;
meta.writeLong(docRangesLen);
}

@Override
public void close() throws IOException {
IOUtils.closeWhileHandlingException(blockAddressAcc, blockDocRangeAcc);
}

/**
* Like OffsetsAccumulator, builds offsets and stores them with a DirectMonotonicWriter. But writes to a temp file
* rather than directly to a DirectMonotonicWriter, because the number of values is not known up front.
*/
static final class DelayedOffsetAccumulator implements Closeable {

private final Directory dir;
private final long startOffset;

private int numValues = 0;
private final IndexOutput tempOutput;
private final String suffix;

DelayedOffsetAccumulator(Directory dir, IOContext context, IndexOutput data, String suffix, long startOffset) throws IOException {
this.dir = dir;
this.startOffset = startOffset;
this.suffix = suffix;

boolean success = false;
try {
tempOutput = dir.createTempOutput(data.getName(), suffix, context);
CodecUtil.writeHeader(tempOutput, ES819TSDBDocValuesFormat.META_CODEC + suffix, ES819TSDBDocValuesFormat.VERSION_CURRENT);
success = true;
} finally {
if (success == false) {
IOUtils.closeWhileHandlingException(this); // self-close because constructor caller can't
}
}
}

void addDoc(long delta) throws IOException {
tempOutput.writeVLong(delta);
numValues++;
}

void build(IndexOutput meta, IndexOutput data) throws IOException {
CodecUtil.writeFooter(tempOutput);
IOUtils.close(tempOutput);

// write the offsets info to the meta file by reading from temp file
try (ChecksumIndexInput tempInput = dir.openChecksumInput(tempOutput.getName())) {
CodecUtil.checkHeader(
tempInput,
ES819TSDBDocValuesFormat.META_CODEC + suffix,
ES819TSDBDocValuesFormat.VERSION_CURRENT,
ES819TSDBDocValuesFormat.VERSION_CURRENT
);
Throwable priorE = null;
try {
final DirectMonotonicWriter writer = DirectMonotonicWriter.getInstance(
meta,
data,
numValues + 1,
ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT
);

long offset = startOffset;
writer.add(offset);
for (int i = 0; i < numValues; ++i) {
offset += tempInput.readVLong();
writer.add(offset);
}
writer.finish();
} catch (Throwable e) {
priorE = e;
} finally {
CodecUtil.checkFooter(tempInput, priorE);
}
}
}

@Override
public void close() throws IOException {
if (tempOutput != null) {
IOUtils.close(tempOutput, () -> dir.deleteFile(tempOutput.getName()));
}
}
}
}
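As a self-contained illustration (not the PR's code), the delta-to-offsets transformation that `build` performs over the temp file amounts to a prefix sum seeded with `startOffset`, producing `numValues + 1` monotonic offsets; block i then spans `[offsets[i], offsets[i+1])` in the data file.

```java
public class OffsetsSketch {
    // What DelayedOffsetAccumulator.build effectively computes: a prefix sum
    // over the per-block deltas written to the temp file. The real code streams
    // the result through a DirectMonotonicWriter rather than materializing it.
    static long[] toOffsets(long startOffset, long[] deltas) {
        long[] offsets = new long[deltas.length + 1];
        offsets[0] = startOffset;
        for (int i = 0; i < deltas.length; i++) {
            offsets[i + 1] = offsets[i] + deltas[i];
        }
        return offsets;
    }

    public static void main(String[] args) {
        // Three blocks of 3, 5, and 2 bytes starting at file offset 0:
        long[] offsets = toOffsets(0, new long[] { 3, 5, 2 });
        // offsets == [0, 3, 8, 10]; block i spans [offsets[i], offsets[i+1]).
        assert java.util.Arrays.equals(offsets, new long[] { 0, 3, 8, 10 });
    }
}
```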