diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java index 9cc304828a8a7..cb929d4437722 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java @@ -222,7 +222,8 @@ private static BlockLoader blockLoader(String name) { Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER, new KeywordFieldMapper.Builder(name, IndexVersion.current()).docValues(ft.docValuesType() != DocValuesType.NONE), - syntheticSource + syntheticSource, + false ).blockLoader(new MappedFieldType.BlockLoaderContext() { @Override public String indexName() { diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/TSDBDocValuesMergeBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/TSDBDocValuesMergeBenchmark.java index a3b2fd3633adf..94a080db539dc 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/TSDBDocValuesMergeBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/TSDBDocValuesMergeBenchmark.java @@ -27,6 +27,7 @@ import org.elasticsearch.cluster.metadata.DataStream; import org.elasticsearch.common.logging.LogConfigurator; import org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec; +import org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode; import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; @@ -257,7 +258,7 @@ private static IndexWriterConfig createIndexWriterConfig(boolean optimizedMergeE ); config.setLeafSorter(DataStream.TIMESERIES_LEAF_READERS_SORTER); config.setMergePolicy(new LogByteSizeMergePolicy()); - var 
docValuesFormat = new ES819TSDBDocValuesFormat(4096, 512, optimizedMergeEnabled); + var docValuesFormat = new ES819TSDBDocValuesFormat(4096, 512, optimizedMergeEnabled, BinaryDVCompressionMode.COMPRESSED_WITH_LZ4); config.setCodec(new Elasticsearch92Lucene103Codec() { @Override public DocValuesFormat getDocValuesFormatForField(String field) { diff --git a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java index 8e70945fc2a76..0042810241be7 100644 --- a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java +++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java @@ -298,7 +298,8 @@ public void testBlockLoaderDoesNotUseSyntheticSourceDelegateWhenIgnoreAboveIsSet mock(NamedAnalyzer.class), mock(NamedAnalyzer.class), builder, - true + true, + false ); MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType( @@ -346,7 +347,8 @@ public void testBlockLoaderDoesNotUseSyntheticSourceDelegateWhenIgnoreAboveIsSet mock(NamedAnalyzer.class), mock(NamedAnalyzer.class), builder, - true + true, + false ); MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType( diff --git a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java index d4fef4e9bb489..9de1f103d85f5 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java @@ -243,6 +243,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings { if (IndexSettings.DOC_VALUES_SKIPPER) { 
settings.add(IndexSettings.USE_DOC_VALUES_SKIPPER); } + settings.add(IndexSettings.USE_BINARY_DOC_VALUES); settings.add(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING); BUILT_IN_INDEX_SETTINGS = Collections.unmodifiableSet(settings); }; diff --git a/server/src/main/java/org/elasticsearch/index/IndexSettings.java b/server/src/main/java/org/elasticsearch/index/IndexSettings.java index b396e1ca206e3..de73e65a4696f 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexSettings.java +++ b/server/src/main/java/org/elasticsearch/index/IndexSettings.java @@ -675,6 +675,13 @@ public boolean isES87TSDBCodecEnabled() { Property.Final ); + public static final Setting USE_BINARY_DOC_VALUES = Setting.boolSetting( + "index.mapping.use_binary_doc_values", + false, + Property.IndexScope, + Property.Final + ); + /** * The {@link IndexMode "mode"} of the index. */ diff --git a/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java b/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java index 2ed1aa6c9f17f..b0b6c72b3ef81 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java +++ b/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java @@ -20,9 +20,11 @@ import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.codec.bloomfilter.ES87BloomFilterPostingsFormat; import org.elasticsearch.index.codec.postings.ES812PostingsFormat; +import org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode; import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat; import org.elasticsearch.index.mapper.CompletionFieldMapper; import org.elasticsearch.index.mapper.IdFieldMapper; +import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.SeqNoFieldMapper; @@ -57,7 +59,10 @@ public class 
PerFieldFormatSupplier { private static final DocValuesFormat docValuesFormat = new Lucene90DocValuesFormat(); private static final KnnVectorsFormat knnVectorsFormat = new Lucene99HnswVectorsFormat(); - private static final ES819TSDBDocValuesFormat tsdbDocValuesFormat = new ES819TSDBDocValuesFormat(); + private static final ES819TSDBDocValuesFormat tsdbDocValuesFormat = new ES819TSDBDocValuesFormat(BinaryDVCompressionMode.NO_COMPRESS); + private static final DocValuesFormat compressedBinaryDocValuesFormat = new ES819TSDBDocValuesFormat( + BinaryDVCompressionMode.COMPRESSED_WITH_LZ4 + ); private static final ES812PostingsFormat es812PostingsFormat = new ES812PostingsFormat(); private static final PostingsFormat completionPostingsFormat = PostingsFormat.forName("Completion101"); @@ -127,6 +132,13 @@ public KnnVectorsFormat getKnnVectorsFormatForField(String field) { } public DocValuesFormat getDocValuesFormatForField(String field) { + if (mapperService != null) { + Mapper mapper = mapperService.mappingLookup().getMapper(field); + if (mapper != null && KeywordFieldMapper.CONTENT_TYPE.equals(mapper.typeName())) { + return compressedBinaryDocValuesFormat; + } + } + if (useTSDBDocValuesFormat(field)) { return tsdbDocValuesFormat; } diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/BinaryDVCompressionMode.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/BinaryDVCompressionMode.java new file mode 100644 index 0000000000000..ce0f365eb529e --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/BinaryDVCompressionMode.java @@ -0,0 +1,29 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.index.codec.tsdb; + +public enum BinaryDVCompressionMode { + + NO_COMPRESS((byte) 0), + COMPRESSED_WITH_LZ4((byte) 1); + + public final byte code; + + BinaryDVCompressionMode(byte code) { + this.code = code; + } + + public static BinaryDVCompressionMode fromMode(byte mode) { + return switch (mode) { + case 0 -> NO_COMPRESS; + case 1 -> COMPRESSED_WITH_LZ4; + default -> throw new IllegalStateException("unknown compression mode [" + mode + "]"); + }; + } +} diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java index 968e50eaf32be..fce05c94a4d3e 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java @@ -9,10 +9,12 @@ package org.elasticsearch.index.codec.tsdb.es819; +import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.codecs.lucene90.IndexedDISI; import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValuesSkipIndexType; import org.apache.lucene.index.FieldInfo; @@ -29,6 +31,7 @@ import org.apache.lucene.store.ByteArrayDataOutput; import org.apache.lucene.store.ByteBuffersDataOutput; import org.apache.lucene.store.ByteBuffersIndexOutput; +import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexOutput; @@ -41,8 +44,10 @@ import org.apache.lucene.util.packed.DirectMonotonicWriter; import org.apache.lucene.util.packed.PackedInts; import org.elasticsearch.core.IOUtils; 
+import org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode; import org.elasticsearch.index.codec.tsdb.TSDBDocValuesEncoder; +import java.io.Closeable; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; @@ -65,9 +70,12 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer { private final int minDocsPerOrdinalForOrdinalRangeEncoding; final boolean enableOptimizedMerge; private final int primarySortFieldNumber; + final SegmentWriteState state; + final BinaryDVCompressionMode binaryDVCompressionMode; ES819TSDBDocValuesConsumer( SegmentWriteState state, + BinaryDVCompressionMode binaryDVCompressionMode, int skipIndexIntervalSize, int minDocsPerOrdinalForOrdinalRangeEncoding, boolean enableOptimizedMerge, @@ -76,6 +84,8 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer { String metaCodec, String metaExtension ) throws IOException { + this.state = state; + this.binaryDVCompressionMode = binaryDVCompressionMode; this.termsDictBuffer = new byte[1 << 14]; this.dir = state.directory; this.minDocsPerOrdinalForOrdinalRangeEncoding = minDocsPerOrdinalForOrdinalRangeEncoding; @@ -315,7 +325,14 @@ public void mergeBinaryField(FieldInfo mergeFieldInfo, MergeState mergeState) th public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { meta.writeInt(field.number); meta.writeByte(ES819TSDBDocValuesFormat.BINARY); + meta.writeByte(binaryDVCompressionMode.code); + switch (binaryDVCompressionMode) { + case NO_COMPRESS -> doAddUncompressedBinary(field, valuesProducer); + case COMPRESSED_WITH_LZ4 -> doAddCompressedBinaryLZ4(field, valuesProducer); + } + } + public void doAddUncompressedBinary(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { if (valuesProducer instanceof TsdbDocValuesProducer tsdbValuesProducer && tsdbValuesProducer.mergeStats.supported()) { final int numDocsWithField = tsdbValuesProducer.mergeStats.sumNumDocsWithField(); final int 
minLength = tsdbValuesProducer.mergeStats.minLength(); @@ -444,6 +461,213 @@ public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) th } } + public void doAddCompressedBinaryLZ4(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { + try (CompressedBinaryBlockWriter blockWriter = new CompressedBinaryBlockWriter()) { + BinaryDocValues values = valuesProducer.getBinary(field); + long start = data.getFilePointer(); + meta.writeLong(start); // dataOffset + int numDocsWithField = 0; + int minLength = Integer.MAX_VALUE; + int maxLength = 0; + for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { + numDocsWithField++; + BytesRef v = values.binaryValue(); + blockWriter.addDoc(doc, v); + int length = v.length; + minLength = Math.min(length, minLength); + maxLength = Math.max(length, maxLength); + } + blockWriter.flushData(); + + assert numDocsWithField <= maxDoc; + meta.writeLong(data.getFilePointer() - start); // dataLength + + if (numDocsWithField == 0) { + meta.writeLong(-2); // docsWithFieldOffset + meta.writeLong(0L); // docsWithFieldLength + meta.writeShort((short) -1); // jumpTableEntryCount + meta.writeByte((byte) -1); // denseRankPower + } else if (numDocsWithField == maxDoc) { + meta.writeLong(-1); // docsWithFieldOffset + meta.writeLong(0L); // docsWithFieldLength + meta.writeShort((short) -1); // jumpTableEntryCount + meta.writeByte((byte) -1); // denseRankPower + } else { + long offset = data.getFilePointer(); + meta.writeLong(offset); // docsWithFieldOffset + values = valuesProducer.getBinary(field); + final short jumpTableEntryCount = IndexedDISI.writeBitSet(values, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER); + meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength + meta.writeShort(jumpTableEntryCount); + meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER); + } + + meta.writeInt(numDocsWithField); + meta.writeInt(minLength); + meta.writeInt(maxLength); + + 
blockWriter.writeMetaData(); + } + } + + static final int BINARY_BLOCK_SHIFT = 5; + static final int BINARY_DOCS_PER_COMPRESSED_BLOCK = 1 << BINARY_BLOCK_SHIFT; + + private class CompressedBinaryBlockWriter implements Closeable { + final LZ4.FastCompressionHashTable ht = new LZ4.FastCompressionHashTable(); + int uncompressedBlockLength = 0; + int maxUncompressedBlockLength = 0; + int numDocsInCurrentBlock = 0; + final int[] docLengths = new int[BINARY_DOCS_PER_COMPRESSED_BLOCK]; + byte[] block = BytesRef.EMPTY_BYTES; + int totalChunks = 0; + long maxPointer = 0; + final long blockAddressesStart; + + final IndexOutput tempBinaryOffsets; + + CompressedBinaryBlockWriter() throws IOException { + tempBinaryOffsets = EndiannessReverserUtil.createTempOutput( + state.directory, + state.segmentInfo.name, + "binary_pointers", + state.context + ); + boolean success = false; + try { + CodecUtil.writeHeader( + tempBinaryOffsets, + ES819TSDBDocValuesFormat.META_CODEC + "FilePointers", + ES819TSDBDocValuesFormat.VERSION_CURRENT + ); + blockAddressesStart = data.getFilePointer(); + success = true; + } finally { + if (success == false) { + IOUtils.closeWhileHandlingException(this); // self-close because constructor caller can't + } + } + } + + void addDoc(int doc, BytesRef v) throws IOException { + docLengths[numDocsInCurrentBlock] = v.length; + block = ArrayUtil.grow(block, uncompressedBlockLength + v.length); + System.arraycopy(v.bytes, v.offset, block, uncompressedBlockLength, v.length); + uncompressedBlockLength += v.length; + numDocsInCurrentBlock++; + if (numDocsInCurrentBlock == BINARY_DOCS_PER_COMPRESSED_BLOCK) { + flushData(); + } + } + + private void flushData() throws IOException { + if (numDocsInCurrentBlock > 0) { + // Write offset to this block to temporary offsets file + totalChunks++; + long thisBlockStartPointer = data.getFilePointer(); + + // Optimisation - check if all lengths are same + boolean allLengthsSame = true; + for (int i = 1; i < 
BINARY_DOCS_PER_COMPRESSED_BLOCK; i++) { + if (docLengths[i] != docLengths[i - 1]) { + allLengthsSame = false; + break; + } + } + if (allLengthsSame) { + // Only write one value shifted. Steal a bit to indicate all other lengths are the same + int onlyOneLength = (docLengths[0] << 1) | 1; + data.writeVInt(onlyOneLength); + } else { + for (int i = 0; i < BINARY_DOCS_PER_COMPRESSED_BLOCK; i++) { + if (i == 0) { + // Write first value shifted and steal a bit to indicate other lengths are to follow + int multipleLengths = (docLengths[0] << 1); + data.writeVInt(multipleLengths); + } else { + data.writeVInt(docLengths[i]); + } + } + } + maxUncompressedBlockLength = Math.max(maxUncompressedBlockLength, uncompressedBlockLength); + LZ4.compress(block, 0, uncompressedBlockLength, EndiannessReverserUtil.wrapDataOutput(data), ht); + numDocsInCurrentBlock = 0; + // Ensure initialized with zeroes because full array is always written + Arrays.fill(docLengths, 0); + uncompressedBlockLength = 0; + maxPointer = data.getFilePointer(); + tempBinaryOffsets.writeVLong(maxPointer - thisBlockStartPointer); + } + } + + void writeMetaData() throws IOException { + if (totalChunks == 0) { + return; + } + + long startDMW = data.getFilePointer(); + meta.writeLong(startDMW); + + meta.writeVInt(totalChunks); + meta.writeVInt(BINARY_BLOCK_SHIFT); + meta.writeVInt(maxUncompressedBlockLength); + meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT); + + CodecUtil.writeFooter(tempBinaryOffsets); + IOUtils.close(tempBinaryOffsets); + // write the compressed block offsets info to the meta file by reading from temp file + try ( + ChecksumIndexInput filePointersIn = EndiannessReverserUtil.openChecksumInput( + state.directory, + tempBinaryOffsets.getName(), + IOContext.READONCE + ) + ) { + CodecUtil.checkHeader( + filePointersIn, + ES819TSDBDocValuesFormat.META_CODEC + "FilePointers", + ES819TSDBDocValuesFormat.VERSION_CURRENT, + ES819TSDBDocValuesFormat.VERSION_CURRENT + ); + Throwable priorE = null; + try { + 
final DirectMonotonicWriter filePointers = DirectMonotonicWriter.getInstance( + meta, + data, + totalChunks, + ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT + ); + long fp = blockAddressesStart; + for (int i = 0; i < totalChunks; ++i) { + filePointers.add(fp); + fp += filePointersIn.readVLong(); + } + if (maxPointer < fp) { + throw new CorruptIndexException( + "File pointers don't add up (" + fp + " vs expected " + maxPointer + ")", + filePointersIn + ); + } + filePointers.finish(); + } catch (Throwable e) { + priorE = e; + } finally { + CodecUtil.checkFooter(filePointersIn, priorE); + } + } + // Write the length of the DMW block in the data + meta.writeLong(data.getFilePointer() - startDMW); + } + + @Override + public void close() throws IOException { + if (tempBinaryOffsets != null) { + IOUtils.close(tempBinaryOffsets, () -> state.directory.deleteFile(tempBinaryOffsets.getName())); + } + } + } + // END: Copied from LUCENE-9211 + @Override public void addSortedField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { meta.writeInt(field.number); diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java index fbdef488b8318..59eadd7825ec2 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormat.java @@ -14,6 +14,7 @@ import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; import org.elasticsearch.core.SuppressForbidden; +import org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode; import java.io.IOException; @@ -47,7 +48,8 @@ public class ES819TSDBDocValuesFormat extends org.apache.lucene.codecs.DocValues static final byte SORTED_NUMERIC = 4; static final int VERSION_START = 0; - static final int VERSION_CURRENT = 
VERSION_START; + static final int VERSION_BINARY_DV_COMPRESSION = 1; + static final int VERSION_CURRENT = VERSION_BINARY_DV_COMPRESSION; static final int TERMS_DICT_BLOCK_LZ4_SHIFT = 6; static final int TERMS_DICT_BLOCK_LZ4_SIZE = 1 << TERMS_DICT_BLOCK_LZ4_SHIFT; @@ -119,15 +121,36 @@ private static boolean getOptimizedMergeEnabledDefault() { final int skipIndexIntervalSize; final int minDocsPerOrdinalForRangeEncoding; private final boolean enableOptimizedMerge; + final BinaryDVCompressionMode binaryDVCompressionMode; /** Default constructor. */ public ES819TSDBDocValuesFormat() { - this(DEFAULT_SKIP_INDEX_INTERVAL_SIZE, ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL, OPTIMIZED_MERGE_ENABLE_DEFAULT); + this( + DEFAULT_SKIP_INDEX_INTERVAL_SIZE, + ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL, + OPTIMIZED_MERGE_ENABLE_DEFAULT, + BinaryDVCompressionMode.COMPRESSED_WITH_LZ4 + ); + } + + public ES819TSDBDocValuesFormat(BinaryDVCompressionMode binaryDVCompressionMode) { + this( + DEFAULT_SKIP_INDEX_INTERVAL_SIZE, + ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL, + OPTIMIZED_MERGE_ENABLE_DEFAULT, + binaryDVCompressionMode + ); } /** Doc values fields format with specified skipIndexIntervalSize. 
*/ - public ES819TSDBDocValuesFormat(int skipIndexIntervalSize, int minDocsPerOrdinalForRangeEncoding, boolean enableOptimizedMerge) { + public ES819TSDBDocValuesFormat( + int skipIndexIntervalSize, + int minDocsPerOrdinalForRangeEncoding, + boolean enableOptimizedMerge, + BinaryDVCompressionMode binaryDVCompressionMode + ) { super(CODEC_NAME); + this.binaryDVCompressionMode = binaryDVCompressionMode; if (skipIndexIntervalSize < 2) { throw new IllegalArgumentException("skipIndexIntervalSize must be > 1, got [" + skipIndexIntervalSize + "]"); } @@ -140,6 +163,7 @@ public ES819TSDBDocValuesFormat(int skipIndexIntervalSize, int minDocsPerOrdinal public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException { return new ES819TSDBDocValuesConsumer( state, + binaryDVCompressionMode, skipIndexIntervalSize, minDocsPerOrdinalForRangeEncoding, enableOptimizedMerge, diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java index 5d90f2814853d..00e5667a5d177 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java @@ -9,6 +9,7 @@ package org.elasticsearch.index.codec.tsdb.es819; +import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.codecs.lucene90.IndexedDISI; @@ -45,12 +46,15 @@ import org.apache.lucene.util.packed.PackedInts; import org.elasticsearch.core.Assertions; import org.elasticsearch.core.IOUtils; +import org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode; import org.elasticsearch.index.codec.tsdb.TSDBDocValuesEncoder; import org.elasticsearch.index.mapper.BlockDocValuesReader; import 
org.elasticsearch.index.mapper.BlockLoader; import java.io.IOException; +import static org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode.COMPRESSED_WITH_LZ4; +import static org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode.NO_COMPRESS; import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.SKIP_INDEX_JUMP_LENGTH_PER_LEVEL; import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.SKIP_INDEX_MAX_LEVEL; import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.TERMS_DICT_BLOCK_LZ4_SHIFT; @@ -97,7 +101,7 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer { state.segmentSuffix ); - readFields(in, state.fieldInfos); + readFields(in, state.fieldInfos, version); } catch (Throwable exception) { priorE = exception; @@ -193,6 +197,13 @@ public BinaryDocValues getBinary(FieldInfo field) throws IOException { return DocValues.emptyBinary(); } + return switch (entry.compression) { + case NO_COMPRESS -> getUncompressedBinary(entry); + case COMPRESSED_WITH_LZ4 -> getCompressedBinary(entry); + }; + } + + public BinaryDocValues getUncompressedBinary(BinaryEntry entry) throws IOException { final RandomAccessInput bytesSlice = data.randomAccessSlice(entry.dataOffset, entry.dataLength); if (entry.docsWithFieldOffset == -1) { @@ -208,6 +219,26 @@ public BytesRef binaryValue() throws IOException { bytesSlice.readBytes((long) doc * length, bytes.bytes, 0, length); return bytes; } + + @Override + public BlockLoader.Block tryRead( + BlockLoader.BlockFactory factory, + BlockLoader.Docs docs, + int offset, + boolean nullsFiltered, + BlockDocValuesReader.ToDouble toDouble, + boolean toInt + ) throws IOException { + int count = docs.count() - offset; + try (var builder = factory.bytesRefs(count)) { + for (int i = offset; i < docs.count(); i++) { + doc = docs.get(i); + bytesSlice.readBytes((long) doc * length, bytes.bytes, 0, length); + builder.appendBytesRef(bytes); + } + return 
builder.build(); + } + } }; } else { // variable length @@ -223,6 +254,28 @@ public BytesRef binaryValue() throws IOException { bytesSlice.readBytes(startOffset, bytes.bytes, 0, bytes.length); return bytes; } + + @Override + public BlockLoader.Block tryRead( + BlockLoader.BlockFactory factory, + BlockLoader.Docs docs, + int offset, + boolean nullsFiltered, + BlockDocValuesReader.ToDouble toDouble, + boolean toInt + ) throws IOException { + int count = docs.count() - offset; + try (var builder = factory.bytesRefs(count)) { + for (int i = offset; i < docs.count(); i++) { + doc = docs.get(i); + long startOffset = addresses.get(doc); + bytes.length = (int) (addresses.get(doc + 1L) - startOffset); + bytesSlice.readBytes(startOffset, bytes.bytes, 0, bytes.length); + builder.appendBytesRef(bytes); + } + return builder.build(); + } + } }; } } else { @@ -267,7 +320,146 @@ public BytesRef binaryValue() throws IOException { } } - private abstract static class DenseBinaryDocValues extends BinaryDocValues { + // START: Copied from LUCENE-9211 + private BinaryDocValues getCompressedBinary(BinaryEntry entry) throws IOException { + if (entry.docsWithFieldOffset == -1) { + // dense + final RandomAccessInput addressesData = this.data.randomAccessSlice(entry.addressesOffset, entry.addressesLength); + final LongValues addresses = DirectMonotonicReader.getInstance(entry.addressesMeta, addressesData); + return new DenseBinaryDocValues(maxDoc) { + final BinaryDecoder decoder = new BinaryDecoder( + addresses, + data.clone(), + entry.maxUncompressedChunkSize, + entry.docsPerChunkShift + ); + + @Override + public BytesRef binaryValue() throws IOException { + return decoder.decode(doc); + } + + @Override + public BlockLoader.Block tryRead( + BlockLoader.BlockFactory factory, + BlockLoader.Docs docs, + int offset, + boolean nullsFiltered, + BlockDocValuesReader.ToDouble toDouble, + boolean toInt + ) throws IOException { + return null; + } + }; + } else { + // sparse + final IndexedDISI disi = 
new IndexedDISI( + data, + entry.docsWithFieldOffset, + entry.docsWithFieldLength, + entry.jumpTableEntryCount, + entry.denseRankPower, + entry.numDocsWithField + ); + final RandomAccessInput addressesData = this.data.randomAccessSlice(entry.addressesOffset, entry.addressesLength); + final LongValues addresses = DirectMonotonicReader.getInstance(entry.addressesMeta, addressesData); + return new SparseBinaryDocValues(disi) { + final BinaryDecoder decoder = new BinaryDecoder( + addresses, + data.clone(), + entry.maxUncompressedChunkSize, + entry.docsPerChunkShift + ); + + @Override + public BytesRef binaryValue() throws IOException { + return decoder.decode(disi.index()); + } + }; + } + + } + + // Decompresses blocks of binary values to retrieve content + static final class BinaryDecoder { + + private final LongValues addresses; + private final IndexInput compressedData; + // Cache of last uncompressed block + private long lastBlockId = -1; + private final int[] uncompressedDocStarts; + private final byte[] uncompressedBlock; + private final BytesRef uncompressedBytesRef; + private final int docsPerChunk; + private final int docsPerChunkShift; + + BinaryDecoder(LongValues addresses, IndexInput compressedData, int biggestUncompressedBlockSize, int docsPerChunkShift) { + super(); + this.addresses = addresses; + this.compressedData = compressedData; + // pre-allocate a byte array large enough for the biggest uncompressed block needed. + this.uncompressedBlock = new byte[biggestUncompressedBlockSize]; + uncompressedBytesRef = new BytesRef(uncompressedBlock); + this.docsPerChunk = 1 << docsPerChunkShift; + this.docsPerChunkShift = docsPerChunkShift; + uncompressedDocStarts = new int[docsPerChunk + 1]; + } + + BytesRef decode(int docNumber) throws IOException { + int blockId = docNumber >> docsPerChunkShift; + int docInBlockId = docNumber % docsPerChunk; + assert docInBlockId < docsPerChunk; + + // already read and uncompressed? 
+ if (blockId != lastBlockId) { + lastBlockId = blockId; + long blockStartOffset = addresses.get(blockId); + compressedData.seek(blockStartOffset); + + int uncompressedBlockLength = 0; + + int onlyLength = -1; + for (int i = 0; i < docsPerChunk; i++) { + if (i == 0) { + // The first length value is special. It is shifted and has a bit to denote if + // all other values are the same length + int lengthPlusSameInd = compressedData.readVInt(); + int sameIndicator = lengthPlusSameInd & 1; + int firstValLength = lengthPlusSameInd >>> 1; + if (sameIndicator == 1) { + onlyLength = firstValLength; + } + uncompressedBlockLength += firstValLength; + } else { + if (onlyLength == -1) { + // Various lengths are stored - read each from disk + uncompressedBlockLength += compressedData.readVInt(); + } else { + // Only one length + uncompressedBlockLength += onlyLength; + } + } + uncompressedDocStarts[i + 1] = uncompressedBlockLength; + } + + if (uncompressedBlockLength == 0) { + uncompressedBytesRef.offset = 0; + uncompressedBytesRef.length = 0; + return uncompressedBytesRef; + } + + assert uncompressedBlockLength <= uncompressedBlock.length; + LZ4.decompress(EndiannessReverserUtil.wrapDataInput(compressedData), uncompressedBlockLength, uncompressedBlock, 0); + } + + uncompressedBytesRef.offset = uncompressedDocStarts[docInBlockId]; + uncompressedBytesRef.length = uncompressedDocStarts[docInBlockId + 1] - uncompressedBytesRef.offset; + return uncompressedBytesRef; + } + } + + // END: Copied from LUCENE-9211 + private abstract static class DenseBinaryDocValues extends BinaryDocValues implements BlockLoader.OptionalColumnAtATimeReader { final int maxDoc; int doc = -1; @@ -1087,7 +1279,7 @@ static int primarySortFieldNumber(SegmentInfo segmentInfo, FieldInfos fieldInfos return -1; } - private void readFields(IndexInput meta, FieldInfos infos) throws IOException { + private void readFields(IndexInput meta, FieldInfos infos, int version) throws IOException { for (int fieldNumber = 
meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) { FieldInfo info = infos.fieldInfo(fieldNumber); if (info == null) { @@ -1100,7 +1292,7 @@ private void readFields(IndexInput meta, FieldInfos infos) throws IOException { if (type == ES819TSDBDocValuesFormat.NUMERIC) { numerics.put(info.number, readNumeric(meta)); } else if (type == ES819TSDBDocValuesFormat.BINARY) { - binaries.put(info.number, readBinary(meta)); + binaries.put(info.number, readBinary(meta, version)); } else if (type == ES819TSDBDocValuesFormat.SORTED) { sorted.put(info.number, readSorted(meta)); } else if (type == ES819TSDBDocValuesFormat.SORTED_SET) { @@ -1162,8 +1354,15 @@ private static void readNumeric(IndexInput meta, NumericEntry entry) throws IOEx entry.denseRankPower = meta.readByte(); } - private BinaryEntry readBinary(IndexInput meta) throws IOException { - final BinaryEntry entry = new BinaryEntry(); + private BinaryEntry readBinary(IndexInput meta, int version) throws IOException { + final BinaryDVCompressionMode compression; + if (version >= ES819TSDBDocValuesFormat.VERSION_BINARY_DV_COMPRESSION) { + compression = BinaryDVCompressionMode.fromMode(meta.readByte()); + } else { + compression = BinaryDVCompressionMode.NO_COMPRESS; + } + final BinaryEntry entry = new BinaryEntry(compression); + entry.dataOffset = meta.readLong(); entry.dataLength = meta.readLong(); entry.docsWithFieldOffset = meta.readLong(); @@ -1173,15 +1372,27 @@ private BinaryEntry readBinary(IndexInput meta) throws IOException { entry.numDocsWithField = meta.readInt(); entry.minLength = meta.readInt(); entry.maxLength = meta.readInt(); - if (entry.minLength < entry.maxLength) { - entry.addressesOffset = meta.readLong(); - - // Old count of uncompressed addresses - long numAddresses = entry.numDocsWithField + 1L; - - final int blockShift = meta.readVInt(); - entry.addressesMeta = DirectMonotonicReader.loadMeta(meta, numAddresses, blockShift); - entry.addressesLength = meta.readLong(); + if (compression == 
BinaryDVCompressionMode.NO_COMPRESS) { + if (entry.minLength < entry.maxLength) { + entry.addressesOffset = meta.readLong(); + // Old count of uncompressed addresses + long numAddresses = entry.numDocsWithField + 1L; + final int blockShift = meta.readVInt(); + entry.addressesMeta = DirectMonotonicReader.loadMeta(meta, numAddresses, blockShift); + entry.addressesLength = meta.readLong(); + } + } else { + if (entry.numDocsWithField > 0 || entry.minLength < entry.maxLength) { + entry.addressesOffset = meta.readLong(); + // New count of compressed addresses - the number of compressed blocks + int numCompressedChunks = meta.readVInt(); + entry.docsPerChunkShift = meta.readVInt(); + entry.maxUncompressedChunkSize = meta.readVInt(); + + final int blockShift = meta.readVInt(); + entry.addressesMeta = DirectMonotonicReader.loadMeta(meta, numCompressedChunks, blockShift); + entry.addressesLength = meta.readLong(); + } } return entry; } @@ -1846,6 +2057,8 @@ static class NumericEntry { } static class BinaryEntry { + final BinaryDVCompressionMode compression; + long dataOffset; long dataLength; long docsWithFieldOffset; @@ -1857,7 +2070,14 @@ static class BinaryEntry { int maxLength; long addressesOffset; long addressesLength; + // compression mode + int maxUncompressedChunkSize; + int docsPerChunkShift; DirectMonotonicReader.Meta addressesMeta; + + BinaryEntry(BinaryDVCompressionMode compression) { + this.compression = compression; + } } static class SortedNumericEntry extends NumericEntry { diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java b/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java index 457c90383b5d2..de35a85e8878c 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java @@ -925,6 +925,28 @@ public AllReader reader(LeafReaderContext context) throws IOException { } } + public 
static class BytesRefsFromBinaryBlockLoader extends DocValuesBlockLoader { + private final String fieldName; + + public BytesRefsFromBinaryBlockLoader(String fieldName) { + this.fieldName = fieldName; + } + + @Override + public Builder builder(BlockFactory factory, int expectedCount) { + return factory.bytesRefs(expectedCount); + } + + @Override + public AllReader reader(LeafReaderContext context) throws IOException { + BinaryDocValues docValues = context.reader().getBinaryDocValues(fieldName); + if (docValues == null) { + return new ConstantNullsReader(); + } + return new BytesRefsFromBinary(docValues); + } + } + abstract static class AbstractBytesRefsFromBinary extends BlockDocValuesReader { protected final BinaryDocValues docValues; @@ -1010,6 +1032,17 @@ public BytesRefsFromBinary(BinaryDocValues docValues) { super(docValues); } + @Override + public BlockLoader.Block read(BlockFactory factory, Docs docs, int offset, boolean nullsFiltered) throws IOException { + if (docValues instanceof BlockLoader.OptionalColumnAtATimeReader direct) { + BlockLoader.Block block = direct.tryRead(factory, docs, offset, nullsFiltered, null, false); + if (block != null) { + return block; + } + } + return super.read(factory, docs, offset, nullsFiltered); + } + @Override void read(int doc, BytesRefBuilder builder) throws IOException { if (false == docValues.advanceExact(doc)) { diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index cf3fad86812f5..1ffa4a67bd248 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -13,15 +13,18 @@ import org.apache.logging.log4j.Logger; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.document.BinaryDocValuesField; import 
org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.InvertableType; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.document.StoredField; +import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.DocValuesSkipIndexType; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MultiTerms; import org.apache.lucene.index.Term; @@ -52,10 +55,12 @@ import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData; import org.elasticsearch.index.fielddata.StoredFieldSortedBinaryIndexFieldData; +import org.elasticsearch.index.fielddata.plain.BinaryIndexFieldData; import org.elasticsearch.index.fielddata.plain.SortedSetOrdinalsIndexFieldData; import org.elasticsearch.index.query.AutomatonQueryWithDescription; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.index.similarity.SimilarityProvider; +import org.elasticsearch.script.BinaryDocValuesStringFieldScript; import org.elasticsearch.script.Script; import org.elasticsearch.script.ScriptCompiler; import org.elasticsearch.script.SortedSetDocValuesStringFieldScript; @@ -90,6 +95,7 @@ import static org.apache.lucene.index.IndexWriter.MAX_TERM_LENGTH; import static org.elasticsearch.core.Strings.format; import static org.elasticsearch.index.IndexSettings.IGNORE_ABOVE_SETTING; +import static org.elasticsearch.index.IndexSettings.USE_BINARY_DOC_VALUES; import static org.elasticsearch.index.IndexSettings.USE_DOC_VALUES_SKIPPER; import static org.elasticsearch.index.mapper.FieldArrayContext.getOffsetsFieldName; import static org.elasticsearch.index.mapper.Mapper.IgnoreAbove.getIgnoreAboveDefaultValue; @@ 
-108,6 +114,8 @@ public static class Defaults { public static final FieldType FIELD_TYPE; public static final FieldType FIELD_TYPE_WITH_SKIP_DOC_VALUES; + public static final FieldType FIELD_TYPE_WITH_BINARY_DOC_VALUES; + static { FieldType ft = new FieldType(); ft.setTokenized(false); @@ -127,6 +135,15 @@ public static class Defaults { FIELD_TYPE_WITH_SKIP_DOC_VALUES = freezeAndDeduplicateFieldType(ft); } + static { + FieldType ft = new FieldType(); + ft.setTokenized(false); + ft.setOmitNorms(true); + ft.setIndexOptions(IndexOptions.NONE); + ft.setDocValuesType(DocValuesType.BINARY); + FIELD_TYPE_WITH_BINARY_DOC_VALUES = freezeAndDeduplicateFieldType(ft); + } + public static final TextSearchInfo TEXT_SEARCH_INFO = new TextSearchInfo( FIELD_TYPE, null, @@ -214,6 +231,7 @@ public static final class Builder extends FieldMapper.DimensionBuilder { private final boolean forceDocValuesSkipper; private final SourceKeepMode indexSourceKeepMode; private final boolean isWithinMultiField; + private final boolean useBinaryDocValues; public Builder(final String name, final MappingParserContext mappingParserContext) { this( @@ -227,7 +245,8 @@ public Builder(final String name, final MappingParserContext mappingParserContex USE_DOC_VALUES_SKIPPER.get(mappingParserContext.getSettings()), false, mappingParserContext.getIndexSettings().sourceKeepMode(), - mappingParserContext.isWithinMultiField() + mappingParserContext.isWithinMultiField(), + USE_BINARY_DOC_VALUES.get(mappingParserContext.getSettings()) ); } @@ -250,7 +269,8 @@ public Builder(final String name, final MappingParserContext mappingParserContex false, false, sourceKeepMode, - isWithinMultiField + isWithinMultiField, + false ); } @@ -265,7 +285,8 @@ private Builder( boolean enableDocValuesSkipper, boolean forceDocValuesSkipper, SourceKeepMode indexSourceKeepMode, - boolean isWithinMultiField + boolean isWithinMultiField, + boolean binaryDocValuesEnabled ) { super(name); this.indexAnalyzers = indexAnalyzers; @@ -301,6 
+322,7 @@ private Builder( this.forceDocValuesSkipper = forceDocValuesSkipper; this.indexSourceKeepMode = indexSourceKeepMode; this.isWithinMultiField = isWithinMultiField; + this.useBinaryDocValues = binaryDocValuesEnabled; } public Builder(String name, IndexVersion indexCreatedVersion) { @@ -330,7 +352,8 @@ public static Builder buildWithDocValuesSkipper( enableDocValuesSkipper, true, SourceKeepMode.NONE, - isWithinMultiField + isWithinMultiField, + false ); } @@ -413,7 +436,7 @@ protected Parameter[] getParameters() { dimension }; } - private KeywordFieldType buildFieldType(MapperBuilderContext context, FieldType fieldType) { + private KeywordFieldType buildFieldType(MapperBuilderContext context, FieldType fieldType, boolean useBinaryDocValues) { NamedAnalyzer normalizer = Lucene.KEYWORD_ANALYZER; NamedAnalyzer searchAnalyzer = Lucene.KEYWORD_ANALYZER; NamedAnalyzer quoteAnalyzer = Lucene.KEYWORD_ANALYZER; @@ -448,20 +471,25 @@ private KeywordFieldType buildFieldType(MapperBuilderContext context, FieldType searchAnalyzer, quoteAnalyzer, this, - context.isSourceSynthetic() + context.isSourceSynthetic(), + useBinaryDocValues ); } @Override public KeywordFieldMapper build(MapperBuilderContext context) { + String fullName = context.buildFullName(leafName()); + // Index sorting by binary doc values is not supported (yet): + boolean useBinaryDocValues = fullName.equals("host.name") == false && this.useBinaryDocValues; FieldType fieldtype = resolveFieldType( + useBinaryDocValues, enableDocValuesSkipper, forceDocValuesSkipper, hasDocValues, indexCreatedVersion, indexSortConfig, indexMode, - context.buildFullName(leafName()) + fullName ); fieldtype.setOmitNorms(this.hasNorms.getValue() == false); fieldtype.setStored(this.stored.getValue()); @@ -492,15 +520,17 @@ public KeywordFieldMapper build(MapperBuilderContext context) { return new KeywordFieldMapper( leafName(), fieldtype, - buildFieldType(context, fieldtype), + buildFieldType(context, fieldtype, useBinaryDocValues),
builderParams(this, context), this, offsetsFieldName, - indexSourceKeepMode + indexSourceKeepMode, + useBinaryDocValues ); } private static FieldType resolveFieldType( + final boolean useBinaryDocValues, final boolean enableDocValuesSkipper, final boolean forceDocValuesSkipper, final Parameter hasDocValues, @@ -509,6 +539,10 @@ private static FieldType resolveFieldType( final IndexMode indexMode, final String fullFieldName ) { + if (useBinaryDocValues) { + return new FieldType(Defaults.FIELD_TYPE_WITH_BINARY_DOC_VALUES); + } + if (enableDocValuesSkipper) { if (forceDocValuesSkipper) { assert hasDocValues.getValue(); @@ -553,6 +587,7 @@ public static final class KeywordFieldType extends TextFamilyFieldType { private final boolean isDimension; private final IndexSortConfig indexSortConfig; private final boolean hasDocValuesSkipper; + private final boolean useBinaryDocValues; public KeywordFieldType( String name, @@ -561,7 +596,8 @@ public KeywordFieldType( NamedAnalyzer searchAnalyzer, NamedAnalyzer quoteAnalyzer, Builder builder, - boolean isSyntheticSource + boolean isSyntheticSource, + boolean useBinaryDocValues ) { super( name, @@ -581,6 +617,7 @@ public KeywordFieldType( this.isDimension = builder.dimension.getValue(); this.indexSortConfig = builder.indexSortConfig; this.hasDocValuesSkipper = DocValuesSkipIndexType.NONE.equals(fieldType.docValuesSkipIndexType()) == false; + this.useBinaryDocValues = useBinaryDocValues; } public KeywordFieldType(String name) { @@ -597,6 +634,7 @@ public KeywordFieldType(String name, boolean isIndexed, boolean hasDocValues, Ma this.isDimension = false; this.indexSortConfig = null; this.hasDocValuesSkipper = false; + this.useBinaryDocValues = false; } public KeywordFieldType(String name, FieldType fieldType) { @@ -618,6 +656,7 @@ public KeywordFieldType(String name, FieldType fieldType) { this.isDimension = false; this.indexSortConfig = null; this.hasDocValuesSkipper = 
DocValuesSkipIndexType.NONE.equals(fieldType.docValuesSkipIndexType()) == false; + this.useBinaryDocValues = false; } public KeywordFieldType(String name, NamedAnalyzer analyzer) { @@ -639,6 +678,7 @@ public KeywordFieldType(String name, NamedAnalyzer analyzer) { this.isDimension = false; this.indexSortConfig = null; this.hasDocValuesSkipper = false; + this.useBinaryDocValues = false; } @Override @@ -799,6 +839,10 @@ NamedAnalyzer normalizer() { @Override public BlockLoader blockLoader(BlockLoaderContext blContext) { + if (useBinaryDocValues) { + return new BlockDocValuesReader.BytesRefsFromBinaryBlockLoader(name()); + } + if (hasDocValues() && (blContext.fieldExtractPreference() != FieldExtractPreference.STORED || isSyntheticSourceEnabled())) { return new BlockDocValuesReader.BytesRefsFromOrdsBlockLoader(name()); } @@ -914,7 +958,10 @@ protected BytesRef storedToBytesRef(Object stored) { ); } - private SortedSetOrdinalsIndexFieldData.Builder fieldDataFromDocValues() { + private IndexFieldData.Builder fieldDataFromDocValues() { + if (useBinaryDocValues) { + return new BinaryIndexFieldData.Builder(name(), CoreValuesSourceType.KEYWORD); + } return new SortedSetOrdinalsIndexFieldData.Builder( name(), CoreValuesSourceType.KEYWORD, @@ -999,7 +1046,9 @@ public Query wildcardQuery( } return new StringScriptFieldWildcardQuery( new Script(""), - ctx -> new SortedSetDocValuesStringFieldScript(name(), context.lookup(), ctx), + ctx -> useBinaryDocValues + ? 
new BinaryDocValuesStringFieldScript(name(), context.lookup(), ctx) + : new SortedSetDocValuesStringFieldScript(name(), context.lookup(), ctx), name(), value, caseInsensitive @@ -1119,6 +1168,7 @@ public Query automatonQuery( private final boolean forceDocValuesSkipper; private final String offsetsFieldName; private final SourceKeepMode indexSourceKeepMode; + private final boolean useBinaryDocValues; private KeywordFieldMapper( String simpleName, @@ -1127,7 +1177,8 @@ private KeywordFieldMapper( BuilderParams builderParams, Builder builder, String offsetsFieldName, - SourceKeepMode indexSourceKeepMode + SourceKeepMode indexSourceKeepMode, + boolean useBinaryDocValues ) { super(simpleName, mappedFieldType, builderParams); assert fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) <= 0; @@ -1148,6 +1199,7 @@ private KeywordFieldMapper( this.forceDocValuesSkipper = builder.forceDocValuesSkipper; this.offsetsFieldName = offsetsFieldName; this.indexSourceKeepMode = indexSourceKeepMode; + this.useBinaryDocValues = useBinaryDocValues; } @Override @@ -1210,6 +1262,13 @@ private boolean indexValue(DocumentParserContext context, XContentString value) return false; } + if (useBinaryDocValues) { + var utfBytes = value.bytes(); + var binaryValue = new BytesRef(utfBytes.bytes(), utfBytes.offset(), utfBytes.length()); + context.doc().add(new BinaryDocValuesField(fieldType().name(), binaryValue)); + return true; + } + // if the value's length exceeds ignore_above, then don't index it if (fieldType().ignoreAbove().isIgnored(value)) { context.addIgnoredField(fullPath()); @@ -1316,7 +1375,8 @@ public FieldMapper.Builder getMergeBuilder() { enableDocValuesSkipper, forceDocValuesSkipper, indexSourceKeepMode, - fieldType().isWithinMultiField() + fieldType().isWithinMultiField(), + useBinaryDocValues ).dimension(fieldType().isDimension()).init(this); } @@ -1365,22 +1425,26 @@ protected void writeValue(Object value, XContentBuilder b) throws IOException { } }); } else if 
(hasDocValues) { - if (offsetsFieldName != null) { - layers.add(new SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer(fullPath(), offsetsFieldName)); + if (useBinaryDocValues) { + layers.add(new BinarySyntheticFieldLoader()); } else { - layers.add(new SortedSetDocValuesSyntheticFieldLoaderLayer(fullPath()) { - - @Override - protected BytesRef convert(BytesRef value) { - return value; - } - - @Override - protected BytesRef preserve(BytesRef value) { - // Preserve must make a deep copy because convert gets a shallow copy from the iterator - return BytesRef.deepCopyOf(value); - } - }); + if (offsetsFieldName != null) { + layers.add(new SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer(fullPath(), offsetsFieldName)); + } else { + layers.add(new SortedSetDocValuesSyntheticFieldLoaderLayer(fullPath()) { + + @Override + protected BytesRef convert(BytesRef value) { + return value; + } + + @Override + protected BytesRef preserve(BytesRef value) { + // Preserve must make a deep copy because convert gets a shallow copy from the iterator + return BytesRef.deepCopyOf(value); + } + }); + } } } @@ -1399,4 +1463,50 @@ protected void writeValue(Object value, XContentBuilder b) throws IOException { return new CompositeSyntheticFieldLoader(leafFieldName, fullFieldName, layers); } + + final class BinarySyntheticFieldLoader implements CompositeSyntheticFieldLoader.DocValuesLayer { + private int docValueCount; + private BytesRef docValueBytes; + + @Override + public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException { + BinaryDocValues values = leafReader.getBinaryDocValues(fullPath()); + if (values == null) { + docValueCount = 0; + return null; + } + + return docId -> { + if (values.advanceExact(docId) == false) { + docValueCount = 0; + return hasValue(); + } + docValueBytes = BytesRef.deepCopyOf(values.binaryValue()); + docValueCount = 1; + return hasValue(); + }; + } + + @Override + public boolean hasValue() { + return 
docValueCount > 0; + } + + @Override + public long valueCount() { + return docValueCount; + } + + @Override + public void write(XContentBuilder b) throws IOException { + if (hasValue()) { + b.utf8Value(docValueBytes.bytes, docValueBytes.offset, docValueBytes.length); + } + } + + @Override + public String fieldName() { + return fullPath(); + } + } } diff --git a/server/src/main/java/org/elasticsearch/script/BinaryDocValuesStringFieldScript.java b/server/src/main/java/org/elasticsearch/script/BinaryDocValuesStringFieldScript.java new file mode 100644 index 0000000000000..be5aba49ec820 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/script/BinaryDocValuesStringFieldScript.java @@ -0,0 +1,56 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.script; + +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.index.mapper.OnScriptError; +import org.elasticsearch.search.lookup.SearchLookup; + +import java.io.IOException; +import java.util.Map; + +public class BinaryDocValuesStringFieldScript extends StringFieldScript { + private final BinaryDocValues binaryDocValues; + + boolean hasValue = false; + + public BinaryDocValuesStringFieldScript(String fieldName, SearchLookup searchLookup, LeafReaderContext ctx) { + super(fieldName, Map.of(), searchLookup, OnScriptError.FAIL, ctx); + try { + binaryDocValues = DocValues.getBinary(ctx.reader(), fieldName); + } catch (IOException e) { + throw new IllegalStateException("Cannot load doc values", e); + } + } + + @Override + public void setDocument(int docID) { + try { + hasValue = binaryDocValues.advanceExact(docID); + } catch (IOException e) { + throw new IllegalStateException("Cannot load doc values", e); + } + } + + @Override + public void execute() { + try { + if (hasValue) { + BytesRef bytesRef = binaryDocValues.binaryValue(); + emit(bytesRef.utf8ToString()); + } + } catch (IOException e) { + throw new IllegalStateException("Cannot load doc values", e); + } + } +} diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java index ee9351ed51b97..1efe9c3f0bf36 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java @@ -29,6 +29,7 @@ import org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec; import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormatTests.TestES87TSDBDocValuesFormat; import 
org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat; +import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormatTests; import org.elasticsearch.test.ESTestCase; import java.io.IOException; @@ -61,7 +62,8 @@ public void testDuel() throws IOException { ? new ES819TSDBDocValuesFormat( ESTestCase.randomIntBetween(1, 4096), ESTestCase.randomIntBetween(1, 512), - random().nextBoolean() + random().nextBoolean(), + ES819TSDBDocValuesFormatTests.randomCompressionMode() ) : new TestES87TSDBDocValuesFormat(); diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/TsdbDocValueBwcTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/TsdbDocValueBwcTests.java index d2c8aae601977..23f85da450431 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/TsdbDocValueBwcTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/TsdbDocValueBwcTests.java @@ -42,6 +42,7 @@ import org.elasticsearch.index.codec.perfield.XPerFieldDocValuesFormat; import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormatTests.TestES87TSDBDocValuesFormat; import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat; +import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormatTests; import org.elasticsearch.test.ESTestCase; import org.hamcrest.Matchers; @@ -291,7 +292,8 @@ public void testEncodeOrdinalRange() throws IOException { new ES819TSDBDocValuesFormat( random().nextInt(16, 128), nextOrdinalRangeThreshold.getAsInt(), - random().nextBoolean() + random().nextBoolean(), + ES819TSDBDocValuesFormatTests.randomCompressionMode() ) ) ); diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java index 003124ab4b6f4..e37ec4e1b95c5 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java +++ 
b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java @@ -42,6 +42,7 @@ import org.elasticsearch.common.util.CollectionUtils; import org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec; import org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec; +import org.elasticsearch.index.codec.tsdb.BinaryDVCompressionMode; import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormatTests; import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesProducer.BaseDenseNumericValues; import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesProducer.BaseSortedDocValues; @@ -68,12 +69,18 @@ public class ES819TSDBDocValuesFormatTests extends ES87TSDBDocValuesFormatTests { + public static BinaryDVCompressionMode randomCompressionMode() { + BinaryDVCompressionMode[] modes = BinaryDVCompressionMode.values(); + return modes[random().nextInt(modes.length)]; + } + private final Codec codec = new Elasticsearch92Lucene103Codec() { final ES819TSDBDocValuesFormat docValuesFormat = new ES819TSDBDocValuesFormat( ESTestCase.randomIntBetween(2, 4096), ESTestCase.randomIntBetween(1, 512), - random().nextBoolean() + random().nextBoolean(), + randomCompressionMode() ); @Override diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatVariableSkipIntervalTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatVariableSkipIntervalTests.java index 247b75f2977b5..2587942febb14 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatVariableSkipIntervalTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatVariableSkipIntervalTests.java @@ -19,14 +19,24 @@ public class ES819TSDBDocValuesFormatVariableSkipIntervalTests extends ES87TSDBD protected Codec getCodec() { // small interval size to test with many intervals return 
TestUtil.alwaysDocValuesFormat( - new ES819TSDBDocValuesFormat(random().nextInt(4, 16), random().nextInt(1, 32), random().nextBoolean()) + new ES819TSDBDocValuesFormat( + random().nextInt(4, 16), + random().nextInt(1, 32), + random().nextBoolean(), + ES819TSDBDocValuesFormatTests.randomCompressionMode() + ) ); } public void testSkipIndexIntervalSize() { IllegalArgumentException ex = expectThrows( IllegalArgumentException.class, - () -> new ES819TSDBDocValuesFormat(random().nextInt(Integer.MIN_VALUE, 2), random().nextInt(1, 32), random().nextBoolean()) + () -> new ES819TSDBDocValuesFormat( + random().nextInt(Integer.MIN_VALUE, 2), + random().nextInt(1, 32), + random().nextBoolean(), + ES819TSDBDocValuesFormatTests.randomCompressionMode() + ) ); assertTrue(ex.getMessage().contains("skipIndexIntervalSize must be > 1")); } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java index 815f19ec2cfb6..ed0038b4645a5 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java @@ -322,7 +322,8 @@ public void testIgnoreAboveIndexLevelSetting() { mock(NamedAnalyzer.class), mock(NamedAnalyzer.class), builder, - true + true, + false ); // when/then @@ -354,7 +355,8 @@ public void testIgnoreAboveIsSetReturnsTrueWhenIgnoreAboveIsGiven() { mock(NamedAnalyzer.class), mock(NamedAnalyzer.class), builder, - true + true, + false ); // when/then @@ -385,7 +387,8 @@ public void testIgnoreAboveIsSetReturnsFalseWhenIgnoreAboveIsNotGiven() { mock(NamedAnalyzer.class), mock(NamedAnalyzer.class), builder, - true + true, + false ); // when/then @@ -417,7 +420,8 @@ public void testIgnoreAboveIsSetReturnsFalseWhenIgnoreAboveIsGivenButItsTheSameA mock(NamedAnalyzer.class), mock(NamedAnalyzer.class), builder, - true + true, + false ); // when/then @@ -449,7 +453,8 @@ 
public void testIgnoreAboveIsSetReturnsFalseWhenIgnoreAboveIsGivenButItsTheSameA mock(NamedAnalyzer.class), mock(NamedAnalyzer.class), builder, - true + true, + false ); // when/then @@ -481,7 +486,8 @@ public void testIgnoreAboveIsSetReturnsTrueWhenIgnoreAboveIsGivenAsLogsdbDefault mock(NamedAnalyzer.class), mock(NamedAnalyzer.class), builder, - true + true, + false ); // when/then @@ -513,7 +519,8 @@ public void testIgnoreAboveIsSetReturnsTrueWhenIgnoreAboveIsConfiguredAtIndexLev mock(NamedAnalyzer.class), mock(NamedAnalyzer.class), builder, - true + true, + false ); // when/then diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java index 82f1d3a0b687c..62dfb2d90fc68 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java @@ -357,7 +357,8 @@ public void testBlockLoaderDoesNotUseSyntheticSourceDelegateWhenIgnoreAboveIsSet mock(NamedAnalyzer.class), mock(NamedAnalyzer.class), builder, - true + true, + false ); TextFieldType ft = new TextFieldType( @@ -406,7 +407,8 @@ public void testBlockLoaderDoesNotUseSyntheticSourceDelegateWhenIgnoreAboveIsSet mock(NamedAnalyzer.class), mock(NamedAnalyzer.class), builder, - true + true, + false ); TextFieldType ft = new TextFieldType( diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java index 4e562217a00e3..79b14aa2c65ea 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java @@ -1393,7 +1393,8 @@ 
private KeywordFieldMapper.KeywordFieldType storedKeywordField(String name) { Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER, new KeywordFieldMapper.Builder(name, IndexVersion.current()).docValues(false), - true // TODO randomize - load from stored keyword fields if stored even in synthetic source + true, // TODO randomize - load from stored keyword fields if stored even in synthetic source + false ); } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java index 3a83d365f6f57..8c0e57e942d05 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java @@ -1580,7 +1580,8 @@ private KeywordFieldMapper.KeywordFieldType storedKeywordField(String name) { Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER, new KeywordFieldMapper.Builder(name, IndexVersion.current()).docValues(false), - true // TODO randomize - load from stored keyword fields if stored even in synthetic source + true, // TODO randomize - load from stored keyword fields if stored even in synthetic source + false ); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java index 29c98a073938e..3cec096140263 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java @@ -240,7 +240,8 @@ static MappedFieldType createUnmappedFieldType(String name, DefaultShardContext Lucene.KEYWORD_ANALYZER, 
Lucene.KEYWORD_ANALYZER, builder, - context.ctx.isSourceSynthetic() + context.ctx.isSourceSynthetic(), + false ); } } diff --git a/x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/keyword_use_binary_doc_values.yml b/x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/keyword_use_binary_doc_values.yml new file mode 100644 index 0000000000000..2f4d22ba233e5 --- /dev/null +++ b/x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/keyword_use_binary_doc_values.yml @@ -0,0 +1,117 @@ +--- +setup: + - do: + indices.create: + index: my-index + body: + settings: + index: + mapping: + use_binary_doc_values: true + mode: logsdb + mappings: + properties: + "@timestamp": + type: date + host.name: + type: keyword + agent_id: + type: keyword + process_id: + type: integer + http_method: + type: keyword + is_https: + type: boolean + location: + type: geo_point + message: + type: text + + - do: + bulk: + index: my-index + refresh: true + body: + - { "index": { } } + - { "@timestamp": "2024-02-12T10:30:00Z", "host.name": "foo", "agent_id": "darth-vader", "process_id": 101, "http_method": "GET", "is_https": false, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "No, I am your father." } + - { "index": { } } + - { "@timestamp": "2024-02-12T10:31:00Z", "host.name": "bar", "agent_id": "yoda", "process_id": 102, "http_method": "PUT", "is_https": false, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "Do. Or do not. There is no try." } + - { "index": { } } + - { "@timestamp": "2024-02-12T10:32:00Z", "host.name": "foo", "agent_id": "obi-wan", "process_id": 103, "http_method": "GET", "is_https": false, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "May the force be with you." 
} + - { "index": { } } + - { "@timestamp": "2024-02-12T10:33:00Z", "host.name": "baz", "agent_id": "darth-vader", "process_id": 102, "http_method": "POST", "is_https": true, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "I find your lack of faith disturbing." } + - { "index": { } } + - { "@timestamp": "2024-02-12T10:34:00Z", "host.name": "baz", "agent_id": "yoda", "process_id": 104, "http_method": "POST", "is_https": false, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "Wars not make one great." } + - { "index": { } } + - { "@timestamp": "2024-02-12T10:35:00Z", "host.name": "foo", "agent_id": "obi-wan", "process_id": 105, "http_method": "GET", "is_https": false, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "That's no moon. It's a space station." } + +--- +teardown: + - do: + indices.delete: + index: my-index + +--- +"Simple from": + - do: + esql.query: + body: + query: 'FROM my-index | SORT host.name, @timestamp | LIMIT 1' + + - match: {columns.0.name: "@timestamp"} + - match: {columns.0.type: "date"} + - match: {columns.1.name: "agent_id"} + - match: {columns.1.type: "keyword"} + - match: {columns.2.name: "host.name"} + - match: {columns.2.type: "keyword"} + - match: {columns.3.name: "http_method" } + - match: {columns.3.type: "keyword" } + - match: {columns.4.name: "is_https"} + - match: {columns.4.type: "boolean"} + - match: {columns.5.name: "location"} + - match: {columns.5.type: "geo_point"} + - match: {columns.6.name: "message"} + - match: {columns.6.type: "text"} + - match: {columns.7.name: "process_id"} + - match: {columns.7.type: "integer"} + + - match: {values.0.0: "2024-02-12T10:31:00.000Z"} + - match: {values.0.1: "yoda"} + - match: {values.0.2: "bar"} + - match: {values.0.3: "PUT"} + - match: {values.0.4: false} + - match: {values.0.5: "POINT (-74.00600004941225 40.712799984030426)"} + - match: {values.0.6: "Do. Or do not. 
There is no try."} + - match: {values.0.7: 102} + +--- +"Simple from keyword fields": + - do: + esql.query: + body: + query: 'FROM my-index | SORT host.name, @timestamp | KEEP agent_id, http_method | LIMIT 10' + profile: true + + - match: {columns.0.name: "agent_id"} + - match: {columns.0.type: "keyword"} + - match: {columns.1.name: "http_method"} + - match: {columns.1.type: "keyword"} + + - match: {values.0.0: "yoda"} + - match: {values.0.1: "PUT"} + - match: {values.1.0: "darth-vader"} + - match: {values.1.1: "POST"} + - match: {values.2.0: "yoda"} + - match: {values.2.1: "POST"} + - match: {values.3.0: "darth-vader"} + - match: {values.3.1: "GET"} + - match: {values.4.0: "obi-wan"} + - match: {values.4.1: "GET"} + - match: {values.5.0: "obi-wan"} + - match: {values.5.1: "GET"} + + - match: {profile.drivers.0.description: "data"} + - match: {profile.drivers.0.operators.1.operator: "ValuesSourceReaderOperator[fields = [@timestamp, agent_id, host.name, http_method]]"} + - match: {profile.drivers.0.operators.1.status.readers_built.agent_id:row_stride:BlockDocValuesReader\\.Bytes: 1} + - match: {profile.drivers.0.operators.1.status.readers_built.http_method:row_stride:BlockDocValuesReader\\.Bytes: 1}