diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java index 9cc304828a8a7..cb929d4437722 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java @@ -222,7 +222,8 @@ private static BlockLoader blockLoader(String name) { Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER, new KeywordFieldMapper.Builder(name, IndexVersion.current()).docValues(ft.docValuesType() != DocValuesType.NONE), - syntheticSource + syntheticSource, + false ).blockLoader(new MappedFieldType.BlockLoaderContext() { @Override public String indexName() { diff --git a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java index 8e70945fc2a76..0042810241be7 100644 --- a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java +++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java @@ -298,7 +298,8 @@ public void testBlockLoaderDoesNotUseSyntheticSourceDelegateWhenIgnoreAboveIsSet mock(NamedAnalyzer.class), mock(NamedAnalyzer.class), builder, - true + true, + false ); MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType( @@ -346,7 +347,8 @@ public void testBlockLoaderDoesNotUseSyntheticSourceDelegateWhenIgnoreAboveIsSet mock(NamedAnalyzer.class), mock(NamedAnalyzer.class), builder, - true + true, + false ); MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType( diff --git 
a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java index d4fef4e9bb489..9de1f103d85f5 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java @@ -243,6 +243,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings { if (IndexSettings.DOC_VALUES_SKIPPER) { settings.add(IndexSettings.USE_DOC_VALUES_SKIPPER); } + settings.add(IndexSettings.USE_BINARY_DOC_VALUES); settings.add(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING); BUILT_IN_INDEX_SETTINGS = Collections.unmodifiableSet(settings); }; diff --git a/server/src/main/java/org/elasticsearch/index/IndexSettings.java b/server/src/main/java/org/elasticsearch/index/IndexSettings.java index b396e1ca206e3..de73e65a4696f 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexSettings.java +++ b/server/src/main/java/org/elasticsearch/index/IndexSettings.java @@ -675,6 +675,17 @@ public boolean isES87TSDBCodecEnabled() { Property.Final ); + /** + * Whether keyword field mappers index their values as binary doc values. Read by {@code KeywordFieldMapper.Builder}; + * defaults to {@code false} and is declared with {@code Property.IndexScope} and {@code Property.Final}. + */ + public static final Setting<Boolean> USE_BINARY_DOC_VALUES = Setting.boolSetting( + "index.mapping.use_binary_doc_values", + false, + Property.IndexScope, + Property.Final + ); + /** * The {@link IndexMode "mode"} of the index.
*/ diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java index 5d90f2814853d..e26f8d04965ae 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java @@ -208,6 +208,26 @@ public BytesRef binaryValue() throws IOException { bytesSlice.readBytes((long) doc * length, bytes.bytes, 0, length); return bytes; } + + @Override + public BlockLoader.Block tryRead( + BlockLoader.BlockFactory factory, + BlockLoader.Docs docs, + int offset, + boolean nullsFiltered, + BlockDocValuesReader.ToDouble toDouble, + boolean toInt + ) throws IOException { + int count = docs.count() - offset; + try (var builder = factory.bytesRefs(count)) { + for (int i = offset; i < docs.count(); i++) { + doc = docs.get(i); + bytesSlice.readBytes((long) doc * length, bytes.bytes, 0, length); + builder.appendBytesRef(bytes); + } + return builder.build(); + } + } }; } else { // variable length @@ -223,6 +243,28 @@ public BytesRef binaryValue() throws IOException { bytesSlice.readBytes(startOffset, bytes.bytes, 0, bytes.length); return bytes; } + + @Override + public BlockLoader.Block tryRead( + BlockLoader.BlockFactory factory, + BlockLoader.Docs docs, + int offset, + boolean nullsFiltered, + BlockDocValuesReader.ToDouble toDouble, + boolean toInt + ) throws IOException { + int count = docs.count() - offset; + try (var builder = factory.bytesRefs(count)) { + for (int i = offset; i < docs.count(); i++) { + doc = docs.get(i); + long startOffset = addresses.get(doc); + bytes.length = (int) (addresses.get(doc + 1L) - startOffset); + bytesSlice.readBytes(startOffset, bytes.bytes, 0, bytes.length); + builder.appendBytesRef(bytes); + } + return builder.build(); + } + } }; } } else { @@ -267,7 +309,7 @@ public BytesRef 
binaryValue() throws IOException { } } - private abstract static class DenseBinaryDocValues extends BinaryDocValues { + private abstract static class DenseBinaryDocValues extends BinaryDocValues implements BlockLoader.OptionalColumnAtATimeReader { final int maxDoc; int doc = -1; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java b/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java index 457c90383b5d2..de35a85e8878c 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java @@ -925,6 +925,28 @@ public AllReader reader(LeafReaderContext context) throws IOException { } } + public static class BytesRefsFromBinaryBlockLoader extends DocValuesBlockLoader { + private final String fieldName; + + public BytesRefsFromBinaryBlockLoader(String fieldName) { + this.fieldName = fieldName; + } + + @Override + public Builder builder(BlockFactory factory, int expectedCount) { + return factory.bytesRefs(expectedCount); + } + + @Override + public AllReader reader(LeafReaderContext context) throws IOException { + BinaryDocValues docValues = context.reader().getBinaryDocValues(fieldName); + if (docValues == null) { + return new ConstantNullsReader(); + } + return new BytesRefsFromBinary(docValues); + } + } + abstract static class AbstractBytesRefsFromBinary extends BlockDocValuesReader { protected final BinaryDocValues docValues; @@ -1010,6 +1032,17 @@ public BytesRefsFromBinary(BinaryDocValues docValues) { super(docValues); } + @Override + public BlockLoader.Block read(BlockFactory factory, Docs docs, int offset, boolean nullsFiltered) throws IOException { + if (docValues instanceof BlockLoader.OptionalColumnAtATimeReader direct) { + BlockLoader.Block block = direct.tryRead(factory, docs, offset, nullsFiltered, null, false); + if (block != null) { + return block; + } + } + return super.read(factory, docs, 
offset, nullsFiltered); + } + @Override void read(int doc, BytesRefBuilder builder) throws IOException { if (false == docValues.advanceExact(doc)) { diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index cf3fad86812f5..1ffa4a67bd248 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -13,15 +13,18 @@ import org.apache.logging.log4j.Logger; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.InvertableType; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.document.StoredField; +import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.DocValuesSkipIndexType; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MultiTerms; import org.apache.lucene.index.Term; @@ -52,10 +55,12 @@ import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData; import org.elasticsearch.index.fielddata.StoredFieldSortedBinaryIndexFieldData; +import org.elasticsearch.index.fielddata.plain.BinaryIndexFieldData; import org.elasticsearch.index.fielddata.plain.SortedSetOrdinalsIndexFieldData; import org.elasticsearch.index.query.AutomatonQueryWithDescription; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.index.similarity.SimilarityProvider; 
+import org.elasticsearch.script.BinaryDocValuesStringFieldScript; import org.elasticsearch.script.Script; import org.elasticsearch.script.ScriptCompiler; import org.elasticsearch.script.SortedSetDocValuesStringFieldScript; @@ -90,6 +95,7 @@ import static org.apache.lucene.index.IndexWriter.MAX_TERM_LENGTH; import static org.elasticsearch.core.Strings.format; import static org.elasticsearch.index.IndexSettings.IGNORE_ABOVE_SETTING; +import static org.elasticsearch.index.IndexSettings.USE_BINARY_DOC_VALUES; import static org.elasticsearch.index.IndexSettings.USE_DOC_VALUES_SKIPPER; import static org.elasticsearch.index.mapper.FieldArrayContext.getOffsetsFieldName; import static org.elasticsearch.index.mapper.Mapper.IgnoreAbove.getIgnoreAboveDefaultValue; @@ -108,6 +114,8 @@ public static class Defaults { public static final FieldType FIELD_TYPE; public static final FieldType FIELD_TYPE_WITH_SKIP_DOC_VALUES; + public static final FieldType FIELD_TYPE_WITH_BINARY_DOC_VALUES; + static { FieldType ft = new FieldType(); ft.setTokenized(false); @@ -127,6 +135,15 @@ public static class Defaults { FIELD_TYPE_WITH_SKIP_DOC_VALUES = freezeAndDeduplicateFieldType(ft); } + static { + FieldType ft = new FieldType(); + ft.setTokenized(false); + ft.setOmitNorms(true); + ft.setIndexOptions(IndexOptions.NONE); + ft.setDocValuesType(DocValuesType.BINARY); + FIELD_TYPE_WITH_BINARY_DOC_VALUES = freezeAndDeduplicateFieldType(ft); + } + public static final TextSearchInfo TEXT_SEARCH_INFO = new TextSearchInfo( FIELD_TYPE, null, @@ -214,6 +231,7 @@ public static final class Builder extends FieldMapper.DimensionBuilder { private final boolean forceDocValuesSkipper; private final SourceKeepMode indexSourceKeepMode; private final boolean isWithinMultiField; + private final boolean useBinaryDocValues; public Builder(final String name, final MappingParserContext mappingParserContext) { this( @@ -227,7 +245,8 @@ public Builder(final String name, final MappingParserContext mappingParserContex 
USE_DOC_VALUES_SKIPPER.get(mappingParserContext.getSettings()), false, mappingParserContext.getIndexSettings().sourceKeepMode(), - mappingParserContext.isWithinMultiField() + mappingParserContext.isWithinMultiField(), + USE_BINARY_DOC_VALUES.get(mappingParserContext.getSettings()) ); } @@ -250,7 +269,8 @@ public Builder(final String name, final MappingParserContext mappingParserContex false, false, sourceKeepMode, - isWithinMultiField + isWithinMultiField, + false ); } @@ -265,7 +285,8 @@ private Builder( boolean enableDocValuesSkipper, boolean forceDocValuesSkipper, SourceKeepMode indexSourceKeepMode, - boolean isWithinMultiField + boolean isWithinMultiField, + boolean binaryDocValuesEnabled ) { super(name); this.indexAnalyzers = indexAnalyzers; @@ -301,6 +322,7 @@ private Builder( this.forceDocValuesSkipper = forceDocValuesSkipper; this.indexSourceKeepMode = indexSourceKeepMode; this.isWithinMultiField = isWithinMultiField; + this.useBinaryDocValues = binaryDocValuesEnabled; } public Builder(String name, IndexVersion indexCreatedVersion) { @@ -330,7 +352,8 @@ public static Builder buildWithDocValuesSkipper( enableDocValuesSkipper, true, SourceKeepMode.NONE, - isWithinMultiField + isWithinMultiField, + false ); } @@ -413,7 +436,7 @@ protected Parameter[] getParameters() { dimension }; } - private KeywordFieldType buildFieldType(MapperBuilderContext context, FieldType fieldType) { + private KeywordFieldType buildFieldType(MapperBuilderContext context, FieldType fieldType, boolean useBinaryDocValues) { NamedAnalyzer normalizer = Lucene.KEYWORD_ANALYZER; NamedAnalyzer searchAnalyzer = Lucene.KEYWORD_ANALYZER; NamedAnalyzer quoteAnalyzer = Lucene.KEYWORD_ANALYZER; @@ -448,20 +471,25 @@ private KeywordFieldType buildFieldType(MapperBuilderContext context, FieldType searchAnalyzer, quoteAnalyzer, this, - context.isSourceSynthetic() + context.isSourceSynthetic(), + useBinaryDocValues ); } @Override public KeywordFieldMapper build(MapperBuilderContext context) { + String 
fullName = context.buildFullName(leafName()); + // Index sorting by binary doc values is not supported (yet), so host.name is excluded: + boolean useBinaryDocValues = fullName.equals("host.name") == false && this.useBinaryDocValues; FieldType fieldtype = resolveFieldType( + useBinaryDocValues, enableDocValuesSkipper, forceDocValuesSkipper, hasDocValues, indexCreatedVersion, indexSortConfig, indexMode, - context.buildFullName(leafName()) + fullName ); fieldtype.setOmitNorms(this.hasNorms.getValue() == false); fieldtype.setStored(this.stored.getValue()); @@ -492,15 +520,17 @@ public KeywordFieldMapper build(MapperBuilderContext context) { return new KeywordFieldMapper( leafName(), fieldtype, - buildFieldType(context, fieldtype), + buildFieldType(context, fieldtype, useBinaryDocValues), builderParams(this, context), this, offsetsFieldName, - indexSourceKeepMode + indexSourceKeepMode, + useBinaryDocValues ); } private static FieldType resolveFieldType( + final boolean useBinaryDocValues, final boolean enableDocValuesSkipper, final boolean forceDocValuesSkipper, final Parameter hasDocValues, @@ -509,6 +539,10 @@ private static FieldType resolveFieldType( final IndexMode indexMode, final String fullFieldName ) { + if (useBinaryDocValues) { + return new FieldType(Defaults.FIELD_TYPE_WITH_BINARY_DOC_VALUES); + } + if (enableDocValuesSkipper) { if (forceDocValuesSkipper) { assert hasDocValues.getValue(); @@ -553,6 +587,7 @@ public static final class KeywordFieldType extends TextFamilyFieldType { private final boolean isDimension; private final IndexSortConfig indexSortConfig; private final boolean hasDocValuesSkipper; + private final boolean useBinaryDocValues; public KeywordFieldType( String name, @@ -561,7 +596,8 @@ public KeywordFieldType( NamedAnalyzer searchAnalyzer, NamedAnalyzer quoteAnalyzer, Builder builder, - boolean isSyntheticSource + boolean isSyntheticSource, + boolean useBinaryDocValues ) { super( name, @@ -581,6 +617,7 @@ public KeywordFieldType( this.isDimension = 
builder.dimension.getValue(); this.indexSortConfig = builder.indexSortConfig; this.hasDocValuesSkipper = DocValuesSkipIndexType.NONE.equals(fieldType.docValuesSkipIndexType()) == false; + this.useBinaryDocValues = useBinaryDocValues; } public KeywordFieldType(String name) { @@ -597,6 +634,7 @@ public KeywordFieldType(String name, boolean isIndexed, boolean hasDocValues, Ma this.isDimension = false; this.indexSortConfig = null; this.hasDocValuesSkipper = false; + this.useBinaryDocValues = false; } public KeywordFieldType(String name, FieldType fieldType) { @@ -618,6 +656,7 @@ public KeywordFieldType(String name, FieldType fieldType) { this.isDimension = false; this.indexSortConfig = null; this.hasDocValuesSkipper = DocValuesSkipIndexType.NONE.equals(fieldType.docValuesSkipIndexType()) == false; + this.useBinaryDocValues = false; } public KeywordFieldType(String name, NamedAnalyzer analyzer) { @@ -639,6 +678,7 @@ public KeywordFieldType(String name, NamedAnalyzer analyzer) { this.isDimension = false; this.indexSortConfig = null; this.hasDocValuesSkipper = false; + this.useBinaryDocValues = false; } @Override @@ -799,6 +839,10 @@ NamedAnalyzer normalizer() { @Override public BlockLoader blockLoader(BlockLoaderContext blContext) { + if (useBinaryDocValues) { + return new BlockDocValuesReader.BytesRefsFromBinaryBlockLoader(name()); + } + if (hasDocValues() && (blContext.fieldExtractPreference() != FieldExtractPreference.STORED || isSyntheticSourceEnabled())) { return new BlockDocValuesReader.BytesRefsFromOrdsBlockLoader(name()); } @@ -914,7 +958,10 @@ protected BytesRef storedToBytesRef(Object stored) { ); } - private SortedSetOrdinalsIndexFieldData.Builder fieldDataFromDocValues() { + private IndexFieldData.Builder fieldDataFromDocValues() { + if (useBinaryDocValues) { + return new BinaryIndexFieldData.Builder(name(), CoreValuesSourceType.KEYWORD); + } return new SortedSetOrdinalsIndexFieldData.Builder( name(), CoreValuesSourceType.KEYWORD, @@ -999,7 +1046,9 @@ public 
Query wildcardQuery( } return new StringScriptFieldWildcardQuery( new Script(""), - ctx -> new SortedSetDocValuesStringFieldScript(name(), context.lookup(), ctx), + ctx -> useBinaryDocValues + ? new BinaryDocValuesStringFieldScript(name(), context.lookup(), ctx) + : new SortedSetDocValuesStringFieldScript(name(), context.lookup(), ctx), name(), value, caseInsensitive @@ -1119,6 +1168,7 @@ public Query automatonQuery( private final boolean forceDocValuesSkipper; private final String offsetsFieldName; private final SourceKeepMode indexSourceKeepMode; + private final boolean useBinaryDocValues; private KeywordFieldMapper( String simpleName, @@ -1127,7 +1177,8 @@ private KeywordFieldMapper( BuilderParams builderParams, Builder builder, String offsetsFieldName, - SourceKeepMode indexSourceKeepMode + SourceKeepMode indexSourceKeepMode, + boolean useBinaryDocValues ) { super(simpleName, mappedFieldType, builderParams); assert fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) <= 0; @@ -1148,6 +1199,7 @@ private KeywordFieldMapper( this.forceDocValuesSkipper = builder.forceDocValuesSkipper; this.offsetsFieldName = offsetsFieldName; this.indexSourceKeepMode = indexSourceKeepMode; + this.useBinaryDocValues = useBinaryDocValues; } @Override @@ -1210,6 +1262,13 @@ private boolean indexValue(DocumentParserContext context, XContentString value) return false; } + if (useBinaryDocValues) { + var utfBytes = value.bytes(); + var binaryValue = new BytesRef(utfBytes.bytes(), utfBytes.offset(), utfBytes.length()); + context.doc().add(new BinaryDocValuesField(fieldType().name(), binaryValue)); + return true; + } + // if the value's length exceeds ignore_above, then don't index it if (fieldType().ignoreAbove().isIgnored(value)) { context.addIgnoredField(fullPath()); @@ -1316,7 +1375,8 @@ public FieldMapper.Builder getMergeBuilder() { enableDocValuesSkipper, forceDocValuesSkipper, indexSourceKeepMode, - fieldType().isWithinMultiField() + fieldType().isWithinMultiField(), + 
useBinaryDocValues ).dimension(fieldType().isDimension()).init(this); } @@ -1365,22 +1425,26 @@ protected void writeValue(Object value, XContentBuilder b) throws IOException { } }); } else if (hasDocValues) { - if (offsetsFieldName != null) { - layers.add(new SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer(fullPath(), offsetsFieldName)); + if (useBinaryDocValues) { + layers.add(new BinarySyntheticFieldLoader()); } else { - layers.add(new SortedSetDocValuesSyntheticFieldLoaderLayer(fullPath()) { - - @Override - protected BytesRef convert(BytesRef value) { - return value; - } - - @Override - protected BytesRef preserve(BytesRef value) { - // Preserve must make a deep copy because convert gets a shallow copy from the iterator - return BytesRef.deepCopyOf(value); - } - }); + if (offsetsFieldName != null) { + layers.add(new SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer(fullPath(), offsetsFieldName)); + } else { + layers.add(new SortedSetDocValuesSyntheticFieldLoaderLayer(fullPath()) { + + @Override + protected BytesRef convert(BytesRef value) { + return value; + } + + @Override + protected BytesRef preserve(BytesRef value) { + // Preserve must make a deep copy because convert gets a shallow copy from the iterator + return BytesRef.deepCopyOf(value); + } + }); + } } } @@ -1399,4 +1463,50 @@ protected void writeValue(Object value, XContentBuilder b) throws IOException { return new CompositeSyntheticFieldLoader(leafFieldName, fullFieldName, layers); } + + final class BinarySyntheticFieldLoader implements CompositeSyntheticFieldLoader.DocValuesLayer { + private int docValueCount; + private BytesRef docValueBytes; + + @Override + public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException { + BinaryDocValues values = leafReader.getBinaryDocValues(fullPath()); + if (values == null) { + docValueCount = 0; + return null; + } + + return docId -> { + if (values.advanceExact(docId) == false) { + docValueCount = 0; + return 
hasValue(); + } + docValueBytes = BytesRef.deepCopyOf(values.binaryValue()); + docValueCount = 1; + return hasValue(); + }; + } + + @Override + public boolean hasValue() { + return docValueCount > 0; + } + + @Override + public long valueCount() { + return docValueCount; + } + + @Override + public void write(XContentBuilder b) throws IOException { + if (hasValue()) { + b.utf8Value(docValueBytes.bytes, docValueBytes.offset, docValueBytes.length); + } + } + + @Override + public String fieldName() { + return fullPath(); + } + } } diff --git a/server/src/main/java/org/elasticsearch/script/BinaryDocValuesStringFieldScript.java b/server/src/main/java/org/elasticsearch/script/BinaryDocValuesStringFieldScript.java new file mode 100644 index 0000000000000..be5aba49ec820 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/script/BinaryDocValuesStringFieldScript.java @@ -0,0 +1,56 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.script; + +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.index.mapper.OnScriptError; +import org.elasticsearch.search.lookup.SearchLookup; + +import java.io.IOException; +import java.util.Map; + +public class BinaryDocValuesStringFieldScript extends StringFieldScript { + private final BinaryDocValues binaryDocValues; + + boolean hasValue = false; + + public BinaryDocValuesStringFieldScript(String fieldName, SearchLookup searchLookup, LeafReaderContext ctx) { + super(fieldName, Map.of(), searchLookup, OnScriptError.FAIL, ctx); + try { + binaryDocValues = DocValues.getBinary(ctx.reader(), fieldName); + } catch (IOException e) { + throw new IllegalStateException("Cannot load doc values", e); + } + } + + @Override + public void setDocument(int docID) { + try { + hasValue = binaryDocValues.advanceExact(docID); + } catch (IOException e) { + throw new IllegalStateException("Cannot load doc values", e); + } + } + + @Override + public void execute() { + try { + if (hasValue) { + BytesRef bytesRef = binaryDocValues.binaryValue(); + emit(bytesRef.utf8ToString()); + } + } catch (IOException e) { + throw new IllegalStateException("Cannot load doc values", e); + } + } +} diff --git a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java index 815f19ec2cfb6..ed0038b4645a5 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java @@ -322,7 +322,8 @@ public void testIgnoreAboveIndexLevelSetting() { mock(NamedAnalyzer.class), mock(NamedAnalyzer.class), builder, - true + true, + false ); // when/then @@ -354,7 +355,8 @@ public void 
testIgnoreAboveIsSetReturnsTrueWhenIgnoreAboveIsGiven() { mock(NamedAnalyzer.class), mock(NamedAnalyzer.class), builder, - true + true, + false ); // when/then @@ -385,7 +387,8 @@ public void testIgnoreAboveIsSetReturnsFalseWhenIgnoreAboveIsNotGiven() { mock(NamedAnalyzer.class), mock(NamedAnalyzer.class), builder, - true + true, + false ); // when/then @@ -417,7 +420,8 @@ public void testIgnoreAboveIsSetReturnsFalseWhenIgnoreAboveIsGivenButItsTheSameA mock(NamedAnalyzer.class), mock(NamedAnalyzer.class), builder, - true + true, + false ); // when/then @@ -449,7 +453,8 @@ public void testIgnoreAboveIsSetReturnsFalseWhenIgnoreAboveIsGivenButItsTheSameA mock(NamedAnalyzer.class), mock(NamedAnalyzer.class), builder, - true + true, + false ); // when/then @@ -481,7 +486,8 @@ public void testIgnoreAboveIsSetReturnsTrueWhenIgnoreAboveIsGivenAsLogsdbDefault mock(NamedAnalyzer.class), mock(NamedAnalyzer.class), builder, - true + true, + false ); // when/then @@ -513,7 +519,8 @@ public void testIgnoreAboveIsSetReturnsTrueWhenIgnoreAboveIsConfiguredAtIndexLev mock(NamedAnalyzer.class), mock(NamedAnalyzer.class), builder, - true + true, + false ); // when/then diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java index 82f1d3a0b687c..62dfb2d90fc68 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java @@ -357,7 +357,8 @@ public void testBlockLoaderDoesNotUseSyntheticSourceDelegateWhenIgnoreAboveIsSet mock(NamedAnalyzer.class), mock(NamedAnalyzer.class), builder, - true + true, + false ); TextFieldType ft = new TextFieldType( @@ -406,7 +407,8 @@ public void testBlockLoaderDoesNotUseSyntheticSourceDelegateWhenIgnoreAboveIsSet mock(NamedAnalyzer.class), mock(NamedAnalyzer.class), builder, - true + true, + false ); TextFieldType ft = new TextFieldType( 
diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java index 4e562217a00e3..79b14aa2c65ea 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValueSourceReaderTypeConversionTests.java @@ -1393,7 +1393,8 @@ private KeywordFieldMapper.KeywordFieldType storedKeywordField(String name) { Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER, new KeywordFieldMapper.Builder(name, IndexVersion.current()).docValues(false), - true // TODO randomize - load from stored keyword fields if stored even in synthetic source + true, // TODO randomize - load from stored keyword fields if stored even in synthetic source + false ); } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java index 3a83d365f6f57..8c0e57e942d05 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java @@ -1580,7 +1580,8 @@ private KeywordFieldMapper.KeywordFieldType storedKeywordField(String name) { Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER, new KeywordFieldMapper.Builder(name, IndexVersion.current()).docValues(false), - true // TODO randomize - load from stored keyword fields if stored even in synthetic source + true, // TODO randomize - load from stored keyword fields if stored even in synthetic source + false ); } diff --git 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java index 29c98a073938e..3cec096140263 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java @@ -240,7 +240,8 @@ static MappedFieldType createUnmappedFieldType(String name, DefaultShardContext Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER, builder, - context.ctx.isSourceSynthetic() + context.ctx.isSourceSynthetic(), + false ); } } diff --git a/x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/keyword_use_binary_doc_values.yml b/x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/keyword_use_binary_doc_values.yml new file mode 100644 index 0000000000000..2f4d22ba233e5 --- /dev/null +++ b/x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/keyword_use_binary_doc_values.yml @@ -0,0 +1,117 @@ +--- +setup: + - do: + indices.create: + index: my-index + body: + settings: + index: + mapping: + use_binary_doc_values: true + mode: logsdb + mappings: + properties: + "@timestamp": + type: date + host.name: + type: keyword + agent_id: + type: keyword + process_id: + type: integer + http_method: + type: keyword + is_https: + type: boolean + location: + type: geo_point + message: + type: text + + - do: + bulk: + index: my-index + refresh: true + body: + - { "index": { } } + - { "@timestamp": "2024-02-12T10:30:00Z", "host.name": "foo", "agent_id": "darth-vader", "process_id": 101, "http_method": "GET", "is_https": false, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "No, I am your father." 
} + - { "index": { } } + - { "@timestamp": "2024-02-12T10:31:00Z", "host.name": "bar", "agent_id": "yoda", "process_id": 102, "http_method": "PUT", "is_https": false, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "Do. Or do not. There is no try." } + - { "index": { } } + - { "@timestamp": "2024-02-12T10:32:00Z", "host.name": "foo", "agent_id": "obi-wan", "process_id": 103, "http_method": "GET", "is_https": false, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "May the force be with you." } + - { "index": { } } + - { "@timestamp": "2024-02-12T10:33:00Z", "host.name": "baz", "agent_id": "darth-vader", "process_id": 102, "http_method": "POST", "is_https": true, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "I find your lack of faith disturbing." } + - { "index": { } } + - { "@timestamp": "2024-02-12T10:34:00Z", "host.name": "baz", "agent_id": "yoda", "process_id": 104, "http_method": "POST", "is_https": false, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "Wars not make one great." } + - { "index": { } } + - { "@timestamp": "2024-02-12T10:35:00Z", "host.name": "foo", "agent_id": "obi-wan", "process_id": 105, "http_method": "GET", "is_https": false, "location": {"lat" : 40.7128, "lon" : -74.0060}, "message": "That's no moon. It's a space station." 
} + +--- +teardown: + - do: + indices.delete: + index: my-index + +--- +"Simple from": + - do: + esql.query: + body: + query: 'FROM my-index | SORT host.name, @timestamp | LIMIT 1' + + - match: {columns.0.name: "@timestamp"} + - match: {columns.0.type: "date"} + - match: {columns.1.name: "agent_id"} + - match: {columns.1.type: "keyword"} + - match: {columns.2.name: "host.name"} + - match: {columns.2.type: "keyword"} + - match: {columns.3.name: "http_method" } + - match: {columns.3.type: "keyword" } + - match: {columns.4.name: "is_https"} + - match: {columns.4.type: "boolean"} + - match: {columns.5.name: "location"} + - match: {columns.5.type: "geo_point"} + - match: {columns.6.name: "message"} + - match: {columns.6.type: "text"} + - match: {columns.7.name: "process_id"} + - match: {columns.7.type: "integer"} + + - match: {values.0.0: "2024-02-12T10:31:00.000Z"} + - match: {values.0.1: "yoda"} + - match: {values.0.2: "bar"} + - match: {values.0.3: "PUT"} + - match: {values.0.4: false} + - match: {values.0.5: "POINT (-74.00600004941225 40.712799984030426)"} + - match: {values.0.6: "Do. Or do not. 
There is no try."} + - match: {values.0.7: 102} + +--- +"Simple from keyword fields": + - do: + esql.query: + body: + query: 'FROM my-index | SORT host.name, @timestamp | KEEP agent_id, http_method | LIMIT 10' + profile: true + + - match: {columns.0.name: "agent_id"} + - match: {columns.0.type: "keyword"} + - match: {columns.1.name: "http_method"} + - match: {columns.1.type: "keyword"} + + - match: {values.0.0: "yoda"} + - match: {values.0.1: "PUT"} + - match: {values.1.0: "darth-vader"} + - match: {values.1.1: "POST"} + - match: {values.2.0: "yoda"} + - match: {values.2.1: "POST"} + - match: {values.3.0: "darth-vader"} + - match: {values.3.1: "GET"} + - match: {values.4.0: "obi-wan"} + - match: {values.4.1: "GET"} + - match: {values.5.0: "obi-wan"} + - match: {values.5.1: "GET"} + + - match: {profile.drivers.0.description: "data"} + - match: {profile.drivers.0.operators.1.operator: "ValuesSourceReaderOperator[fields = [@timestamp, agent_id, host.name, http_method]]"} + - match: {profile.drivers.0.operators.1.status.readers_built.agent_id:row_stride:BlockDocValuesReader\\.Bytes: 1} + - match: {profile.drivers.0.operators.1.status.readers_built.http_method:row_stride:BlockDocValuesReader\\.Bytes: 1}