Skip to content

Commit 1b6a080

Browse files
Sparse doc values index for LogsDB host.name field (#120741)
Here we introduce a new field type in `KeywordFieldMapper` that enables a sparse doc values index for `host.name` when all of the following hold:
- Index mode is `LOGSDB`
- The field is a keyword and part of the primary sort
- Doc values are enabled and indexing is not explicitly disabled

When these conditions are met:
- `DocValuesSkipIndexType.RANGE` is applied
- The inverted index is removed in favor of sparse doc values

This reduces storage footprint and improves indexing throughput but may slow down some queries. The change is gated by a feature flag, with future plans to extend it to `@timestamp`.
1 parent 6b76457 commit 1b6a080

File tree

4 files changed

+297
-9
lines changed

4 files changed

+297
-9
lines changed

server/src/main/java/org/elasticsearch/index/IndexVersions.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@ private static Version parseUnchecked(String version) {
143143
public static final IndexVersion INFERENCE_METADATA_FIELDS = def(9_005_00_0, Version.LUCENE_10_0_0);
144144
public static final IndexVersion LOGSB_OPTIONAL_SORTING_ON_HOST_NAME = def(9_006_00_0, Version.LUCENE_10_0_0);
145145
public static final IndexVersion SOURCE_MAPPER_MODE_ATTRIBUTE_NOOP = def(9_007_00_0, Version.LUCENE_10_0_0);
146+
public static final IndexVersion HOSTNAME_DOC_VALUES_SPARSE_INDEX = def(9_008_00_0, Version.LUCENE_10_0_0);
146147
/*
147148
* STOP! READ THIS FIRST! No, really,
148149
* ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _

server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import org.elasticsearch.common.settings.Setting;
2222
import org.elasticsearch.common.settings.Setting.Property;
2323
import org.elasticsearch.common.util.CollectionUtils;
24+
import org.elasticsearch.common.util.FeatureFlag;
2425
import org.elasticsearch.common.util.Maps;
2526
import org.elasticsearch.common.xcontent.support.XContentMapValues;
2627
import org.elasticsearch.index.IndexMode;
@@ -63,6 +64,7 @@
6364
public abstract class FieldMapper extends Mapper {
6465
private static final Logger logger = LogManager.getLogger(FieldMapper.class);
6566

67+
public static final FeatureFlag DOC_VALUES_SPARSE_INDEX = new FeatureFlag("doc_values_sparse_index");
6668
public static final Setting<Boolean> IGNORE_MALFORMED_SETTING = Setting.boolSetting("index.mapping.ignore_malformed", settings -> {
6769
if (IndexSettings.MODE.get(settings) == IndexMode.LOGSDB
6870
&& IndexMetadata.SETTING_INDEX_VERSION_CREATED.get(settings).onOrAfter(IndexVersions.ENABLE_IGNORE_MALFORMED_LOGSDB)) {
@@ -851,6 +853,10 @@ public boolean isConfigured() {
851853
return isSet && Objects.equals(value, getDefaultValue()) == false;
852854
}
853855

856+
/**
 * Returns the raw {@code isSet} flag of this parameter. Unlike {@link #isConfigured()},
 * the result does not depend on whether the current value differs from the default.
 */
public boolean isSet() {
857+
return isSet;
858+
}
859+
854860
/**
855861
* Allows the parameter to accept a {@code null} value
856862
*/

server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java

Lines changed: 134 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import org.apache.lucene.document.InvertableType;
1919
import org.apache.lucene.document.SortedSetDocValuesField;
2020
import org.apache.lucene.document.StoredField;
21+
import org.apache.lucene.index.DocValuesSkipIndexType;
2122
import org.apache.lucene.index.DocValuesType;
2223
import org.apache.lucene.index.IndexOptions;
2324
import org.apache.lucene.index.IndexReader;
@@ -38,7 +39,10 @@
3839
import org.elasticsearch.common.lucene.search.AutomatonQueries;
3940
import org.elasticsearch.common.unit.Fuzziness;
4041
import org.elasticsearch.core.Nullable;
42+
import org.elasticsearch.index.IndexMode;
43+
import org.elasticsearch.index.IndexSortConfig;
4144
import org.elasticsearch.index.IndexVersion;
45+
import org.elasticsearch.index.IndexVersions;
4246
import org.elasticsearch.index.analysis.IndexAnalyzers;
4347
import org.elasticsearch.index.analysis.NamedAnalyzer;
4448
import org.elasticsearch.index.fielddata.FieldData;
@@ -87,9 +91,11 @@ public final class KeywordFieldMapper extends FieldMapper {
8791
private static final Logger logger = LogManager.getLogger(KeywordFieldMapper.class);
8892

8993
public static final String CONTENT_TYPE = "keyword";
94+
private static final String HOST_NAME = "host.name";
9095

9196
public static class Defaults {
9297
public static final FieldType FIELD_TYPE;
98+
public static final FieldType FIELD_TYPE_WITH_SKIP_DOC_VALUES;
9399

94100
static {
95101
FieldType ft = new FieldType();
@@ -100,6 +106,16 @@ public static class Defaults {
100106
FIELD_TYPE = freezeAndDeduplicateFieldType(ft);
101107
}
102108

109+
// Initializes FIELD_TYPE_WITH_SKIP_DOC_VALUES: an untokenized, norm-less field type with
// no inverted index (IndexOptions.NONE), SORTED_SET doc values and a RANGE doc values skip index.
static {
110+
FieldType ft = new FieldType();
111+
ft.setTokenized(false);
112+
ft.setOmitNorms(true);
113+
// no postings: lookups are expected to go through doc values and their skip index
ft.setIndexOptions(IndexOptions.NONE);
114+
ft.setDocValuesType(DocValuesType.SORTED_SET);
115+
ft.setDocValuesSkipIndexType(DocValuesSkipIndexType.RANGE);
116+
FIELD_TYPE_WITH_SKIP_DOC_VALUES = freezeAndDeduplicateFieldType(ft);
117+
}
118+
103119
public static final TextSearchInfo TEXT_SEARCH_INFO = new TextSearchInfo(
104120
FIELD_TYPE,
105121
null,
@@ -154,7 +170,8 @@ public static final class Builder extends FieldMapper.DimensionBuilder {
154170
);
155171
private final Parameter<Integer> ignoreAbove;
156172
private final int ignoreAboveDefault;
157-
173+
private final IndexSortConfig indexSortConfig;
174+
private final IndexMode indexMode;
158175
private final Parameter<String> indexOptions = TextParams.keywordIndexOptions(m -> toType(m).indexOptions);
159176
private final Parameter<Boolean> hasNorms = TextParams.norms(false, m -> toType(m).fieldType.omitNorms() == false);
160177
private final Parameter<SimilarityProvider> similarity = TextParams.similarity(
@@ -189,7 +206,9 @@ public Builder(final String name, final MappingParserContext mappingParserContex
189206
mappingParserContext.getIndexAnalyzers(),
190207
mappingParserContext.scriptCompiler(),
191208
IGNORE_ABOVE_SETTING.get(mappingParserContext.getSettings()),
192-
mappingParserContext.getIndexSettings().getIndexVersionCreated()
209+
mappingParserContext.getIndexSettings().getIndexVersionCreated(),
210+
mappingParserContext.getIndexSettings().getMode(),
211+
mappingParserContext.getIndexSettings().getIndexSortConfig()
193212
);
194213
}
195214

@@ -199,6 +218,18 @@ public Builder(final String name, final MappingParserContext mappingParserContex
199218
ScriptCompiler scriptCompiler,
200219
int ignoreAboveDefault,
201220
IndexVersion indexCreatedVersion
221+
) {
222+
this(name, indexAnalyzers, scriptCompiler, ignoreAboveDefault, indexCreatedVersion, IndexMode.STANDARD, null);
223+
}
224+
225+
private Builder(
226+
String name,
227+
IndexAnalyzers indexAnalyzers,
228+
ScriptCompiler scriptCompiler,
229+
int ignoreAboveDefault,
230+
IndexVersion indexCreatedVersion,
231+
IndexMode indexMode,
232+
IndexSortConfig indexSortConfig
202233
) {
203234
super(name);
204235
this.indexAnalyzers = indexAnalyzers;
@@ -233,6 +264,8 @@ public Builder(final String name, final MappingParserContext mappingParserContex
233264
throw new IllegalArgumentException("[ignore_above] must be positive, got [" + v + "]");
234265
}
235266
});
267+
this.indexSortConfig = indexSortConfig;
268+
this.indexMode = indexMode;
236269
}
237270

238271
public Builder(String name, IndexVersion indexCreatedVersion) {
@@ -359,15 +392,21 @@ private KeywordFieldType buildFieldType(MapperBuilderContext context, FieldType
359392

360393
@Override
361394
public KeywordFieldMapper build(MapperBuilderContext context) {
362-
FieldType fieldtype = new FieldType(Defaults.FIELD_TYPE);
395+
FieldType fieldtype = resolveFieldType(indexCreatedVersion, indexSortConfig, indexMode, context.buildFullName(leafName()));
363396
fieldtype.setOmitNorms(this.hasNorms.getValue() == false);
364-
fieldtype.setIndexOptions(TextParams.toIndexOptions(this.indexed.getValue(), this.indexOptions.getValue()));
365397
fieldtype.setStored(this.stored.getValue());
366398
fieldtype.setDocValuesType(this.hasDocValues.getValue() ? DocValuesType.SORTED_SET : DocValuesType.NONE);
399+
if (fieldtype.equals(Defaults.FIELD_TYPE_WITH_SKIP_DOC_VALUES) == false) {
400+
// NOTE: override index options only if we are not using a sparse doc values index (and we use an inverted index)
401+
fieldtype.setIndexOptions(TextParams.toIndexOptions(this.indexed.getValue(), this.indexOptions.getValue()));
402+
}
367403
if (fieldtype.equals(Defaults.FIELD_TYPE)) {
368404
// deduplicate in the common default case to save some memory
369405
fieldtype = Defaults.FIELD_TYPE;
370406
}
407+
if (fieldtype.equals(Defaults.FIELD_TYPE_WITH_SKIP_DOC_VALUES)) {
408+
fieldtype = Defaults.FIELD_TYPE_WITH_SKIP_DOC_VALUES;
409+
}
371410
super.hasScript = script.get() != null;
372411
super.onScriptError = onScriptError.getValue();
373412
return new KeywordFieldMapper(
@@ -379,6 +418,63 @@ public KeywordFieldMapper build(MapperBuilderContext context) {
379418
this
380419
);
381420
}
421+
422+
/**
 * Selects the base Lucene field type for the mapper being built.
 *
 * <p>Returns a new copy of {@code Defaults.FIELD_TYPE_WITH_SKIP_DOC_VALUES} only when the
 * {@code DOC_VALUES_SPARSE_INDEX} feature flag is enabled, the index was created on or after
 * {@code IndexVersions.HOSTNAME_DOC_VALUES_SPARSE_INDEX}, and
 * {@code shouldUseDocValuesSparseIndex} accepts the field. In every other case a new copy of
 * {@code Defaults.FIELD_TYPE} is returned.</p>
 *
 * @param indexCreatedVersion version the index was created with
 * @param indexSortConfig index sort configuration, forwarded to {@code shouldUseDocValuesSparseIndex}
 * @param indexMode index mode, forwarded to {@code shouldUseDocValuesSparseIndex}
 * @param fullFieldName full name of the field, forwarded to {@code shouldUseDocValuesSparseIndex}
 * @return a new {@code FieldType} instance copied from one of the two defaults
 */
private FieldType resolveFieldType(
423+
final IndexVersion indexCreatedVersion,
424+
final IndexSortConfig indexSortConfig,
425+
final IndexMode indexMode,
426+
final String fullFieldName
427+
) {
428+
if (FieldMapper.DOC_VALUES_SPARSE_INDEX.isEnabled()
429+
&& indexCreatedVersion.onOrAfter(IndexVersions.HOSTNAME_DOC_VALUES_SPARSE_INDEX)
430+
&& shouldUseDocValuesSparseIndex(indexSortConfig, indexMode, fullFieldName)) {
431+
return new FieldType(Defaults.FIELD_TYPE_WITH_SKIP_DOC_VALUES);
432+
}
433+
return new FieldType(Defaults.FIELD_TYPE);
434+
}
435+
436+
/**
437+
* Determines whether to use a sparse index representation for doc values.
438+
*
439+
* <p>If the field is explicitly indexed by setting {@code index: true}, we do not use
440+
* a sparse doc values index but instead rely on the inverted index, as is typically
441+
* the case for keyword fields.</p>
442+
*
443+
* <p>The sparse index format is applied only when all of the following conditions
444+
* hold:</p>
445+
*
446+
* <ul>
447+
* <li>The {@code index} parameter was not explicitly set to {@code true} (checked first, as an early exit).</li>
448+
* <li>The {@code index} parameter is not configured, i.e. {@code indexed.isConfigured()} is {@code false}.</li>
449+
* <li>Doc values are enabled.</li>
450+
* <li>Index mode is {@link IndexMode#LOGSDB}.</li>
451+
* <li>The full field name is {@code host.name}.</li>
452+
* <li>An index sort configuration is present and {@code host.name} is a primary sort field.</li>
453+
* </ul>
454+
*
455+
* <p>Returns {@code true} if all conditions are met, indicating that sparse doc values
456+
* should be used. Otherwise, returns {@code false}.</p>
457+
*
458+
* @param indexSortConfig The index sort configuration, used to check primary sorting; {@code null} yields {@code false}.
459+
* @param indexMode The mode of the index, which must be {@link IndexMode#LOGSDB}.
460+
* @param fullFieldName The name of the field being checked, which must be {@code host.name}.
461+
* @return {@code true} if sparse doc values should be used, otherwise {@code false}.
462+
*/
463+
464+
private boolean shouldUseDocValuesSparseIndex(
465+
final IndexSortConfig indexSortConfig,
466+
final IndexMode indexMode,
467+
final String fullFieldName
468+
) {
469+
// an explicit index: true keeps the inverted index, so the sparse index is not used
if (indexed.isSet() && indexed.getValue()) {
470+
return false;
471+
}
472+
return indexed.isConfigured() == false
473+
&& hasDocValues.getValue()
474+
&& IndexMode.LOGSDB.equals(indexMode)
475+
&& HOST_NAME.equals(fullFieldName)
476+
&& (indexSortConfig != null && indexSortConfig.hasPrimarySortOnField(HOST_NAME));
477+
}
382478
}
383479

384480
public static final TypeParser PARSER = createTypeParserWithLegacySupport(Builder::new);
@@ -392,6 +488,9 @@ public static final class KeywordFieldType extends StringFieldType {
392488
private final FieldValues<String> scriptValues;
393489
private final boolean isDimension;
394490
private final boolean isSyntheticSource;
491+
private final IndexMode indexMode;
492+
private final IndexSortConfig indexSortConfig;
493+
private final boolean hasDocValuesSparseIndex;
395494

396495
public KeywordFieldType(
397496
String name,
@@ -417,6 +516,9 @@ public KeywordFieldType(
417516
this.scriptValues = builder.scriptValues();
418517
this.isDimension = builder.dimension.getValue();
419518
this.isSyntheticSource = isSyntheticSource;
519+
this.indexMode = builder.indexMode;
520+
this.indexSortConfig = builder.indexSortConfig;
521+
this.hasDocValuesSparseIndex = DocValuesSkipIndexType.NONE.equals(fieldType.docValuesSkipIndexType()) == false;
420522
}
421523

422524
public KeywordFieldType(String name, boolean isIndexed, boolean hasDocValues, Map<String, String> meta) {
@@ -428,6 +530,9 @@ public KeywordFieldType(String name, boolean isIndexed, boolean hasDocValues, Ma
428530
this.scriptValues = null;
429531
this.isDimension = false;
430532
this.isSyntheticSource = false;
533+
this.indexMode = IndexMode.STANDARD;
534+
this.indexSortConfig = null;
535+
this.hasDocValuesSparseIndex = false;
431536
}
432537

433538
public KeywordFieldType(String name) {
@@ -450,6 +555,9 @@ public KeywordFieldType(String name, FieldType fieldType) {
450555
this.scriptValues = null;
451556
this.isDimension = false;
452557
this.isSyntheticSource = false;
558+
this.indexMode = IndexMode.STANDARD;
559+
this.indexSortConfig = null;
560+
this.hasDocValuesSparseIndex = DocValuesSkipIndexType.NONE.equals(fieldType.docValuesSkipIndexType()) == false;
453561
}
454562

455563
public KeywordFieldType(String name, NamedAnalyzer analyzer) {
@@ -461,6 +569,9 @@ public KeywordFieldType(String name, NamedAnalyzer analyzer) {
461569
this.scriptValues = null;
462570
this.isDimension = false;
463571
this.isSyntheticSource = false;
572+
this.indexMode = IndexMode.STANDARD;
573+
this.indexSortConfig = null;
574+
this.hasDocValuesSparseIndex = false;
464575
}
465576

466577
@Override
@@ -851,6 +962,18 @@ public boolean hasScriptValues() {
851962
public boolean hasNormalizer() {
852963
return normalizer != Lucene.KEYWORD_ANALYZER;
853964
}
965+
966+
/**
 * Returns the index mode stored on this field type. In the constructors visible in this change
 * it is either the builder's index mode or {@code IndexMode.STANDARD}.
 */
public IndexMode getIndexMode() {
967+
return indexMode;
968+
}
969+
970+
/**
 * Returns the index sort configuration stored on this field type. May be {@code null}: the
 * constructors that do not take a builder assign {@code null}.
 */
public IndexSortConfig getIndexSortConfig() {
971+
return indexSortConfig;
972+
}
973+
974+
/**
 * Returns whether this field type uses a doc values skip index. When the field type is built
 * from a Lucene {@code FieldType}, this is {@code true} iff its
 * {@code docValuesSkipIndexType()} is not {@code DocValuesSkipIndexType.NONE}.
 */
public boolean hasDocValuesSparseIndex() {
975+
return hasDocValuesSparseIndex;
976+
}
854977
}
855978

856979
private final boolean indexed;
@@ -866,7 +989,8 @@ public boolean hasNormalizer() {
866989

867990
private final IndexAnalyzers indexAnalyzers;
868991
private final int ignoreAboveDefault;
869-
private final int ignoreAbove;
992+
private final IndexMode indexMode;
993+
private final IndexSortConfig indexSortConfig;
870994

871995
private KeywordFieldMapper(
872996
String simpleName,
@@ -890,7 +1014,8 @@ private KeywordFieldMapper(
8901014
this.indexCreatedVersion = builder.indexCreatedVersion;
8911015
this.isSyntheticSource = isSyntheticSource;
8921016
this.ignoreAboveDefault = builder.ignoreAboveDefault;
893-
this.ignoreAbove = builder.ignoreAbove.getValue();
1017+
this.indexMode = builder.indexMode;
1018+
this.indexSortConfig = builder.indexSortConfig;
8941019
}
8951020

8961021
@Override
@@ -1008,9 +1133,9 @@ public Map<String, NamedAnalyzer> indexAnalyzers() {
10081133

10091134
@Override
10101135
public FieldMapper.Builder getMergeBuilder() {
1011-
return new Builder(leafName(), indexAnalyzers, scriptCompiler, ignoreAboveDefault, indexCreatedVersion).dimension(
1012-
fieldType().isDimension()
1013-
).init(this);
1136+
return new Builder(leafName(), indexAnalyzers, scriptCompiler, ignoreAboveDefault, indexCreatedVersion, indexMode, indexSortConfig)
1137+
.dimension(fieldType().isDimension())
1138+
.init(this);
10141139
}
10151140

10161141
@Override

0 commit comments

Comments
 (0)