Skip to content

Commit ef89b90

Browse files
authored
Use docvalue skippers on dimension fields (#137029)
Disable points and terms indexes on dimension fields, and use doc_values skippers instead. Dimension fields are part of the `_tsid`, so they align with the index sort; as a result, doc_values skippers work well for querying and filtering on these fields, and they have a much smaller disk footprint than full indexes.
1 parent 56f05de commit ef89b90

File tree

35 files changed

+404
-408
lines changed

35 files changed

+404
-408
lines changed

benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/ValuesSourceReaderBenchmark.java

Lines changed: 31 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
import org.elasticsearch.cluster.metadata.IndexMetadata;
2727
import org.elasticsearch.common.breaker.NoopCircuitBreaker;
2828
import org.elasticsearch.common.logging.LogConfigurator;
29-
import org.elasticsearch.common.lucene.Lucene;
3029
import org.elasticsearch.common.settings.Settings;
3130
import org.elasticsearch.common.unit.ByteSizeValue;
3231
import org.elasticsearch.common.util.BigArrays;
@@ -227,50 +226,44 @@ private static BlockLoader blockLoader(String name) {
227226
break;
228227
}
229228
ft.freeze();
230-
return new KeywordFieldMapper.KeywordFieldType(
231-
w.name,
232-
ft,
233-
Lucene.KEYWORD_ANALYZER,
234-
Lucene.KEYWORD_ANALYZER,
235-
Lucene.KEYWORD_ANALYZER,
236-
new KeywordFieldMapper.Builder(name, defaultIndexSettings()).docValues(ft.docValuesType() != DocValuesType.NONE),
237-
syntheticSource
238-
).blockLoader(new MappedFieldType.BlockLoaderContext() {
239-
@Override
240-
public String indexName() {
241-
return "benchmark";
242-
}
229+
return new KeywordFieldMapper.KeywordFieldType(w.name, ft, syntheticSource).blockLoader(
230+
new MappedFieldType.BlockLoaderContext() {
231+
@Override
232+
public String indexName() {
233+
return "benchmark";
234+
}
243235

244-
@Override
245-
public IndexSettings indexSettings() {
246-
throw new UnsupportedOperationException();
247-
}
236+
@Override
237+
public IndexSettings indexSettings() {
238+
throw new UnsupportedOperationException();
239+
}
248240

249-
@Override
250-
public MappedFieldType.FieldExtractPreference fieldExtractPreference() {
251-
return MappedFieldType.FieldExtractPreference.NONE;
252-
}
241+
@Override
242+
public MappedFieldType.FieldExtractPreference fieldExtractPreference() {
243+
return MappedFieldType.FieldExtractPreference.NONE;
244+
}
253245

254-
@Override
255-
public SearchLookup lookup() {
256-
throw new UnsupportedOperationException();
257-
}
246+
@Override
247+
public SearchLookup lookup() {
248+
throw new UnsupportedOperationException();
249+
}
258250

259-
@Override
260-
public Set<String> sourcePaths(String name) {
261-
return Set.of(name);
262-
}
251+
@Override
252+
public Set<String> sourcePaths(String name) {
253+
return Set.of(name);
254+
}
263255

264-
@Override
265-
public String parentField(String field) {
266-
throw new UnsupportedOperationException();
267-
}
256+
@Override
257+
public String parentField(String field) {
258+
throw new UnsupportedOperationException();
259+
}
268260

269-
@Override
270-
public FieldNamesFieldMapper.FieldNamesFieldType fieldNames() {
271-
return FieldNamesFieldMapper.FieldNamesFieldType.get(true);
261+
@Override
262+
public FieldNamesFieldMapper.FieldNamesFieldType fieldNames() {
263+
return FieldNamesFieldMapper.FieldNamesFieldType.get(true);
264+
}
272265
}
273-
});
266+
);
274267
}
275268
throw new IllegalArgumentException("can't read [" + name + "]");
276269
}

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldMapper.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -722,7 +722,13 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
722722
}
723723
long scaledValue = encode(doubleValue, scalingFactor);
724724

725-
NumberFieldMapper.NumberType.LONG.addFields(context.doc(), fieldType().name(), scaledValue, indexed, hasDocValues, stored);
725+
NumberFieldMapper.NumberType.LONG.addFields(
726+
context.doc(),
727+
fieldType().name(),
728+
scaledValue,
729+
IndexType.points(indexed, hasDocValues),
730+
stored
731+
);
726732

727733
if (shouldStoreOffsets) {
728734
context.getOffSetContext().recordOffset(offsetsFieldName, scaledValue);

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/TokenCountFieldMapper.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,13 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
164164
tokenCount = countPositions(analyzer, fullPath(), value, enablePositionIncrements);
165165
}
166166

167-
NumberFieldMapper.NumberType.INTEGER.addFields(context.doc(), fieldType().name(), tokenCount, index, hasDocValues, store);
167+
NumberFieldMapper.NumberType.INTEGER.addFields(
168+
context.doc(),
169+
fieldType().name(),
170+
tokenCount,
171+
IndexType.points(index, hasDocValues),
172+
store
173+
);
168174
}
169175

170176
/**

modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
import org.elasticsearch.index.mapper.BlockLoader;
4343
import org.elasticsearch.index.mapper.FieldNamesFieldMapper;
4444
import org.elasticsearch.index.mapper.FieldTypeTestCase;
45+
import org.elasticsearch.index.mapper.IndexType;
4546
import org.elasticsearch.index.mapper.KeywordFieldMapper;
4647
import org.elasticsearch.index.mapper.MappedFieldType;
4748
import org.elasticsearch.index.mapper.MappingParserContext;
@@ -295,9 +296,8 @@ public void testBlockLoaderDoesNotUseSyntheticSourceDelegateWhenIgnoreAboveIsSet
295296

296297
KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate = new KeywordFieldMapper.KeywordFieldType(
297298
"child",
298-
mock(FieldType.class),
299-
mock(NamedAnalyzer.class),
300-
mock(NamedAnalyzer.class),
299+
IndexType.terms(true, true),
300+
new TextSearchInfo(mock(FieldType.class), null, mock(NamedAnalyzer.class), mock(NamedAnalyzer.class)),
301301
mock(NamedAnalyzer.class),
302302
builder,
303303
true
@@ -344,9 +344,8 @@ public void testBlockLoaderDoesNotUseSyntheticSourceDelegateWhenIgnoreAboveIsSet
344344

345345
KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate = new KeywordFieldMapper.KeywordFieldType(
346346
"child",
347-
mock(FieldType.class),
348-
mock(NamedAnalyzer.class),
349-
mock(NamedAnalyzer.class),
347+
IndexType.terms(true, true),
348+
new TextSearchInfo(mock(FieldType.class), null, mock(NamedAnalyzer.class), mock(NamedAnalyzer.class)),
350349
mock(NamedAnalyzer.class),
351350
builder,
352351
true

modules/percolator/src/test/java/org/elasticsearch/percolator/CandidateQueryTests.java

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@
7979
import org.elasticsearch.index.IndexService;
8080
import org.elasticsearch.index.IndexVersion;
8181
import org.elasticsearch.index.mapper.DocumentParserContext;
82+
import org.elasticsearch.index.mapper.IndexType;
8283
import org.elasticsearch.index.mapper.LuceneDocument;
8384
import org.elasticsearch.index.mapper.MappedFieldType;
8485
import org.elasticsearch.index.mapper.MapperService;
@@ -301,7 +302,7 @@ public void testDuel() throws Exception {
301302
document.add(new TextField(entry.getKey(), value, Field.Store.NO));
302303
}
303304
for (Integer intValue : intValues) {
304-
NumberFieldMapper.NumberType.INTEGER.addFields(document, "int_field", intValue, true, true, false);
305+
NumberFieldMapper.NumberType.INTEGER.addFields(document, "int_field", intValue, IndexType.points(true, true), false);
305306
}
306307
MemoryIndex memoryIndex = MemoryIndex.fromDocument(document, new WhitespaceAnalyzer());
307308
duelRun(queryStore, memoryIndex, shardSearcher);
@@ -424,7 +425,13 @@ public void testDuel2() throws Exception {
424425
}
425426

426427
for (int[] range : ranges) {
427-
NumberFieldMapper.NumberType.INTEGER.addFields(document, "int_field", between(range[0], range[1]), true, true, false);
428+
NumberFieldMapper.NumberType.INTEGER.addFields(
429+
document,
430+
"int_field",
431+
between(range[0], range[1]),
432+
IndexType.points(true, true),
433+
false
434+
);
428435
logger.info("Test with document: {}" + document);
429436
MemoryIndex memoryIndex = MemoryIndex.fromDocument(document, new WhitespaceAnalyzer());
430437
duelRun(queryStore, memoryIndex, shardSearcher);

plugins/mapper-size/src/main/java/org/elasticsearch/index/mapper/size/SizeFieldMapper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ public void postParse(DocumentParserContext context) {
104104
return;
105105
}
106106
final int value = context.sourceToParse().source().length();
107-
NumberType.INTEGER.addFields(context.doc(), fullPath(), value, true, true, true);
107+
NumberType.INTEGER.addFields(context.doc(), fullPath(), value, IndexType.points(true, true), true);
108108
}
109109

110110
@Override

server/src/main/java/org/elasticsearch/index/IndexVersions.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,7 @@ private static Version parseUnchecked(String version) {
192192
public static final IndexVersion REENABLED_TIMESTAMP_DOC_VALUES_SPARSE_INDEX = def(9_042_0_00, Version.LUCENE_10_3_1);
193193
public static final IndexVersion SKIPPERS_ENABLED_BY_DEFAULT = def(9_043_0_00, Version.LUCENE_10_3_1);
194194
public static final IndexVersion TIME_SERIES_USE_SYNTHETIC_ID = def(9_044_0_00, Version.LUCENE_10_3_1);
195+
public static final IndexVersion TIME_SERIES_DIMENSIONS_USE_SKIPPERS = def(9_045_0_00, Version.LUCENE_10_3_1);
195196

196197
/*
197198
* STOP! READ THIS FIRST! No, really,

server/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java

Lines changed: 29 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ private static BooleanFieldMapper toType(FieldMapper in) {
7979
public static final class Builder extends FieldMapper.DimensionBuilder {
8080

8181
private final Parameter<Boolean> docValues = Parameter.docValuesParam(m -> toType(m).hasDocValues, true);
82-
private final Parameter<Boolean> indexed = Parameter.indexParam(m -> toType(m).indexed, true);
82+
private final Parameter<Boolean> indexed;
8383
private final Parameter<Boolean> stored = Parameter.storeParam(m -> toType(m).stored, false);
8484
private final Parameter<Explicit<Boolean>> ignoreMalformed;
8585
private final Parameter<Boolean> nullValue = new Parameter<>(
@@ -117,20 +117,9 @@ public Builder(String name, ScriptCompiler scriptCompiler, IndexSettings indexSe
117117
IGNORE_MALFORMED_SETTING.get(indexSettings.getSettings())
118118
);
119119
this.script.precludesParameters(ignoreMalformed, nullValue);
120+
this.dimension = TimeSeriesParams.dimensionParam(m -> toType(m).fieldType().isDimension(), docValues::get);
121+
this.indexed = Parameter.indexParam(m -> toType(m).indexed, indexSettings, dimension);
120122
addScriptValidation(script, indexed, docValues);
121-
this.dimension = TimeSeriesParams.dimensionParam(m -> toType(m).fieldType().isDimension()).addValidator(v -> {
122-
if (v && (indexed.getValue() == false || docValues.getValue() == false)) {
123-
throw new IllegalArgumentException(
124-
"Field ["
125-
+ TimeSeriesParams.TIME_SERIES_DIMENSION_PARAM
126-
+ "] requires that ["
127-
+ indexed.name
128-
+ "] and ["
129-
+ docValues.name
130-
+ "] are true"
131-
);
132-
}
133-
});
134123
}
135124

136125
public Builder dimension(boolean dimension) {
@@ -152,16 +141,30 @@ protected Parameter<?>[] getParameters() {
152141
dimension };
153142
}
154143

144+
private IndexType indexType() {
145+
if (indexed.get() && indexSettings.getIndexVersionCreated().isLegacyIndexVersion() == false) {
146+
return IndexType.terms(true, docValues.getValue());
147+
}
148+
if (docValues.get() == false) {
149+
return IndexType.NONE;
150+
}
151+
if (dimension.get()
152+
&& indexSettings.useDocValuesSkipper()
153+
&& indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.TIME_SERIES_DIMENSIONS_USE_SKIPPERS)) {
154+
return IndexType.skippers();
155+
}
156+
return IndexType.docValuesOnly();
157+
}
158+
155159
@Override
156160
public BooleanFieldMapper build(MapperBuilderContext context) {
157161
if (inheritDimensionParameterFromParentObject(context)) {
158162
dimension(true);
159163
}
160164
MappedFieldType ft = new BooleanFieldType(
161165
context.buildFullName(leafName()),
162-
indexed.getValue() && indexSettings.getIndexVersionCreated().isLegacyIndexVersion() == false,
166+
indexType(),
163167
stored.getValue(),
164-
docValues.getValue(),
165168
nullValue.getValue(),
166169
scriptValues(),
167170
meta.getValue(),
@@ -215,32 +218,27 @@ public static final class BooleanFieldType extends TermBasedFieldType {
215218

216219
public BooleanFieldType(
217220
String name,
218-
boolean isIndexed,
221+
IndexType indexType,
219222
boolean isStored,
220-
boolean hasDocValues,
221223
Boolean nullValue,
222224
FieldValues<Boolean> scriptValues,
223225
Map<String, String> meta,
224226
boolean isDimension,
225227
boolean isSyntheticSource
226228
) {
227-
super(name, IndexType.terms(isIndexed, hasDocValues), isStored, TextSearchInfo.SIMPLE_MATCH_ONLY, meta);
229+
super(name, indexType, isStored, TextSearchInfo.SIMPLE_MATCH_ONLY, meta);
228230
this.nullValue = nullValue;
229231
this.scriptValues = scriptValues;
230232
this.isDimension = isDimension;
231233
this.isSyntheticSource = isSyntheticSource;
232234
}
233235

234236
public BooleanFieldType(String name) {
235-
this(name, true);
236-
}
237-
238-
public BooleanFieldType(String name, boolean isIndexed) {
239-
this(name, isIndexed, true);
237+
this(name, IndexType.terms(true, true));
240238
}
241239

242-
public BooleanFieldType(String name, boolean isIndexed, boolean hasDocValues) {
243-
this(name, isIndexed, isIndexed, hasDocValues, false, null, Collections.emptyMap(), false, false);
240+
public BooleanFieldType(String name, IndexType indexType) {
241+
this(name, indexType, true, false, null, Collections.emptyMap(), false, false);
244242
}
245243

246244
@Override
@@ -601,7 +599,11 @@ private void indexValue(DocumentParserContext context, Boolean value) {
601599
context.doc().add(new StoredField(fieldType().name(), value ? "T" : "F"));
602600
}
603601
if (hasDocValues) {
604-
context.doc().add(new SortedNumericDocValuesField(fieldType().name(), value ? 1 : 0));
602+
if (fieldType().indexType.hasDocValuesSkipper()) {
603+
context.doc().add(SortedNumericDocValuesField.indexedField(fieldType().name(), value ? 1 : 0));
604+
} else {
605+
context.doc().add(new SortedNumericDocValuesField(fieldType().name(), value ? 1 : 0));
606+
}
605607
} else {
606608
context.addToFieldNames(fieldType().name());
607609
}

server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1314,6 +1314,21 @@ public static Parameter<Boolean> indexParam(Function<FieldMapper, Boolean> initi
13141314
return Parameter.boolParam("index", false, initializer, defaultValue);
13151315
}
13161316

1317+
public static Parameter<Boolean> indexParam(
1318+
Function<FieldMapper, Boolean> initializer,
1319+
IndexSettings indexSettings,
1320+
Supplier<Boolean> isDimension
1321+
) {
1322+
return Parameter.boolParam(
1323+
"index",
1324+
false,
1325+
initializer,
1326+
() -> isDimension.get() == false
1327+
|| indexSettings.useDocValuesSkipper() == false
1328+
|| indexSettings.getIndexVersionCreated().before(IndexVersions.TIME_SERIES_DIMENSIONS_USE_SKIPPERS)
1329+
);
1330+
}
1331+
13171332
public static Parameter<Boolean> storeParam(Function<FieldMapper, Boolean> initializer, boolean defaultValue) {
13181333
return Parameter.boolParam("store", false, initializer, defaultValue);
13191334
}

server/src/main/java/org/elasticsearch/index/mapper/GeoPointFieldMapper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ public Builder(String name, ScriptCompiler scriptCompiler, IndexSettings indexSe
143143
}
144144
});
145145
// We allow `time_series_dimension` parameter to be parsed, but only allow it to be `false`
146-
this.dimension = TimeSeriesParams.dimensionParam(m -> false).addValidator(v -> {
146+
this.dimension = TimeSeriesParams.dimensionParam(m -> false, () -> true).addValidator(v -> {
147147
if (v) {
148148
throw new IllegalArgumentException(
149149
"Parameter [" + TimeSeriesParams.TIME_SERIES_DIMENSION_PARAM + "] cannot be set to geo_point"

0 commit comments

Comments
 (0)