Skip to content

Commit 328fed8

Browse files
Kubik42 and martijnvg authored
Don't normalize fields of type text when the index mode is LogsDB or TSDB (#131317)
* Don't normalize fields of type text when the index mode is LOGSDB or TSDB. In the context of logsdb and tsdb, not many fields are configured to be of type text, and given the trade offs that logsdb and tsdb provide that is geared towards storage reduction, it makes sense not to store a normalized version of text fields. Closes #129183 * Update docs/changelog/131317.yaml * Update 131317.yaml * Update docs/changelog/131317.yaml * Update 131317.yaml * Update 131317.yaml * Update docs/changelog/131317.yaml * Update server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java Co-authored-by: Martijn van Groningen <[email protected]> * Update docs/changelog/131317.yaml Co-authored-by: Martijn van Groningen <[email protected]> * Update docs/changelog/131317.yaml Co-authored-by: Martijn van Groningen <[email protected]> --------- Co-authored-by: Martijn van Groningen <[email protected]>
1 parent 6d21904 commit 328fed8

File tree

9 files changed

+197
-15
lines changed

9 files changed

+197
-15
lines changed

docs/changelog/131317.yaml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
pr: 131317
2+
summary: Don't enable norms for fields of type text when the index mode is LogsDB or TSDB
3+
area: Mapping
4+
type: breaking
5+
issues: []
6+
breaking:
7+
title: Don't enable norms for fields of type text when the index mode is LogsDB or TSDB
8+
area: Mapping
9+
details: "This changes the default behavior for norms on `text` fields in logsdb\
10+
\ and tsdb indices. Prior to this change, norms were enabled by default, with\
11+
\ the option to disable them via manual configurations. After this change, norms\
12+
\ will be disabled by default. Note, because we don't support enabling norms from\
13+
\ a disabled state, users will not be able to enable norms on `text` fields in\
14+
\ logsdb and tsdb indices."
15+
impact: Text fields will no longer be normalized by default in LogsDB and TSDB indices.
16+
notable: false

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/SearchAsYouTypeFieldMapper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ public static class Builder extends FieldMapper.Builder {
138138
final Parameter<SimilarityProvider> similarity = TextParams.similarity(m -> builder(m).similarity.get());
139139

140140
final Parameter<String> indexOptions = TextParams.textIndexOptions(m -> builder(m).indexOptions.get());
141-
final Parameter<Boolean> norms = TextParams.norms(true, m -> builder(m).norms.get());
141+
final Parameter<Boolean> norms = Parameter.normsParam(m -> builder(m).norms.get(), true);
142142
final Parameter<String> termVectors = TextParams.termVectors(m -> builder(m).termVectors.get());
143143

144144
private final Parameter<Map<String, String>> meta = Parameter.metaParam();

plugins/analysis-icu/src/main/java/org/elasticsearch/plugin/analysis/icu/ICUCollationKeywordFieldMapper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ public static class Builder extends FieldMapper.Builder {
226226
final Parameter<Boolean> stored = Parameter.storeParam(m -> toType(m).fieldType.stored(), false);
227227

228228
final Parameter<String> indexOptions = TextParams.keywordIndexOptions(m -> toType(m).indexOptions);
229-
final Parameter<Boolean> hasNorms = TextParams.norms(false, m -> toType(m).fieldType.omitNorms() == false);
229+
final Parameter<Boolean> hasNorms = Parameter.normsParam(m -> toType(m).fieldType.omitNorms() == false, false);
230230

231231
final Parameter<Map<String, String>> meta = Parameter.metaParam();
232232

plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ public static class Builder extends FieldMapper.Builder {
8282

8383
final Parameter<SimilarityProvider> similarity = TextParams.similarity(m -> builder(m).similarity.getValue());
8484
final Parameter<String> indexOptions = TextParams.textIndexOptions(m -> builder(m).indexOptions.getValue());
85-
final Parameter<Boolean> norms = TextParams.norms(true, m -> builder(m).norms.getValue());
85+
final Parameter<Boolean> norms = Parameter.normsParam(m -> builder(m).norms.getValue(), true);
8686
final Parameter<String> termVectors = TextParams.termVectors(m -> builder(m).termVectors.getValue());
8787

8888
private final Parameter<Map<String, String>> meta = Parameter.metaParam();

server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1325,6 +1325,18 @@ public static Parameter<Boolean> docValuesParam(Function<FieldMapper, Boolean> i
13251325
return Parameter.boolParam("doc_values", false, initializer, defaultValue);
13261326
}
13271327

1328+
public static Parameter<Boolean> normsParam(Function<FieldMapper, Boolean> initializer, boolean defaultValue) {
1329+
// norms can be updated from 'true' to 'false' but not vice-versa
1330+
return Parameter.boolParam("norms", true, initializer, defaultValue)
1331+
.setMergeValidator((prev, curr, c) -> prev == curr || (prev && curr == false));
1332+
}
1333+
1334+
public static Parameter<Boolean> normsParam(Function<FieldMapper, Boolean> initializer, Supplier<Boolean> defaultValueSupplier) {
1335+
// norms can be updated from 'true' to 'false' but not vice-versa
1336+
return Parameter.boolParam("norms", true, initializer, defaultValueSupplier)
1337+
.setMergeValidator((prev, curr, c) -> prev == curr || (prev && curr == false));
1338+
}
1339+
13281340
/**
13291341
* Defines a script parameter
13301342
* @param initializer retrieves the equivalent parameter from an existing FieldMapper for use in merges

server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ public static final class Builder extends FieldMapper.DimensionBuilder {
183183
private final IndexSortConfig indexSortConfig;
184184
private final IndexMode indexMode;
185185
private final Parameter<String> indexOptions = TextParams.keywordIndexOptions(m -> toType(m).indexOptions);
186-
private final Parameter<Boolean> hasNorms = TextParams.norms(false, m -> toType(m).fieldType.omitNorms() == false);
186+
private final Parameter<Boolean> hasNorms = Parameter.normsParam(m -> toType(m).fieldType.omitNorms() == false, false);
187187
private final Parameter<SimilarityProvider> similarity = TextParams.similarity(
188188
m -> toType(m).fieldType().getTextSearchInfo().similarity()
189189
);

server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
import org.elasticsearch.common.unit.Fuzziness;
5656
import org.elasticsearch.common.xcontent.support.XContentMapValues;
5757
import org.elasticsearch.core.Nullable;
58+
import org.elasticsearch.index.IndexMode;
5859
import org.elasticsearch.index.IndexVersion;
5960
import org.elasticsearch.index.IndexVersions;
6061
import org.elasticsearch.index.analysis.AnalyzerScope;
@@ -239,15 +240,17 @@ public static class Builder extends FieldMapper.Builder {
239240

240241
private final IndexVersion indexCreatedVersion;
241242
private final Parameter<Boolean> store;
243+
private final Parameter<Boolean> norms;
242244

243245
private final boolean isSyntheticSourceEnabled;
244246

247+
private final IndexMode indexMode;
248+
245249
private final Parameter<Boolean> index = Parameter.indexParam(m -> ((TextFieldMapper) m).index, true);
246250

247251
final Parameter<SimilarityProvider> similarity = TextParams.similarity(m -> ((TextFieldMapper) m).similarity);
248252

249253
final Parameter<String> indexOptions = TextParams.textIndexOptions(m -> ((TextFieldMapper) m).indexOptions);
250-
final Parameter<Boolean> norms = TextParams.norms(true, m -> ((TextFieldMapper) m).norms);
251254
final Parameter<String> termVectors = TextParams.termVectors(m -> ((TextFieldMapper) m).termVectors);
252255

253256
final Parameter<Boolean> fieldData = Parameter.boolParam("fielddata", true, m -> ((TextFieldMapper) m).fieldData, false);
@@ -290,26 +293,37 @@ public static class Builder extends FieldMapper.Builder {
290293
private final boolean withinMultiField;
291294

292295
public Builder(String name, IndexAnalyzers indexAnalyzers, boolean isSyntheticSourceEnabled) {
293-
this(name, IndexVersion.current(), indexAnalyzers, isSyntheticSourceEnabled, false);
296+
this(name, IndexVersion.current(), null, indexAnalyzers, isSyntheticSourceEnabled, false);
294297
}
295298

296299
public Builder(
297300
String name,
298301
IndexVersion indexCreatedVersion,
302+
IndexMode indexMode,
299303
IndexAnalyzers indexAnalyzers,
300304
boolean isSyntheticSourceEnabled,
301305
boolean withinMultiField
302306
) {
303307
super(name);
304308

309+
this.indexCreatedVersion = indexCreatedVersion;
310+
this.indexMode = indexMode;
311+
this.isSyntheticSourceEnabled = isSyntheticSourceEnabled;
312+
this.withinMultiField = withinMultiField;
313+
314+
// don't enable norms by default if the index is LOGSDB or TSDB based
315+
this.norms = Parameter.normsParam(
316+
m -> ((TextFieldMapper) m).norms,
317+
() -> indexMode != IndexMode.LOGSDB && indexMode != IndexMode.TIME_SERIES
318+
);
319+
305320
// If synthetic source is used we need to either store this field
306321
// to recreate the source or use keyword multi-fields for that.
307322
// So if there are no suitable multi-fields we will default to
308323
// storing the field without requiring users to explicitly set 'store'.
309324
//
310325
// If 'store' parameter was explicitly provided we'll reject the request.
311326
// Note that if current builder is a multi field, then we don't need to store, given that responsibility lies with parent field
312-
this.withinMultiField = withinMultiField;
313327
this.store = Parameter.storeParam(m -> ((TextFieldMapper) m).store, () -> {
314328
if (multiFieldsNotStoredByDefaultIndexVersionCheck(indexCreatedVersion)) {
315329
return isSyntheticSourceEnabled
@@ -319,14 +333,12 @@ public Builder(
319333
return isSyntheticSourceEnabled && multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false;
320334
}
321335
});
322-
this.indexCreatedVersion = indexCreatedVersion;
323336
this.analyzers = new TextParams.Analyzers(
324337
indexAnalyzers,
325338
m -> ((TextFieldMapper) m).indexAnalyzer,
326339
m -> (((TextFieldMapper) m).positionIncrementGap),
327340
indexCreatedVersion
328341
);
329-
this.isSyntheticSourceEnabled = isSyntheticSourceEnabled;
330342
}
331343

332344
public static boolean multiFieldsNotStoredByDefaultIndexVersionCheck(IndexVersion indexCreatedVersion) {
@@ -508,6 +520,7 @@ public TextFieldMapper build(MapperBuilderContext context) {
508520
(n, c) -> new Builder(
509521
n,
510522
c.indexVersionCreated(),
523+
c.getIndexSettings().getMode(),
511524
c.getIndexAnalyzers(),
512525
SourceFieldMapper.isSynthetic(c.getIndexSettings()),
513526
c.isWithinMultiField()
@@ -1323,6 +1336,7 @@ public Query existsQuery(SearchExecutionContext context) {
13231336
}
13241337

13251338
private final IndexVersion indexCreatedVersion;
1339+
private final IndexMode indexMode;
13261340
private final boolean index;
13271341
private final boolean store;
13281342
private final String indexOptions;
@@ -1361,6 +1375,7 @@ private TextFieldMapper(
13611375
this.prefixFieldInfo = prefixFieldInfo;
13621376
this.phraseFieldInfo = phraseFieldInfo;
13631377
this.indexCreatedVersion = builder.indexCreatedVersion;
1378+
this.indexMode = builder.indexMode;
13641379
this.indexAnalyzer = builder.analyzers.getIndexAnalyzer();
13651380
this.indexAnalyzers = builder.analyzers.indexAnalyzers;
13661381
this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue();
@@ -1398,7 +1413,9 @@ public Map<String, NamedAnalyzer> indexAnalyzers() {
13981413

13991414
@Override
14001415
public FieldMapper.Builder getMergeBuilder() {
1401-
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, isSyntheticSourceEnabled, isWithinMultiField).init(this);
1416+
return new Builder(leafName(), indexCreatedVersion, indexMode, indexAnalyzers, isSyntheticSourceEnabled, isWithinMultiField).init(
1417+
this
1418+
);
14021419
}
14031420

14041421
@Override

server/src/main/java/org/elasticsearch/index/mapper/TextParams.java

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -123,11 +123,6 @@ private NamedAnalyzer wrapAnalyzer(NamedAnalyzer a) {
123123
}
124124
}
125125

126-
public static Parameter<Boolean> norms(boolean defaultValue, Function<FieldMapper, Boolean> initializer) {
127-
// norms can be updated from 'true' to 'false' but not vv
128-
return Parameter.boolParam("norms", true, initializer, defaultValue).setMergeValidator((o, n, c) -> o == n || (o && n == false));
129-
}
130-
131126
public static Parameter<SimilarityProvider> similarity(Function<FieldMapper, SimilarityProvider> init) {
132127
return new Parameter<>(
133128
"similarity",

server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
import org.elasticsearch.cluster.metadata.IndexMetadata;
4949
import org.elasticsearch.common.Strings;
5050
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
51+
import org.elasticsearch.common.settings.Settings;
5152
import org.elasticsearch.index.IndexMode;
5253
import org.elasticsearch.index.IndexSettings;
5354
import org.elasticsearch.index.IndexVersion;
@@ -79,6 +80,8 @@
7980
import org.junit.AssumptionViolatedException;
8081

8182
import java.io.IOException;
83+
import java.time.Instant;
84+
import java.time.temporal.ChronoUnit;
8285
import java.util.Arrays;
8386
import java.util.Collections;
8487
import java.util.HashMap;
@@ -1432,4 +1435,143 @@ public void testEmpty() throws Exception {
14321435
assertFalse(dv.advanceExact(3));
14331436
});
14341437
}
1438+
1439+
public void testNormalizeByDefault() throws IOException {
1440+
// given
1441+
Settings.Builder indexSettingsBuilder = getIndexSettingsBuilder();
1442+
indexSettingsBuilder.put(IndexSettings.MODE.getKey(), IndexMode.STANDARD.getName());
1443+
Settings indexSettings = indexSettingsBuilder.build();
1444+
1445+
XContentBuilder mapping = mapping(b -> {
1446+
b.startObject("potato");
1447+
b.field("type", "text");
1448+
b.endObject();
1449+
});
1450+
1451+
var source = source(b -> b.field("potato", "a potato flew around my room"));
1452+
1453+
// when
1454+
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
1455+
ParsedDocument doc = mapper.parse(source);
1456+
1457+
List<IndexableField> fields = doc.rootDoc().getFields("potato");
1458+
IndexableFieldType fieldType = fields.get(0).fieldType();
1459+
1460+
// then
1461+
assertThat(fieldType.omitNorms(), is(false));
1462+
}
1463+
1464+
public void testNormalizeWhenIndexModeIsNotGiven() throws IOException {
1465+
// given
1466+
Settings.Builder indexSettingsBuilder = getIndexSettingsBuilder();
1467+
Settings indexSettings = indexSettingsBuilder.build();
1468+
1469+
XContentBuilder mapping = mapping(b -> {
1470+
b.startObject("potato");
1471+
b.field("type", "text");
1472+
b.endObject();
1473+
});
1474+
1475+
var source = source(b -> b.field("potato", "a potato flew around my room"));
1476+
1477+
// when
1478+
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
1479+
ParsedDocument doc = mapper.parse(source);
1480+
1481+
List<IndexableField> fields = doc.rootDoc().getFields("potato");
1482+
IndexableFieldType fieldType = fields.get(0).fieldType();
1483+
1484+
// then
1485+
assertThat(fieldType.omitNorms(), is(false));
1486+
}
1487+
1488+
public void testNormalizeWhenIndexModeIsNull() throws IOException {
1489+
// given
1490+
Settings.Builder indexSettingsBuilder = getIndexSettingsBuilder();
1491+
indexSettingsBuilder.put(IndexSettings.MODE.getKey(), (String) null);
1492+
Settings indexSettings = indexSettingsBuilder.build();
1493+
1494+
XContentBuilder mapping = mapping(b -> {
1495+
b.startObject("potato");
1496+
b.field("type", "text");
1497+
b.endObject();
1498+
});
1499+
1500+
var source = source(b -> b.field("potato", "a potato flew around my room"));
1501+
1502+
// when
1503+
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
1504+
ParsedDocument doc = mapper.parse(source);
1505+
1506+
List<IndexableField> fields = doc.rootDoc().getFields("potato");
1507+
IndexableFieldType fieldType = fields.get(0).fieldType();
1508+
1509+
// then
1510+
assertThat(fieldType.omitNorms(), is(false));
1511+
}
1512+
1513+
public void testDontNormalizeWhenIndexModeIsLogsDB() throws IOException {
1514+
// given
1515+
Settings.Builder indexSettingsBuilder = getIndexSettingsBuilder();
1516+
indexSettingsBuilder.put(IndexSettings.MODE.getKey(), IndexMode.LOGSDB.getName());
1517+
Settings indexSettings = indexSettingsBuilder.build();
1518+
1519+
XContentBuilder mapping = mapping(b -> {
1520+
b.startObject("potato");
1521+
b.field("type", "text");
1522+
b.endObject();
1523+
});
1524+
1525+
var source = source(b -> {
1526+
b.field("@timestamp", Instant.now());
1527+
b.field("potato", "a potato flew around my room");
1528+
});
1529+
1530+
// when
1531+
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
1532+
ParsedDocument doc = mapper.parse(source);
1533+
1534+
List<IndexableField> fields = doc.rootDoc().getFields("potato");
1535+
IndexableFieldType fieldType = fields.get(0).fieldType();
1536+
1537+
// then
1538+
assertThat(fieldType.omitNorms(), is(true));
1539+
}
1540+
1541+
public void testDontNormalizeWhenIndexModeIsTSDB() throws IOException {
1542+
// given
1543+
Instant currentTime = Instant.now();
1544+
Settings.Builder indexSettingsBuilder = getIndexSettingsBuilder();
1545+
indexSettingsBuilder.put(IndexSettings.MODE.getKey(), IndexMode.TIME_SERIES.getName())
1546+
.put(IndexSettings.TIME_SERIES_START_TIME.getKey(), currentTime.minus(1, ChronoUnit.HOURS).toEpochMilli())
1547+
.put(IndexSettings.TIME_SERIES_END_TIME.getKey(), currentTime.plus(1, ChronoUnit.HOURS).toEpochMilli())
1548+
.put(IndexMetadata.INDEX_ROUTING_PATH.getKey(), "dimension");
1549+
Settings indexSettings = indexSettingsBuilder.build();
1550+
1551+
XContentBuilder mapping = mapping(b -> {
1552+
b.startObject("potato");
1553+
b.field("type", "text");
1554+
b.endObject();
1555+
1556+
b.startObject("@timestamp");
1557+
b.field("type", "date");
1558+
b.endObject();
1559+
});
1560+
1561+
var source = source(TimeSeriesRoutingHashFieldMapper.DUMMY_ENCODED_VALUE, b -> {
1562+
b.field("@timestamp", Instant.now());
1563+
b.field("potato", "a potato flew around my room");
1564+
}, null);
1565+
1566+
// when
1567+
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
1568+
ParsedDocument doc = mapper.parse(source);
1569+
1570+
List<IndexableField> fields = doc.rootDoc().getFields("potato");
1571+
IndexableFieldType fieldType = fields.get(0).fieldType();
1572+
1573+
// then
1574+
assertThat(fieldType.omitNorms(), is(true));
1575+
}
1576+
14351577
}

0 commit comments

Comments
 (0)