Skip to content

Commit 4ee05c4

Browse files
committed
Don't enable norms by default on fields of type text when the index mode is LOGSDB or TSDB.
In the context of logsdb and tsdb, few fields are configured as type text, and since the trade-offs that logsdb and tsdb make are geared towards storage reduction, it makes sense not to store norms for text fields by default. Closes #129183
1 parent 1e5329c commit 4ee05c4

File tree

8 files changed

+181
-15
lines changed

8 files changed

+181
-15
lines changed

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/SearchAsYouTypeFieldMapper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ public static class Builder extends FieldMapper.Builder {
138138
final Parameter<SimilarityProvider> similarity = TextParams.similarity(m -> builder(m).similarity.get());
139139

140140
final Parameter<String> indexOptions = TextParams.textIndexOptions(m -> builder(m).indexOptions.get());
141-
final Parameter<Boolean> norms = TextParams.norms(true, m -> builder(m).norms.get());
141+
final Parameter<Boolean> norms = Parameter.normsParam(m -> builder(m).norms.get(), true);
142142
final Parameter<String> termVectors = TextParams.termVectors(m -> builder(m).termVectors.get());
143143

144144
private final Parameter<Map<String, String>> meta = Parameter.metaParam();

plugins/analysis-icu/src/main/java/org/elasticsearch/plugin/analysis/icu/ICUCollationKeywordFieldMapper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ public static class Builder extends FieldMapper.Builder {
226226
final Parameter<Boolean> stored = Parameter.storeParam(m -> toType(m).fieldType.stored(), false);
227227

228228
final Parameter<String> indexOptions = TextParams.keywordIndexOptions(m -> toType(m).indexOptions);
229-
final Parameter<Boolean> hasNorms = TextParams.norms(false, m -> toType(m).fieldType.omitNorms() == false);
229+
final Parameter<Boolean> hasNorms = Parameter.normsParam(m -> toType(m).fieldType.omitNorms() == false, false);
230230

231231
final Parameter<Map<String, String>> meta = Parameter.metaParam();
232232

plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ public static class Builder extends FieldMapper.Builder {
8282

8383
final Parameter<SimilarityProvider> similarity = TextParams.similarity(m -> builder(m).similarity.getValue());
8484
final Parameter<String> indexOptions = TextParams.textIndexOptions(m -> builder(m).indexOptions.getValue());
85-
final Parameter<Boolean> norms = TextParams.norms(true, m -> builder(m).norms.getValue());
85+
final Parameter<Boolean> norms = Parameter.normsParam(m -> builder(m).norms.getValue(), true);
8686
final Parameter<String> termVectors = TextParams.termVectors(m -> builder(m).termVectors.getValue());
8787

8888
private final Parameter<Map<String, String>> meta = Parameter.metaParam();

server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1325,6 +1325,18 @@ public static Parameter<Boolean> docValuesParam(Function<FieldMapper, Boolean> i
13251325
return Parameter.boolParam("doc_values", false, initializer, defaultValue);
13261326
}
13271327

1328+
public static Parameter<Boolean> normsParam(Function<FieldMapper, Boolean> initializer, boolean defaultValue) {
1329+
// norms can be updated from 'true' to 'false' but not vice-versa
1330+
return Parameter.boolParam("norms", true, initializer, defaultValue)
1331+
.setMergeValidator((prev, curr, c) -> prev == curr || (prev && curr == false));
1332+
}
1333+
1334+
public static Parameter<Boolean> normsParam(Function<FieldMapper, Boolean> initializer, Supplier<Boolean> defaultValueSupplier) {
1335+
// norms can be updated from 'true' to 'false' but not vice-versa
1336+
return Parameter.boolParam("norms", true, initializer, defaultValueSupplier)
1337+
.setMergeValidator((prev, curr, c) -> prev == curr || (prev && curr == false));
1338+
}
1339+
13281340
/**
13291341
* Defines a script parameter
13301342
* @param initializer retrieves the equivalent parameter from an existing FieldMapper for use in merges

server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ public static final class Builder extends FieldMapper.DimensionBuilder {
183183
private final IndexSortConfig indexSortConfig;
184184
private final IndexMode indexMode;
185185
private final Parameter<String> indexOptions = TextParams.keywordIndexOptions(m -> toType(m).indexOptions);
186-
private final Parameter<Boolean> hasNorms = TextParams.norms(false, m -> toType(m).fieldType.omitNorms() == false);
186+
private final Parameter<Boolean> hasNorms = Parameter.normsParam(m -> toType(m).fieldType.omitNorms() == false, false);
187187
private final Parameter<SimilarityProvider> similarity = TextParams.similarity(
188188
m -> toType(m).fieldType().getTextSearchInfo().similarity()
189189
);

server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
import org.elasticsearch.common.unit.Fuzziness;
5656
import org.elasticsearch.common.xcontent.support.XContentMapValues;
5757
import org.elasticsearch.core.Nullable;
58+
import org.elasticsearch.index.IndexMode;
5859
import org.elasticsearch.index.IndexVersion;
5960
import org.elasticsearch.index.IndexVersions;
6061
import org.elasticsearch.index.analysis.AnalyzerScope;
@@ -239,15 +240,17 @@ public static class Builder extends FieldMapper.Builder {
239240

240241
private final IndexVersion indexCreatedVersion;
241242
private final Parameter<Boolean> store;
243+
private final Parameter<Boolean> norms;
242244

243245
private final boolean isSyntheticSourceEnabled;
244246

247+
private final IndexMode indexMode;
248+
245249
private final Parameter<Boolean> index = Parameter.indexParam(m -> ((TextFieldMapper) m).index, true);
246250

247251
final Parameter<SimilarityProvider> similarity = TextParams.similarity(m -> ((TextFieldMapper) m).similarity);
248252

249253
final Parameter<String> indexOptions = TextParams.textIndexOptions(m -> ((TextFieldMapper) m).indexOptions);
250-
final Parameter<Boolean> norms = TextParams.norms(true, m -> ((TextFieldMapper) m).norms);
251254
final Parameter<String> termVectors = TextParams.termVectors(m -> ((TextFieldMapper) m).termVectors);
252255

253256
final Parameter<Boolean> fieldData = Parameter.boolParam("fielddata", true, m -> ((TextFieldMapper) m).fieldData, false);
@@ -290,26 +293,37 @@ public static class Builder extends FieldMapper.Builder {
290293
private final boolean withinMultiField;
291294

292295
public Builder(String name, IndexAnalyzers indexAnalyzers, boolean isSyntheticSourceEnabled) {
293-
this(name, IndexVersion.current(), indexAnalyzers, isSyntheticSourceEnabled, false);
296+
this(name, IndexVersion.current(), null, indexAnalyzers, isSyntheticSourceEnabled, false);
294297
}
295298

296299
public Builder(
297300
String name,
298301
IndexVersion indexCreatedVersion,
302+
IndexMode indexMode,
299303
IndexAnalyzers indexAnalyzers,
300304
boolean isSyntheticSourceEnabled,
301305
boolean withinMultiField
302306
) {
303307
super(name);
304308

309+
this.indexCreatedVersion = indexCreatedVersion;
310+
this.indexMode = indexMode;
311+
this.isSyntheticSourceEnabled = isSyntheticSourceEnabled;
312+
this.withinMultiField = withinMultiField;
313+
314+
// don't normalize if the index is LOGSDB or TSDB based
315+
this.norms = Parameter.normsParam(
316+
m -> ((TextFieldMapper) m).norms,
317+
() -> indexMode != IndexMode.LOGSDB && indexMode != IndexMode.TIME_SERIES
318+
);
319+
305320
// If synthetic source is used we need to either store this field
306321
// to recreate the source or use keyword multi-fields for that.
307322
// So if there are no suitable multi-fields we will default to
308323
// storing the field without requiring users to explicitly set 'store'.
309324
//
310325
// If 'store' parameter was explicitly provided we'll reject the request.
311326
// Note that if current builder is a multi field, then we don't need to store, given that responsibility lies with parent field
312-
this.withinMultiField = withinMultiField;
313327
this.store = Parameter.storeParam(m -> ((TextFieldMapper) m).store, () -> {
314328
if (multiFieldsNotStoredByDefaultIndexVersionCheck(indexCreatedVersion)) {
315329
return isSyntheticSourceEnabled
@@ -319,14 +333,12 @@ public Builder(
319333
return isSyntheticSourceEnabled && multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false;
320334
}
321335
});
322-
this.indexCreatedVersion = indexCreatedVersion;
323336
this.analyzers = new TextParams.Analyzers(
324337
indexAnalyzers,
325338
m -> ((TextFieldMapper) m).indexAnalyzer,
326339
m -> (((TextFieldMapper) m).positionIncrementGap),
327340
indexCreatedVersion
328341
);
329-
this.isSyntheticSourceEnabled = isSyntheticSourceEnabled;
330342
}
331343

332344
public static boolean multiFieldsNotStoredByDefaultIndexVersionCheck(IndexVersion indexCreatedVersion) {
@@ -508,6 +520,7 @@ public TextFieldMapper build(MapperBuilderContext context) {
508520
(n, c) -> new Builder(
509521
n,
510522
c.indexVersionCreated(),
523+
c.getIndexSettings().getMode(),
511524
c.getIndexAnalyzers(),
512525
SourceFieldMapper.isSynthetic(c.getIndexSettings()),
513526
c.isWithinMultiField()
@@ -1319,6 +1332,7 @@ public Query existsQuery(SearchExecutionContext context) {
13191332
}
13201333

13211334
private final IndexVersion indexCreatedVersion;
1335+
private final IndexMode indexMode;
13221336
private final boolean index;
13231337
private final boolean store;
13241338
private final String indexOptions;
@@ -1357,6 +1371,7 @@ private TextFieldMapper(
13571371
this.prefixFieldInfo = prefixFieldInfo;
13581372
this.phraseFieldInfo = phraseFieldInfo;
13591373
this.indexCreatedVersion = builder.indexCreatedVersion;
1374+
this.indexMode = builder.indexMode;
13601375
this.indexAnalyzer = builder.analyzers.getIndexAnalyzer();
13611376
this.indexAnalyzers = builder.analyzers.indexAnalyzers;
13621377
this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue();
@@ -1394,7 +1409,9 @@ public Map<String, NamedAnalyzer> indexAnalyzers() {
13941409

13951410
@Override
13961411
public FieldMapper.Builder getMergeBuilder() {
1397-
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, isSyntheticSourceEnabled, isWithinMultiField).init(this);
1412+
return new Builder(leafName(), indexCreatedVersion, indexMode, indexAnalyzers, isSyntheticSourceEnabled, isWithinMultiField).init(
1413+
this
1414+
);
13981415
}
13991416

14001417
@Override

server/src/main/java/org/elasticsearch/index/mapper/TextParams.java

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -123,11 +123,6 @@ private NamedAnalyzer wrapAnalyzer(NamedAnalyzer a) {
123123
}
124124
}
125125

126-
public static Parameter<Boolean> norms(boolean defaultValue, Function<FieldMapper, Boolean> initializer) {
127-
// norms can be updated from 'true' to 'false' but not vv
128-
return Parameter.boolParam("norms", true, initializer, defaultValue).setMergeValidator((o, n, c) -> o == n || (o && n == false));
129-
}
130-
131126
public static Parameter<SimilarityProvider> similarity(Function<FieldMapper, SimilarityProvider> init) {
132127
return new Parameter<>(
133128
"similarity",

server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
import org.elasticsearch.cluster.metadata.IndexMetadata;
4949
import org.elasticsearch.common.Strings;
5050
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
51+
import org.elasticsearch.common.settings.Settings;
5152
import org.elasticsearch.index.IndexMode;
5253
import org.elasticsearch.index.IndexSettings;
5354
import org.elasticsearch.index.IndexVersion;
@@ -79,6 +80,8 @@
7980
import org.junit.AssumptionViolatedException;
8081

8182
import java.io.IOException;
83+
import java.time.Instant;
84+
import java.time.temporal.ChronoUnit;
8285
import java.util.Arrays;
8386
import java.util.Collections;
8487
import java.util.HashMap;
@@ -1432,4 +1435,143 @@ public void testEmpty() throws Exception {
14321435
assertFalse(dv.advanceExact(3));
14331436
});
14341437
}
1438+
1439+
public void testNormalizeByDefault() throws IOException {
1440+
// given
1441+
Settings.Builder indexSettingsBuilder = getIndexSettingsBuilder();
1442+
indexSettingsBuilder.put(IndexSettings.MODE.getKey(), IndexMode.STANDARD.getName());
1443+
Settings indexSettings = indexSettingsBuilder.build();
1444+
1445+
XContentBuilder mapping = mapping(b -> {
1446+
b.startObject("potato");
1447+
b.field("type", "text");
1448+
b.endObject();
1449+
});
1450+
1451+
var source = source(b -> b.field("potato", "a potato flew around my room"));
1452+
1453+
// when
1454+
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
1455+
ParsedDocument doc = mapper.parse(source);
1456+
1457+
List<IndexableField> fields = doc.rootDoc().getFields("potato");
1458+
IndexableFieldType fieldType = fields.get(0).fieldType();
1459+
1460+
// then
1461+
assertThat(fieldType.omitNorms(), is(false));
1462+
}
1463+
1464+
public void testNormalizeWhenIndexModeIsNotGiven() throws IOException {
1465+
// given
1466+
Settings.Builder indexSettingsBuilder = getIndexSettingsBuilder();
1467+
Settings indexSettings = indexSettingsBuilder.build();
1468+
1469+
XContentBuilder mapping = mapping(b -> {
1470+
b.startObject("potato");
1471+
b.field("type", "text");
1472+
b.endObject();
1473+
});
1474+
1475+
var source = source(b -> b.field("potato", "a potato flew around my room"));
1476+
1477+
// when
1478+
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
1479+
ParsedDocument doc = mapper.parse(source);
1480+
1481+
List<IndexableField> fields = doc.rootDoc().getFields("potato");
1482+
IndexableFieldType fieldType = fields.get(0).fieldType();
1483+
1484+
// then
1485+
assertThat(fieldType.omitNorms(), is(false));
1486+
}
1487+
1488+
public void testNormalizeWhenIndexModeIsNull() throws IOException {
1489+
// given
1490+
Settings.Builder indexSettingsBuilder = getIndexSettingsBuilder();
1491+
indexSettingsBuilder.put(IndexSettings.MODE.getKey(), (String) null);
1492+
Settings indexSettings = indexSettingsBuilder.build();
1493+
1494+
XContentBuilder mapping = mapping(b -> {
1495+
b.startObject("potato");
1496+
b.field("type", "text");
1497+
b.endObject();
1498+
});
1499+
1500+
var source = source(b -> b.field("potato", "a potato flew around my room"));
1501+
1502+
// when
1503+
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
1504+
ParsedDocument doc = mapper.parse(source);
1505+
1506+
List<IndexableField> fields = doc.rootDoc().getFields("potato");
1507+
IndexableFieldType fieldType = fields.get(0).fieldType();
1508+
1509+
// then
1510+
assertThat(fieldType.omitNorms(), is(false));
1511+
}
1512+
1513+
public void testDontNormalizeWhenIndexModeIsLogsDB() throws IOException {
1514+
// given
1515+
Settings.Builder indexSettingsBuilder = getIndexSettingsBuilder();
1516+
indexSettingsBuilder.put(IndexSettings.MODE.getKey(), IndexMode.LOGSDB.getName());
1517+
Settings indexSettings = indexSettingsBuilder.build();
1518+
1519+
XContentBuilder mapping = mapping(b -> {
1520+
b.startObject("potato");
1521+
b.field("type", "text");
1522+
b.endObject();
1523+
});
1524+
1525+
var source = source(b -> {
1526+
b.field("@timestamp", Instant.now());
1527+
b.field("potato", "a potato flew around my room");
1528+
});
1529+
1530+
// when
1531+
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
1532+
ParsedDocument doc = mapper.parse(source);
1533+
1534+
List<IndexableField> fields = doc.rootDoc().getFields("potato");
1535+
IndexableFieldType fieldType = fields.get(0).fieldType();
1536+
1537+
// then
1538+
assertThat(fieldType.omitNorms(), is(true));
1539+
}
1540+
1541+
public void testDontNormalizeWhenIndexModeIsTSDB() throws IOException {
1542+
// given
1543+
Instant currentTime = Instant.now();
1544+
Settings.Builder indexSettingsBuilder = getIndexSettingsBuilder();
1545+
indexSettingsBuilder.put(IndexSettings.MODE.getKey(), IndexMode.TIME_SERIES.getName())
1546+
.put(IndexSettings.TIME_SERIES_START_TIME.getKey(), currentTime.minus(1, ChronoUnit.HOURS).toEpochMilli())
1547+
.put(IndexSettings.TIME_SERIES_END_TIME.getKey(), currentTime.plus(1, ChronoUnit.HOURS).toEpochMilli())
1548+
.put(IndexMetadata.INDEX_ROUTING_PATH.getKey(), "dimension");
1549+
Settings indexSettings = indexSettingsBuilder.build();
1550+
1551+
XContentBuilder mapping = mapping(b -> {
1552+
b.startObject("potato");
1553+
b.field("type", "text");
1554+
b.endObject();
1555+
1556+
b.startObject("@timestamp");
1557+
b.field("type", "date");
1558+
b.endObject();
1559+
});
1560+
1561+
var source = source(TimeSeriesRoutingHashFieldMapper.DUMMY_ENCODED_VALUE, b -> {
1562+
b.field("@timestamp", Instant.now());
1563+
b.field("potato", "a potato flew around my room");
1564+
}, null);
1565+
1566+
// when
1567+
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();
1568+
ParsedDocument doc = mapper.parse(source);
1569+
1570+
List<IndexableField> fields = doc.rootDoc().getFields("potato");
1571+
IndexableFieldType fieldType = fields.get(0).fieldType();
1572+
1573+
// then
1574+
assertThat(fieldType.omitNorms(), is(true));
1575+
}
1576+
14351577
}

0 commit comments

Comments
 (0)