Skip to content

Commit eb3fd62

Browse files
authored
Use FallbackSyntheticSourceBlockLoader for text fields (#126237) (#126430)
(cherry picked from commit 21ff72b) # Conflicts: # server/src/test/java/org/elasticsearch/index/mapper/blockloader/KeywordFieldBlockLoaderTests.java
1 parent e00de75 commit eb3fd62

File tree

9 files changed

+329
-8
lines changed

9 files changed

+329
-8
lines changed

docs/changelog/126237.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 126237
2+
summary: Use `FallbackSyntheticSourceBlockLoader` for text fields
3+
area: Mapping
4+
type: enhancement
5+
issues: []

server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
7474
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
7575
import org.elasticsearch.xcontent.ToXContent;
7676
import org.elasticsearch.xcontent.XContentBuilder;
77+
import org.elasticsearch.xcontent.XContentParser;
7778

7879
import java.io.IOException;
7980
import java.util.ArrayList;
@@ -1019,10 +1020,53 @@ protected String delegatingTo() {
10191020
if (isStored()) {
10201021
return new BlockStoredFieldsReader.BytesFromStringsBlockLoader(name());
10211022
}
1023+
1024+
// _ignored_source field will only be present if text field is not stored
1025+
// and there is no syntheticSourceDelegate
1026+
if (isSyntheticSource && syntheticSourceDelegate == null) {
1027+
return fallbackSyntheticSourceBlockLoader();
1028+
}
1029+
10221030
SourceValueFetcher fetcher = SourceValueFetcher.toString(blContext.sourcePaths(name()));
10231031
return new BlockSourceReader.BytesRefsBlockLoader(fetcher, blockReaderDisiLookup(blContext));
10241032
}
10251033

1034+
FallbackSyntheticSourceBlockLoader fallbackSyntheticSourceBlockLoader() {
1035+
var reader = new FallbackSyntheticSourceBlockLoader.SingleValueReader<BytesRef>(null) {
1036+
@Override
1037+
public void convertValue(Object value, List<BytesRef> accumulator) {
1038+
if (value != null) {
1039+
accumulator.add(new BytesRef(value.toString()));
1040+
}
1041+
}
1042+
1043+
@Override
1044+
protected void parseNonNullValue(XContentParser parser, List<BytesRef> accumulator) throws IOException {
1045+
var text = parser.textOrNull();
1046+
1047+
if (text != null) {
1048+
accumulator.add(new BytesRef(text));
1049+
}
1050+
}
1051+
1052+
@Override
1053+
public void writeToBlock(List<BytesRef> values, BlockLoader.Builder blockBuilder) {
1054+
var bytesRefBuilder = (BlockLoader.BytesRefBuilder) blockBuilder;
1055+
1056+
for (var value : values) {
1057+
bytesRefBuilder.appendBytesRef(value);
1058+
}
1059+
}
1060+
};
1061+
1062+
return new FallbackSyntheticSourceBlockLoader(reader, name()) {
1063+
@Override
1064+
public Builder builder(BlockFactory factory, int expectedCount) {
1065+
return factory.bytesRefs(expectedCount);
1066+
}
1067+
};
1068+
}
1069+
10261070
/**
10271071
* Build an iterator of documents that have the field. This mirrors parseCreateField,
10281072
* using whatever

server/src/test/java/org/elasticsearch/index/mapper/blockloader/KeywordFieldBlockLoaderTests.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,13 @@ public KeywordFieldBlockLoaderTests(Params params) {
2424
super(FieldType.KEYWORD.toString(), params);
2525
}
2626

27-
@SuppressWarnings("unchecked")
2827
@Override
2928
protected Object expected(Map<String, Object> fieldMapping, Object value, TestContext testContext) {
29+
return expectedValue(fieldMapping, value, params, testContext);
30+
}
31+
32+
@SuppressWarnings("unchecked")
33+
public static Object expectedValue(Map<String, Object> fieldMapping, Object value, Params params, TestContext testContext) {
3034
var nullValue = (String) fieldMapping.get("null_value");
3135

3236
var ignoreAbove = fieldMapping.get("ignore_above") == null
@@ -59,7 +63,7 @@ protected Object expected(Map<String, Object> fieldMapping, Object value, TestCo
5963
return maybeFoldList(resultList);
6064
}
6165

62-
private BytesRef convert(String value, String nullValue, int ignoreAbove) {
66+
private static BytesRef convert(String value, String nullValue, int ignoreAbove) {
6367
if (value == null) {
6468
if (nullValue != null) {
6569
value = nullValue;
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.index.mapper.blockloader;
11+
12+
import org.apache.lucene.util.BytesRef;
13+
import org.elasticsearch.index.mapper.BlockLoaderTestCase;
14+
import org.elasticsearch.logsdb.datageneration.FieldType;
15+
16+
import java.util.ArrayList;
17+
import java.util.HashSet;
18+
import java.util.List;
19+
import java.util.Map;
20+
import java.util.Objects;
21+
import java.util.stream.Collectors;
22+
23+
public class TextFieldBlockLoaderTests extends BlockLoaderTestCase {
24+
public TextFieldBlockLoaderTests(Params params) {
25+
super(FieldType.TEXT.toString(), params);
26+
}
27+
28+
@SuppressWarnings("unchecked")
29+
@Override
30+
protected Object expected(Map<String, Object> fieldMapping, Object value, TestContext testContext) {
31+
if (fieldMapping.getOrDefault("store", false).equals(true)) {
32+
return valuesInSourceOrder(value);
33+
}
34+
35+
var fields = (Map<String, Object>) fieldMapping.get("fields");
36+
if (fields != null) {
37+
var keywordMultiFieldMapping = (Map<String, Object>) fields.get("kwd");
38+
boolean docValues = hasDocValues(keywordMultiFieldMapping, true);
39+
boolean index = keywordMultiFieldMapping.getOrDefault("index", true).equals(true);
40+
boolean store = keywordMultiFieldMapping.getOrDefault("store", false).equals(true);
41+
Object ignoreAbove = keywordMultiFieldMapping.get("ignore_above");
42+
43+
// See TextFieldMapper.SyntheticSourceHelper#usingSyntheticSourceDelegate
44+
// and TextFieldMapper#canUseSyntheticSourceDelegateForLoading().
45+
boolean usingSyntheticSourceDelegate = docValues || store;
46+
boolean canUseSyntheticSourceDelegateForLoading = usingSyntheticSourceDelegate && ignoreAbove == null && (index || store);
47+
if (canUseSyntheticSourceDelegateForLoading) {
48+
return KeywordFieldBlockLoaderTests.expectedValue(keywordMultiFieldMapping, value, params, testContext);
49+
}
50+
51+
// Even if multi-field is not eligible for loading it can still be used to produce synthetic source
52+
// and then we load from the synthetic source.
53+
// Synthetic source is actually different from keyword field block loader results
54+
// because synthetic source includes values exceeding ignore_above and block loader doesn't.
55+
// TODO ideally this logic should be in some kind of KeywordFieldSyntheticSourceTest that uses same infra as
56+
// KeywordFieldBlockLoaderTest
57+
// It is here since KeywordFieldBlockLoaderTest does not really need it
58+
if (params.syntheticSource() && testContext.forceFallbackSyntheticSource() == false && usingSyntheticSourceDelegate) {
59+
var nullValue = (String) keywordMultiFieldMapping.get("null_value");
60+
61+
// Due to how TextFieldMapper#blockReaderDisiLookup works this is complicated.
62+
// If we are using lookupMatchingAll() then we'll see all docs, generate synthetic source using syntheticSourceDelegate,
63+
// parse it and see null_value inside.
64+
// But if we are using lookupFromNorms() we will skip the document (since the text field itself does not exist).
65+
// Same goes for lookupFromFieldNames().
66+
boolean textFieldIndexed = (boolean) fieldMapping.getOrDefault("index", true);
67+
68+
if (value == null) {
69+
if (textFieldIndexed == false
70+
&& nullValue != null
71+
&& (ignoreAbove == null || nullValue.length() <= (int) ignoreAbove)) {
72+
return new BytesRef(nullValue);
73+
}
74+
75+
return null;
76+
}
77+
78+
if (value instanceof String s) {
79+
return new BytesRef(s);
80+
}
81+
82+
var values = (List<String>) value;
83+
84+
// See note above about TextFieldMapper#blockReaderDisiLookup.
85+
if (textFieldIndexed && values.stream().allMatch(Objects::isNull)) {
86+
return null;
87+
}
88+
89+
var indexed = values.stream()
90+
.map(s -> s == null ? nullValue : s)
91+
.filter(Objects::nonNull)
92+
.filter(s -> ignoreAbove == null || s.length() <= (int) ignoreAbove)
93+
.map(BytesRef::new)
94+
.collect(Collectors.toList());
95+
96+
if (store == false) {
97+
// using doc_values for synthetic source
98+
indexed = new ArrayList<>(new HashSet<>(indexed));
99+
indexed.sort(BytesRef::compareTo);
100+
}
101+
102+
// ignored values always come last
103+
List<BytesRef> ignored = ignoreAbove == null
104+
? List.of()
105+
: values.stream()
106+
.map(s -> s == null ? nullValue : s)
107+
.filter(Objects::nonNull)
108+
.filter(s -> s.length() > (int) ignoreAbove)
109+
.map(BytesRef::new)
110+
.toList();
111+
112+
indexed.addAll(ignored);
113+
114+
return maybeFoldList(indexed);
115+
}
116+
}
117+
118+
// Loading from _ignored_source or stored _source
119+
return valuesInSourceOrder(value);
120+
}
121+
122+
@SuppressWarnings("unchecked")
123+
private Object valuesInSourceOrder(Object value) {
124+
if (value == null) {
125+
return null;
126+
}
127+
128+
if (value instanceof String s) {
129+
return new BytesRef(s);
130+
}
131+
132+
var resultList = ((List<String>) value).stream().filter(Objects::nonNull).map(BytesRef::new).toList();
133+
return maybeFoldList(resultList);
134+
}
135+
}

test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/FieldType.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import org.elasticsearch.logsdb.datageneration.fields.leaf.LongFieldDataGenerator;
2424
import org.elasticsearch.logsdb.datageneration.fields.leaf.ScaledFloatFieldDataGenerator;
2525
import org.elasticsearch.logsdb.datageneration.fields.leaf.ShortFieldDataGenerator;
26+
import org.elasticsearch.logsdb.datageneration.fields.leaf.TextFieldDataGenerator;
2627
import org.elasticsearch.logsdb.datageneration.fields.leaf.UnsignedLongFieldDataGenerator;
2728

2829
/**
@@ -42,7 +43,8 @@ public enum FieldType {
4243
COUNTED_KEYWORD("counted_keyword"),
4344
BOOLEAN("boolean"),
4445
DATE("date"),
45-
GEO_POINT("geo_point");
46+
GEO_POINT("geo_point"),
47+
TEXT("text");
4648

4749
private final String name;
4850

@@ -66,6 +68,7 @@ public FieldDataGenerator generator(String fieldName, DataSource dataSource) {
6668
case BOOLEAN -> new BooleanFieldDataGenerator(dataSource);
6769
case DATE -> new DateFieldDataGenerator(dataSource);
6870
case GEO_POINT -> new GeoPointFieldDataGenerator(dataSource);
71+
case TEXT -> new TextFieldDataGenerator(dataSource);
6972
};
7073
}
7174

@@ -85,6 +88,7 @@ public static FieldType tryParse(String name) {
8588
case "boolean" -> FieldType.BOOLEAN;
8689
case "date" -> FieldType.DATE;
8790
case "geo_point" -> FieldType.GEO_POINT;
91+
case "text" -> FieldType.TEXT;
8892
default -> null;
8993
};
9094
}

test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultMappingParametersHandler.java

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,7 @@ public DataSourceResponse.LeafMappingParametersGenerator handle(DataSourceReques
3535
return null;
3636
}
3737

38-
var map = new HashMap<String, Object>();
39-
map.put("store", ESTestCase.randomBoolean());
40-
map.put("index", ESTestCase.randomBoolean());
41-
map.put("doc_values", ESTestCase.randomBoolean());
38+
var map = commonMappingParameters();
4239
if (ESTestCase.randomBoolean()) {
4340
map.put(Mapper.SYNTHETIC_SOURCE_KEEP_PARAM, ESTestCase.randomFrom("none", "arrays", "all"));
4441
}
@@ -51,6 +48,7 @@ public DataSourceResponse.LeafMappingParametersGenerator handle(DataSourceReques
5148
case BOOLEAN -> booleanMapping(map);
5249
case DATE -> dateMapping(map);
5350
case GEO_POINT -> geoPointMapping(map);
51+
case TEXT -> textMapping(request, new HashMap<>());
5452
});
5553
}
5654

@@ -190,6 +188,35 @@ private Supplier<Map<String, Object>> geoPointMapping(Map<String, Object> inject
190188
};
191189
}
192190

191+
private Supplier<Map<String, Object>> textMapping(
192+
DataSourceRequest.LeafMappingParametersGenerator request,
193+
Map<String, Object> injected
194+
) {
195+
return () -> {
196+
injected.put("store", ESTestCase.randomBoolean());
197+
injected.put("index", ESTestCase.randomBoolean());
198+
199+
if (ESTestCase.randomDouble() <= 0.1) {
200+
var keywordMultiFieldMapping = keywordMapping(request, commonMappingParameters()).get();
201+
keywordMultiFieldMapping.put("type", "keyword");
202+
keywordMultiFieldMapping.remove("copy_to");
203+
204+
injected.put("fields", Map.of("kwd", keywordMultiFieldMapping));
205+
206+
}
207+
208+
return injected;
209+
};
210+
}
211+
212+
private static HashMap<String, Object> commonMappingParameters() {
213+
var map = new HashMap<String, Object>();
214+
map.put("store", ESTestCase.randomBoolean());
215+
map.put("index", ESTestCase.randomBoolean());
216+
map.put("doc_values", ESTestCase.randomBoolean());
217+
return map;
218+
}
219+
193220
@Override
194221
public DataSourceResponse.ObjectMappingParametersGenerator handle(DataSourceRequest.ObjectMappingParametersGenerator request) {
195222
if (request.isNested()) {
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.logsdb.datageneration.fields.leaf;
11+
12+
import org.elasticsearch.logsdb.datageneration.FieldDataGenerator;
13+
import org.elasticsearch.logsdb.datageneration.datasource.DataSource;
14+
import org.elasticsearch.logsdb.datageneration.datasource.DataSourceRequest;
15+
16+
import java.util.Map;
17+
import java.util.function.Supplier;
18+
19+
public class TextFieldDataGenerator implements FieldDataGenerator {
20+
private final Supplier<Object> valueGenerator;
21+
22+
public TextFieldDataGenerator(DataSource dataSource) {
23+
var strings = dataSource.get(new DataSourceRequest.StringGenerator());
24+
var nulls = dataSource.get(new DataSourceRequest.NullWrapper());
25+
var arrays = dataSource.get(new DataSourceRequest.ArrayWrapper());
26+
27+
this.valueGenerator = arrays.wrapper().compose(nulls.wrapper()).apply(() -> strings.generator().get());
28+
}
29+
30+
@Override
31+
public Object generateValue(Map<String, Object> fieldMapping) {
32+
return valueGenerator.get();
33+
}
34+
}

0 commit comments

Comments
 (0)