Skip to content

Commit a5a3ef2

Browse files
authored
Properly handle multi fields in block loaders with synthetic source enabled (elastic#127483) (elastic#127581)
(cherry picked from commit 0c1b3ac) # Conflicts: # server/src/test/java/org/elasticsearch/index/mapper/blockloader/GeoPointFieldBlockLoaderTests.java
1 parent 4d091b7 commit a5a3ef2

File tree

23 files changed

+535
-302
lines changed

23 files changed

+535
-302
lines changed

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldMapper.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,8 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) {
378378
if (hasDocValues()) {
379379
return new BlockDocValuesReader.DoublesBlockLoader(name(), l -> l / scalingFactor);
380380
}
381-
if (isSyntheticSource) {
381+
// Multi fields don't have fallback synthetic source.
382+
if (isSyntheticSource && blContext.parentField(name()) == null) {
382383
return new FallbackSyntheticSourceBlockLoader(fallbackSyntheticSourceBlockLoaderReader(), name()) {
383384
@Override
384385
public Builder builder(BlockFactory factory, int expectedCount) {

server/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -356,7 +356,8 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) {
356356
return new BlockDocValuesReader.BooleansBlockLoader(name());
357357
}
358358

359-
if (isSyntheticSource) {
359+
// Multi fields don't have fallback synthetic source.
360+
if (isSyntheticSource && blContext.parentField(name()) == null) {
360361
return new FallbackSyntheticSourceBlockLoader(fallbackSyntheticSourceBlockLoaderReader(), name()) {
361362
@Override
362363
public Builder builder(BlockFactory factory, int expectedCount) {

server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -856,7 +856,8 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) {
856856
return new BlockDocValuesReader.LongsBlockLoader(name());
857857
}
858858

859-
if (isSyntheticSource) {
859+
// Multi fields don't have fallback synthetic source.
860+
if (isSyntheticSource && blContext.parentField(name()) == null) {
860861
return new FallbackSyntheticSourceBlockLoader(fallbackSyntheticSourceBlockLoaderReader(), name()) {
861862
@Override
862863
public Builder builder(BlockFactory factory, int expectedCount) {

server/src/main/java/org/elasticsearch/index/mapper/GeoPointFieldMapper.java

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ protected void index(DocumentParserContext context, GeoPoint geometry) throws IO
319319

320320
/**
321321
* Parser that pretends to be the main document parser, but exposes the provided geohash regardless of how the geopoint was provided
322-
* in the incoming document. We rely on the fact that consumers are only ever call {@link XContentParser#textOrNull()} and never
322+
* in the incoming document. We rely on the fact that consumers only ever read text from the parser and never
323323
* advance tokens, which is explicitly disallowed by this parser.
324324
*/
325325
static class GeoHashMultiFieldParser extends FilterXContentParserWrapper {
@@ -335,6 +335,11 @@ public String textOrNull() throws IOException {
335335
return value;
336336
}
337337

338+
@Override
339+
public String text() throws IOException {
340+
return value;
341+
}
342+
338343
@Override
339344
public Token currentToken() {
340345
return Token.VALUE_STRING;
@@ -548,8 +553,9 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) {
548553
// So we have two subcases:
549554
// - doc_values are enabled - _ignored_source field does not exist since we have doc_values. We will use
550555
// blockLoaderFromSource which reads "native" synthetic source.
551-
// - doc_values are disabled - we know that _ignored_source field is present and use a special block loader.
552-
if (isSyntheticSource && hasDocValues() == false) {
556+
// - doc_values are disabled - we know that _ignored_source field is present and use a special block loader unless it's a multi
557+
// field.
558+
if (isSyntheticSource && hasDocValues() == false && blContext.parentField(name()) == null) {
553559
return blockLoaderFromFallbackSyntheticSource(blContext);
554560
}
555561

server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -469,7 +469,8 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) {
469469
return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(name());
470470
}
471471

472-
if (isSyntheticSource) {
472+
// Multi fields don't have fallback synthetic source.
473+
if (isSyntheticSource && blContext.parentField(name()) == null) {
473474
return blockLoaderFromFallbackSyntheticSource(blContext);
474475
}
475476

server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -656,7 +656,8 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) {
656656
return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(name());
657657
}
658658

659-
if (isSyntheticSource) {
659+
// Multi fields don't have fallback synthetic source.
660+
if (isSyntheticSource && blContext.parentField(name()) == null) {
660661
return new FallbackSyntheticSourceBlockLoader(fallbackSyntheticSourceBlockLoaderReader(), name()) {
661662
@Override
662663
public Builder builder(BlockFactory factory, int expectedCount) {

server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1975,7 +1975,8 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) {
19751975
return type.blockLoaderFromDocValues(name());
19761976
}
19771977

1978-
if (isSyntheticSource) {
1978+
// Multi fields don't have fallback synthetic source.
1979+
if (isSyntheticSource && blContext.parentField(name()) == null) {
19791980
return type.blockLoaderFromFallbackSyntheticSource(name(), nullValue, coerce);
19801981
}
19811982

server/src/test/java/org/elasticsearch/index/mapper/blockloader/GeoPointFieldBlockLoaderTests.java

Lines changed: 40 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -40,28 +40,29 @@ protected Object expected(Map<String, Object> fieldMapping, Object value, TestCo
4040
if (rawNullValue == null) {
4141
nullValue = null;
4242
} else if (rawNullValue instanceof String s) {
43-
nullValue = convert(s, null);
43+
nullValue = convert(s, null, false);
4444
} else {
4545
throw new IllegalStateException("Unexpected null_value format");
4646
}
4747

4848
if (params.preference() == MappedFieldType.FieldExtractPreference.DOC_VALUES && hasDocValues(fieldMapping, true)) {
4949
if (values instanceof List<?> == false) {
50-
var point = convert(values, nullValue);
50+
var point = convert(values, nullValue, testContext.isMultifield());
5151
return point != null ? point.getEncoded() : null;
5252
}
5353

5454
var resultList = ((List<Object>) values).stream()
55-
.map(v -> convert(v, nullValue))
55+
.map(v -> convert(v, nullValue, testContext.isMultifield()))
5656
.filter(Objects::nonNull)
5757
.map(GeoPoint::getEncoded)
5858
.sorted()
5959
.toList();
6060
return maybeFoldList(resultList);
6161
}
6262

63+
// stored source is used
6364
if (params.syntheticSource() == false) {
64-
return exactValuesFromSource(values, nullValue);
65+
return exactValuesFromSource(values, nullValue, false);
6566
}
6667

6768
// Usually implementation of block loader from source adjusts values read from source
@@ -72,25 +73,25 @@ protected Object expected(Map<String, Object> fieldMapping, Object value, TestCo
7273
// That is unless "synthetic_source_keep" forces fallback synthetic source again.
7374

7475
if (testContext.forceFallbackSyntheticSource()) {
75-
return exactValuesFromSource(values, nullValue);
76+
return exactValuesFromSource(values, nullValue, false);
7677
}
7778

7879
String syntheticSourceKeep = (String) fieldMapping.getOrDefault("synthetic_source_keep", "none");
7980
if (syntheticSourceKeep.equals("all")) {
80-
return exactValuesFromSource(values, nullValue);
81+
return exactValuesFromSource(values, nullValue, false);
8182
}
8283
if (syntheticSourceKeep.equals("arrays") && extractedFieldValues.documentHasObjectArrays()) {
83-
return exactValuesFromSource(values, nullValue);
84+
return exactValuesFromSource(values, nullValue, false);
8485
}
8586

8687
// synthetic source and doc_values are present
8788
if (hasDocValues(fieldMapping, true)) {
8889
if (values instanceof List<?> == false) {
89-
return toWKB(normalize(convert(values, nullValue)));
90+
return toWKB(normalize(convert(values, nullValue, false)));
9091
}
9192

9293
var resultList = ((List<Object>) values).stream()
93-
.map(v -> convert(v, nullValue))
94+
.map(v -> convert(v, nullValue, false))
9495
.filter(Objects::nonNull)
9596
.sorted(Comparator.comparingLong(GeoPoint::getEncoded))
9697
.map(p -> toWKB(normalize(p)))
@@ -99,16 +100,20 @@ protected Object expected(Map<String, Object> fieldMapping, Object value, TestCo
99100
}
100101

101102
// synthetic source but no doc_values so using fallback synthetic source
102-
return exactValuesFromSource(values, nullValue);
103+
return exactValuesFromSource(values, nullValue, false);
103104
}
104105

105106
@SuppressWarnings("unchecked")
106-
private Object exactValuesFromSource(Object value, GeoPoint nullValue) {
107+
private Object exactValuesFromSource(Object value, GeoPoint nullValue, boolean needsMultifieldAdjustment) {
107108
if (value instanceof List<?> == false) {
108-
return toWKB(convert(value, nullValue));
109+
return toWKB(convert(value, nullValue, needsMultifieldAdjustment));
109110
}
110111

111-
var resultList = ((List<Object>) value).stream().map(v -> convert(v, nullValue)).filter(Objects::nonNull).map(this::toWKB).toList();
112+
var resultList = ((List<Object>) value).stream()
113+
.map(v -> convert(v, nullValue, needsMultifieldAdjustment))
114+
.filter(Objects::nonNull)
115+
.map(this::toWKB)
116+
.toList();
112117
return maybeFoldList(resultList);
113118
}
114119

@@ -168,14 +173,17 @@ private void processLeafLevel(Object value, ArrayList<Object> extracted) {
168173
}
169174

170175
@SuppressWarnings("unchecked")
171-
private GeoPoint convert(Object value, GeoPoint nullValue) {
176+
private GeoPoint convert(Object value, GeoPoint nullValue, boolean needsMultifieldAdjustment) {
172177
if (value == null) {
173-
return nullValue;
178+
if (nullValue == null) {
179+
return null;
180+
}
181+
return possiblyAdjustMultifieldValue(nullValue, needsMultifieldAdjustment);
174182
}
175183

176184
if (value instanceof String s) {
177185
try {
178-
return new GeoPoint(s);
186+
return possiblyAdjustMultifieldValue(new GeoPoint(s), needsMultifieldAdjustment);
179187
} catch (Exception e) {
180188
return null;
181189
}
@@ -185,16 +193,30 @@ private GeoPoint convert(Object value, GeoPoint nullValue) {
185193
if (m.get("type") != null) {
186194
var coordinates = (List<Double>) m.get("coordinates");
187195
// Order is GeoJSON is lon,lat
188-
return new GeoPoint(coordinates.get(1), coordinates.get(0));
196+
return possiblyAdjustMultifieldValue(new GeoPoint(coordinates.get(1), coordinates.get(0)), needsMultifieldAdjustment);
189197
} else {
190-
return new GeoPoint((Double) m.get("lat"), (Double) m.get("lon"));
198+
return possiblyAdjustMultifieldValue(new GeoPoint((Double) m.get("lat"), (Double) m.get("lon")), needsMultifieldAdjustment);
191199
}
192200
}
193201

194202
// Malformed values are excluded
195203
return null;
196204
}
197205

206+
private GeoPoint possiblyAdjustMultifieldValue(GeoPoint point, boolean isMultifield) {
207+
// geo_point multifields are parsed from a geohash representation of the original point (GeoPointFieldMapper#index)
208+
// and it's not exact.
209+
// So if this is a multifield we need another adjustment here.
210+
// Note that this does not apply to block loader from source because in this case we parse raw original values.
211+
// Same thing happens with synthetic source since it is generated from the parent field data that didn't go through multi field
212+
// parsing logic.
213+
if (isMultifield) {
214+
return point.resetFromString(point.geohash());
215+
}
216+
217+
return point;
218+
}
219+
198220
private GeoPoint normalize(GeoPoint point) {
199221
if (point == null) {
200222
return null;

server/src/test/java/org/elasticsearch/index/mapper/blockloader/TextFieldWithParentBlockLoaderTests.java

Lines changed: 79 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -9,51 +9,108 @@
99

1010
package org.elasticsearch.index.mapper.blockloader;
1111

12+
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
13+
14+
import org.elasticsearch.datageneration.DocumentGenerator;
1215
import org.elasticsearch.datageneration.FieldType;
16+
import org.elasticsearch.datageneration.MappingGenerator;
17+
import org.elasticsearch.datageneration.Template;
1318
import org.elasticsearch.datageneration.datasource.DataSourceHandler;
1419
import org.elasticsearch.datageneration.datasource.DataSourceRequest;
1520
import org.elasticsearch.datageneration.datasource.DataSourceResponse;
16-
import org.elasticsearch.datageneration.datasource.DefaultMappingParametersHandler;
1721
import org.elasticsearch.index.mapper.BlockLoaderTestCase;
22+
import org.elasticsearch.index.mapper.BlockLoaderTestRunner;
23+
import org.elasticsearch.index.mapper.MapperServiceTestCase;
24+
import org.elasticsearch.xcontent.XContentBuilder;
25+
import org.elasticsearch.xcontent.XContentType;
1826

19-
import java.util.HashMap;
27+
import java.io.IOException;
2028
import java.util.List;
2129
import java.util.Map;
2230

23-
public class TextFieldWithParentBlockLoaderTests extends BlockLoaderTestCase {
24-
public TextFieldWithParentBlockLoaderTests(Params params) {
25-
// keyword because we need a keyword parent field
26-
super(FieldType.KEYWORD.toString(), List.of(new DataSourceHandler() {
31+
import static org.elasticsearch.index.mapper.BlockLoaderTestCase.buildSpecification;
32+
import static org.elasticsearch.index.mapper.BlockLoaderTestCase.hasDocValues;
33+
34+
public class TextFieldWithParentBlockLoaderTests extends MapperServiceTestCase {
35+
private final BlockLoaderTestCase.Params params;
36+
private final BlockLoaderTestRunner runner;
37+
38+
@ParametersFactory(argumentFormatting = "preference=%s")
39+
public static List<Object[]> args() {
40+
return BlockLoaderTestCase.args();
41+
}
42+
43+
public TextFieldWithParentBlockLoaderTests(BlockLoaderTestCase.Params params) {
44+
this.params = params;
45+
this.runner = new BlockLoaderTestRunner(params);
46+
}
47+
48+
// This is similar to BlockLoaderTestCase#testBlockLoaderOfMultiField but has customizations required to properly test the case
49+
// of text multi field in a keyword field.
50+
public void testBlockLoaderOfParentField() throws IOException {
51+
var template = new Template(Map.of("parent", new Template.Leaf("parent", FieldType.KEYWORD.toString())));
52+
var specification = buildSpecification(List.of(new DataSourceHandler() {
2753
@Override
2854
public DataSourceResponse.LeafMappingParametersGenerator handle(DataSourceRequest.LeafMappingParametersGenerator request) {
29-
assert request.fieldType().equals(FieldType.KEYWORD.toString());
55+
// This is a bit tricky meta-logic.
56+
// We want to customize mapping but to do this we need the mapping for the same field type
57+
// so we use name to untangle this.
58+
if (request.fieldName().equals("parent") == false) {
59+
return null;
60+
}
3061

31-
// We need to force multi field generation
3262
return new DataSourceResponse.LeafMappingParametersGenerator(() -> {
33-
var defaultSupplier = DefaultMappingParametersHandler.keywordMapping(
34-
request,
35-
DefaultMappingParametersHandler.commonMappingParameters()
36-
);
37-
var mapping = defaultSupplier.get();
63+
var dataSource = request.dataSource();
64+
65+
var keywordParentMapping = dataSource.get(
66+
new DataSourceRequest.LeafMappingParametersGenerator(
67+
dataSource,
68+
"_field",
69+
FieldType.KEYWORD.toString(),
70+
request.eligibleCopyToFields(),
71+
request.dynamicMapping()
72+
)
73+
).mappingGenerator().get();
74+
75+
var textMultiFieldMapping = dataSource.get(
76+
new DataSourceRequest.LeafMappingParametersGenerator(
77+
dataSource,
78+
"_field",
79+
FieldType.TEXT.toString(),
80+
request.eligibleCopyToFields(),
81+
request.dynamicMapping()
82+
)
83+
).mappingGenerator().get();
84+
3885
// we don't need this here
39-
mapping.remove("copy_to");
86+
keywordParentMapping.remove("copy_to");
4087

41-
var textMultiFieldMappingSupplier = DefaultMappingParametersHandler.textMapping(request, new HashMap<>());
42-
var textMultiFieldMapping = textMultiFieldMappingSupplier.get();
4388
textMultiFieldMapping.put("type", "text");
4489
textMultiFieldMapping.remove("fields");
4590

46-
mapping.put("fields", Map.of("txt", textMultiFieldMapping));
91+
keywordParentMapping.put("fields", Map.of("mf", textMultiFieldMapping));
4792

48-
return mapping;
93+
return keywordParentMapping;
4994
});
5095
}
51-
}), params);
96+
}));
97+
var mapping = new MappingGenerator(specification).generate(template);
98+
var fieldMapping = mapping.lookup().get("parent");
99+
100+
var document = new DocumentGenerator(specification).generate(template, mapping);
101+
var fieldValue = document.get("parent");
102+
103+
Object expected = expected(fieldMapping, fieldValue, new BlockLoaderTestCase.TestContext(false, true));
104+
var mappingXContent = XContentBuilder.builder(XContentType.JSON.xContent()).map(mapping.raw());
105+
var mapperService = params.syntheticSource()
106+
? createSytheticSourceMapperService(mappingXContent)
107+
: createMapperService(mappingXContent);
108+
109+
runner.runTest(mapperService, document, expected, "parent.mf");
52110
}
53111

54-
@Override
55112
@SuppressWarnings("unchecked")
56-
protected Object expected(Map<String, Object> fieldMapping, Object value, TestContext testContext) {
113+
private Object expected(Map<String, Object> fieldMapping, Object value, BlockLoaderTestCase.TestContext testContext) {
57114
assert fieldMapping.containsKey("fields");
58115

59116
Object normalizer = fieldMapping.get("normalizer");
@@ -66,12 +123,7 @@ protected Object expected(Map<String, Object> fieldMapping, Object value, TestCo
66123
}
67124

68125
// we are using block loader of the text field itself
69-
var textFieldMapping = (Map<String, Object>) ((Map<String, Object>) fieldMapping.get("fields")).get("txt");
126+
var textFieldMapping = (Map<String, Object>) ((Map<String, Object>) fieldMapping.get("fields")).get("mf");
70127
return TextFieldBlockLoaderTests.expectedValue(textFieldMapping, value, params, testContext);
71128
}
72-
73-
@Override
74-
protected String blockLoaderFieldName(String originalName) {
75-
return originalName + ".txt";
76-
}
77129
}

test/framework/src/main/java/org/elasticsearch/datageneration/MappingGenerator.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ private void generateMapping(
104104
var mappingParametersGenerator = specification.dataSource()
105105
.get(
106106
new DataSourceRequest.LeafMappingParametersGenerator(
107+
specification.dataSource(),
107108
fieldName,
108109
leaf.type(),
109110
context.eligibleCopyToDestinations(),

0 commit comments

Comments
 (0)