Skip to content
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
fc524df
Add random tests for match_only_text with multi-field
parkertimmins Aug 1, 2025
0861dcc
Skip phrase query for match_only_text in nested
parkertimmins Aug 1, 2025
f4ce957
Simplify phrase query generation
parkertimmins Aug 1, 2025
63098b0
Tune down number of ignore_above and multi-fields
parkertimmins Aug 1, 2025
770ad5d
Some cleanup and tune params
parkertimmins Aug 3, 2025
f96a1a8
[CI] Auto commit changes from spotless
Aug 4, 2025
560d937
Use text matcher for match_only_text
parkertimmins Aug 4, 2025
934b764
Merge branch 'main' into parker/random-tests-match-only-text-multifield
parkertimmins Aug 4, 2025
5446ac4
Merge branch 'main' into parker/random-tests-match-only-text-multifield
parkertimmins Aug 8, 2025
789222b
Remove wildcard as a subfield option
parkertimmins Aug 8, 2025
be6da22
Merge branch 'main' into parker/random-tests-match-only-text-multifield
parkertimmins Aug 11, 2025
501f385
Don't add wildcard and match_only_text to block loader tests
parkertimmins Aug 12, 2025
787f4ae
Merge branch 'main' into parker/random-tests-match-only-text-multifield
parkertimmins Aug 12, 2025
4c2843c
Update multi-field name to subfield_keyword
parkertimmins Aug 12, 2025
1760919
Move multifield into separate data source handler
parkertimmins Aug 13, 2025
f136489
[CI] Auto commit changes from spotless
Aug 13, 2025
401c9e8
Small fix
parkertimmins Aug 13, 2025
7715c83
Change set to list to avoid non-determinism
parkertimmins Aug 14, 2025
5ce0810
improve naming
parkertimmins Aug 14, 2025
0037c8f
Improvements from review
parkertimmins Aug 14, 2025
cafc130
Merge branch 'main' into parker/random-tests-match-only-text-multifield
parkertimmins Aug 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ public static Object expectedValue(Map<String, Object> fieldMapping, Object valu

var fields = (Map<String, Object>) fieldMapping.get("fields");
if (fields != null) {
var keywordMultiFieldMapping = (Map<String, Object>) fields.get("kwd");
var keywordMultiFieldMapping = (Map<String, Object>) fields.get("subfield_keyword");
Object normalizer = fields.get("normalizer");
boolean docValues = hasDocValues(keywordMultiFieldMapping, true);
boolean store = keywordMultiFieldMapping.getOrDefault("store", false).equals(true);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,7 @@
import org.elasticsearch.datageneration.FieldType;
import org.elasticsearch.datageneration.MappingGenerator;
import org.elasticsearch.datageneration.Template;
import org.elasticsearch.datageneration.datasource.DataSourceHandler;
import org.elasticsearch.datageneration.datasource.DataSourceRequest;
import org.elasticsearch.datageneration.datasource.DataSourceResponse;
import org.elasticsearch.datageneration.datasource.MultifieldAddonHandler;
import org.elasticsearch.index.mapper.BlockLoaderTestCase;
import org.elasticsearch.index.mapper.BlockLoaderTestRunner;
import org.elasticsearch.index.mapper.MapperServiceTestCase;
Expand Down Expand Up @@ -49,51 +47,8 @@ public TextFieldWithParentBlockLoaderTests(BlockLoaderTestCase.Params params) {
// of text multi field in a keyword field.
public void testBlockLoaderOfParentField() throws IOException {
var template = new Template(Map.of("parent", new Template.Leaf("parent", FieldType.KEYWORD.toString())));
var specification = buildSpecification(List.of(new DataSourceHandler() {
@Override
public DataSourceResponse.LeafMappingParametersGenerator handle(DataSourceRequest.LeafMappingParametersGenerator request) {
// This is a bit tricky meta-logic.
// We want to customize mapping but to do this we need the mapping for the same field type
// so we use name to untangle this.
if (request.fieldName().equals("parent") == false) {
return null;
}

return new DataSourceResponse.LeafMappingParametersGenerator(() -> {
var dataSource = request.dataSource();

var keywordParentMapping = dataSource.get(
new DataSourceRequest.LeafMappingParametersGenerator(
dataSource,
"_field",
FieldType.KEYWORD.toString(),
request.eligibleCopyToFields(),
request.dynamicMapping()
)
).mappingGenerator().get();

var textMultiFieldMapping = dataSource.get(
new DataSourceRequest.LeafMappingParametersGenerator(
dataSource,
"_field",
FieldType.TEXT.toString(),
request.eligibleCopyToFields(),
request.dynamicMapping()
)
).mappingGenerator().get();

// we don't need this here
keywordParentMapping.remove("copy_to");

textMultiFieldMapping.put("type", "text");
textMultiFieldMapping.remove("fields");

keywordParentMapping.put("fields", Map.of("mf", textMultiFieldMapping));

return keywordParentMapping;
});
}
}));
var specification = buildSpecification(List.of(new MultifieldAddonHandler(Map.of(FieldType.KEYWORD, List.of(FieldType.TEXT)), 1f)));

var mapping = new MappingGenerator(specification).generate(template);
var fieldMapping = mapping.lookup().get("parent");

Expand All @@ -106,7 +61,7 @@ public DataSourceResponse.LeafMappingParametersGenerator handle(DataSourceReques
? createSytheticSourceMapperService(mappingXContent)
: createMapperService(mappingXContent);

runner.runTest(mapperService, document, expected, "parent.mf");
runner.runTest(mapperService, document, expected, "parent.subfield_text");
}

@SuppressWarnings("unchecked")
Expand All @@ -123,7 +78,7 @@ private Object expected(Map<String, Object> fieldMapping, Object value, BlockLoa
}

// we are using block loader of the text field itself
var textFieldMapping = (Map<String, Object>) ((Map<String, Object>) fieldMapping.get("fields")).get("mf");
var textFieldMapping = (Map<String, Object>) ((Map<String, Object>) fieldMapping.get("fields")).get("subfield_text");
return TextFieldBlockLoaderTests.expectedValue(textFieldMapping, value, params, testContext);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.elasticsearch.datageneration.fields.leaf.IpFieldDataGenerator;
import org.elasticsearch.datageneration.fields.leaf.KeywordFieldDataGenerator;
import org.elasticsearch.datageneration.fields.leaf.LongFieldDataGenerator;
import org.elasticsearch.datageneration.fields.leaf.MatchOnlyTextFieldDataGenerator;
import org.elasticsearch.datageneration.fields.leaf.ScaledFloatFieldDataGenerator;
import org.elasticsearch.datageneration.fields.leaf.ShortFieldDataGenerator;
import org.elasticsearch.datageneration.fields.leaf.TextFieldDataGenerator;
Expand Down Expand Up @@ -50,7 +51,8 @@ public enum FieldType {
TEXT("text"),
IP("ip"),
CONSTANT_KEYWORD("constant_keyword"),
WILDCARD("wildcard");
WILDCARD("wildcard"),
MATCH_ONLY_TEXT("match_only_text");

private final String name;

Expand Down Expand Up @@ -78,6 +80,7 @@ public FieldDataGenerator generator(String fieldName, DataSource dataSource) {
case IP -> new IpFieldDataGenerator(dataSource);
case CONSTANT_KEYWORD -> new ConstantKeywordFieldDataGenerator();
case WILDCARD -> new WildcardFieldDataGenerator(dataSource);
case MATCH_ONLY_TEXT -> new MatchOnlyTextFieldDataGenerator(dataSource);
};
}

Expand All @@ -101,6 +104,7 @@ public static FieldType tryParse(String name) {
case "ip" -> FieldType.IP;
case "constant_keyword" -> FieldType.CONSTANT_KEYWORD;
case "wildcard" -> FieldType.WILDCARD;
case "match_only_text" -> FieldType.MATCH_ONLY_TEXT;
default -> null;
};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,11 @@ public DataSourceResponse.LeafMappingParametersGenerator handle(DataSourceReques
case BOOLEAN -> booleanMapping();
case DATE -> dateMapping();
case GEO_POINT -> geoPointMapping();
case TEXT -> textMapping(request);
case TEXT -> textMapping();
case IP -> ipMapping();
case CONSTANT_KEYWORD -> constantKeywordMapping();
case WILDCARD -> wildcardMapping();
case MATCH_ONLY_TEXT -> matchOnlyTextMapping();
});
}

Expand Down Expand Up @@ -96,8 +97,8 @@ private Supplier<Map<String, Object>> keywordMapping(DataSourceRequest.LeafMappi
}
}

if (ESTestCase.randomDouble() <= 0.2) {
mapping.put("ignore_above", ESTestCase.randomIntBetween(1, 100));
if (ESTestCase.randomDouble() <= 0.3) {
mapping.put("ignore_above", ESTestCase.randomIntBetween(1, 50));
}
if (ESTestCase.randomDouble() <= 0.2) {
mapping.put("null_value", ESTestCase.randomAlphaOfLengthBetween(0, 10));
Expand Down Expand Up @@ -196,21 +197,13 @@ private Supplier<Map<String, Object>> geoPointMapping() {
};
}

private Supplier<Map<String, Object>> textMapping(DataSourceRequest.LeafMappingParametersGenerator request) {
private Supplier<Map<String, Object>> textMapping() {
return () -> {
var mapping = new HashMap<String, Object>();

mapping.put("store", ESTestCase.randomBoolean());
mapping.put("index", ESTestCase.randomBoolean());

if (ESTestCase.randomDouble() <= 0.1) {
var keywordMultiFieldMapping = keywordMapping(request).get();
keywordMultiFieldMapping.put("type", "keyword");
keywordMultiFieldMapping.remove("copy_to");

mapping.put("fields", Map.of("kwd", keywordMultiFieldMapping));
}

return mapping;
};
}
Expand Down Expand Up @@ -247,8 +240,8 @@ private Supplier<Map<String, Object>> wildcardMapping() {
return () -> {
var mapping = new HashMap<String, Object>();

if (ESTestCase.randomDouble() <= 0.2) {
mapping.put("ignore_above", ESTestCase.randomIntBetween(1, 100));
if (ESTestCase.randomDouble() <= 0.3) {
mapping.put("ignore_above", ESTestCase.randomIntBetween(1, 50));
}
if (ESTestCase.randomDouble() <= 0.2) {
mapping.put("null_value", ESTestCase.randomAlphaOfLengthBetween(0, 10));
Expand All @@ -258,6 +251,10 @@ private Supplier<Map<String, Object>> wildcardMapping() {
};
}

/**
 * Mapping parameters for {@code match_only_text} fields: no parameters are randomised, so every
 * invocation of the returned supplier yields a fresh, empty, mutable map.
 */
private Supplier<Map<String, Object>> matchOnlyTextMapping() {
    return () -> new HashMap<>();
}

public static HashMap<String, Object> commonMappingParameters() {
var map = new HashMap<String, Object>();
map.put("store", ESTestCase.randomBoolean());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.datageneration.datasource;

import org.elasticsearch.datageneration.FieldType;
import org.elasticsearch.test.ESTestCase;

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class MultifieldAddonHandler implements DataSourceHandler {

// Field name carried by the internal requests this handler issues; handle() ignores any request
// with this name so that the remaining handlers produce the plain mapping for the type.
private static final String PLACEHOLDER_NAME = "_an_improbable_placeholder_name";
// Sub-field probability used by the single-argument constructor.
private static final float DEFAULT_CHANCE_OF_CHILD_FIELD = 0.5f;
// Parent field type -> field types that may be attached to it as a sub-field.
private final Map<FieldType, List<FieldType>> subfieldTypes;
// Threshold compared against ESTestCase.randomFloat() to decide whether a sub-field is added.
private final float chanceOfChildField;

// The string-like field types that STRING_TYPE_HANDLER combines with one another.
private static final List<FieldType> STRING_TYPES = List.of(
    FieldType.TEXT,
    FieldType.KEYWORD,
    FieldType.MATCH_ONLY_TEXT,
    FieldType.WILDCARD
);

/**
 * Handler that lets each type in {@code STRING_TYPES} receive any of the other types in that list
 * as a sub-field, using the default sub-field probability.
 * <p>
 * Declared {@code final}: this is a shared instance exposed through a public static field, and it
 * must not be reassignable by callers.
 */
public static final MultifieldAddonHandler STRING_TYPE_HANDLER = new MultifieldAddonHandler(
    STRING_TYPES.stream().collect(Collectors.toMap(t -> t, t -> STRING_TYPES.stream().filter(s -> s != t).toList()))
);

/**
 * Creates a handler with an explicit sub-field probability.
 *
 * @param subfieldTypes      for each parent field type, the field types that may be added to it as a sub-field;
 *                           parent types without an entry are not handled
 * @param chanceOfChildField threshold compared against {@code ESTestCase.randomFloat()}; a sub-field is added
 *                           unless the random value is greater than this threshold
 */
public MultifieldAddonHandler(Map<FieldType, List<FieldType>> subfieldTypes, float chanceOfChildField) {
    this.chanceOfChildField = chanceOfChildField;
    this.subfieldTypes = subfieldTypes;
}

/**
 * Creates a handler that uses {@code DEFAULT_CHANCE_OF_CHILD_FIELD} as the sub-field probability.
 *
 * @param subfieldTypes for each parent field type, the field types that may be added to it as a sub-field
 */
public MultifieldAddonHandler(Map<FieldType, List<FieldType>> subfieldTypes) {
    this(subfieldTypes, DEFAULT_CHANCE_OF_CHILD_FIELD);
}

/**
 * Produces a mapping generator for the requested leaf field that may carry one sub-field under
 * the {@code fields} key.
 * <p>
 * The parent and child mappings themselves are obtained by re-issuing the request to the data
 * source under {@code PLACEHOLDER_NAME} (see {@code getMappingForType}); this handler only
 * combines them.
 *
 * @param request the leaf mapping request to answer
 * @return {@code null} when the request carries the placeholder name or when no sub-field types
 *         are configured for the requested field type; otherwise a generator that returns the
 *         parent mapping, with a single randomly chosen sub-field unless
 *         {@code ESTestCase.randomFloat()} exceeds {@code chanceOfChildField}
 */
@Override
public DataSourceResponse.LeafMappingParametersGenerator handle(DataSourceRequest.LeafMappingParametersGenerator request) {

// Need to delegate creation of the same type of field to other handlers. So skip request
// if it's for the placeholder name used when creating the child and parent fields.
if (request.fieldName().equals(PLACEHOLDER_NAME)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's easy to implement a reserved prefix, you need to update generateFieldName() in DefaultObjectGenerationHandler and that's it i believe. Should we do that?

return null;
}

// NOTE(review): FieldType.tryParse returns null for names it does not recognise. If subfieldTypes
// is a null-hostile map (e.g. built with Map.of), get(null) would presumably throw instead of
// returning null - confirm whether a null check is needed before the lookup.
FieldType parentType = FieldType.tryParse(request.fieldType());
List<FieldType> childTypes = subfieldTypes.get(parentType);
if (childTypes == null) {
return null;
}

return new DataSourceResponse.LeafMappingParametersGenerator(() -> {
assert parentType != null;
var parent = getMappingForType(parentType, request);
// Leave the parent without a sub-field when the random draw exceeds the configured chance.
if (ESTestCase.randomFloat() > chanceOfChildField) {
return parent;
}

var childType = ESTestCase.randomFrom(childTypes);
var child = getChildMappingForType(childType, request);

// The sub-field is registered as "subfield_" followed by the child type's string form.
child.put("type", childType.toString());
String childName = "subfield_" + childType;
parent.put("fields", Map.of(childName, child));
return parent;
});
}

/**
 * Builds the mapping used for a sub-field of the given type by delegating to
 * {@code getMappingForType}; for keyword sub-fields the {@code copy_to} entry is removed.
 *
 * @param type    field type of the sub-field
 * @param request the original request, whose data source and settings are reused
 * @return the mapping for the sub-field
 */
private static Map<String, Object> getChildMappingForType(FieldType type, DataSourceRequest.LeafMappingParametersGenerator request) {
Map<String, Object> mapping = getMappingForType(type, request);
// NOTE(review): copy_to is stripped only for keyword sub-fields. If mappings of other types can
// also contain copy_to, it presumably has to be removed for them as well - confirm.
if (type == FieldType.KEYWORD) {
mapping.remove("copy_to");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I honestly don't remember why this is here but i don't see a reason why this should be done only for keywords.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh yeah, I guess it should probably be done for other types too 🤔

}
return mapping;
}

/**
 * Asks the request's data source for the mapping of a field of the given type.
 * <p>
 * The lookup is issued under {@code PLACEHOLDER_NAME}, reusing the original request's eligible
 * copy-to fields and dynamic-mapping setting, and the resulting mapping generator is invoked once.
 *
 * @param type    field type whose mapping should be generated
 * @param request the original request that supplies the data source and remaining settings
 * @return the mapping returned by the generator
 */
private static Map<String, Object> getMappingForType(FieldType type, DataSourceRequest.LeafMappingParametersGenerator request) {
    var source = request.dataSource();
    var placeholderRequest = new DataSourceRequest.LeafMappingParametersGenerator(
        source,
        PLACEHOLDER_NAME,
        type.toString(),
        request.eligibleCopyToFields(),
        request.dynamicMapping()
    );
    var response = source.get(placeholderRequest);
    return response.mappingGenerator().get();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.datageneration.fields.leaf;

import org.elasticsearch.datageneration.FieldDataGenerator;
import org.elasticsearch.datageneration.datasource.DataSource;

import java.util.Map;

/**
 * Value generator for {@code match_only_text} fields. It owns a {@link TextFieldDataGenerator}
 * built from the same data source and forwards every value request to it.
 */
public class MatchOnlyTextFieldDataGenerator implements FieldDataGenerator {
    private final FieldDataGenerator delegate;

    /**
     * @param dataSource data source handed to the underlying {@link TextFieldDataGenerator}
     */
    public MatchOnlyTextFieldDataGenerator(DataSource dataSource) {
        delegate = new TextFieldDataGenerator(dataSource);
    }

    /**
     * Returns whatever the underlying text generator produces for the given mapping.
     *
     * @param fieldMapping mapping of the field a value is generated for
     */
    @Override
    public Object generateValue(Map<String, Object> fieldMapping) {
        final Object generated = delegate.generateValue(fieldMapping);
        return generated;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ static Map<String, FieldSpecificMatcher> matchers(
put("shape", new ExactMatcher("shape", actualMappings, actualSettings, expectedMappings, expectedSettings));
put("geo_point", new GeoPointMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings));
put("text", new TextMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings));
put("match_only_text", new MatchOnlyTextMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings));
put("ip", new IpMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings));
put("constant_keyword", new ConstantKeywordMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings));
put("wildcard", new WildcardMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings));
Expand Down Expand Up @@ -621,6 +622,10 @@ class TextMatcher implements FieldSpecificMatcher {
this.expectedSettings = expectedSettings;
}

/**
 * Name of the field type this matcher reports in its "Values of type [...] don't match" message.
 * Subclasses such as {@code MatchOnlyTextMatcher} override it to report their own type name.
 *
 * @return the literal {@code "text"}
 */
public String type() {
return "text";
}

@Override
@SuppressWarnings("unchecked")
public MatchResult match(
Expand All @@ -643,7 +648,7 @@ public MatchResult match(
if (multiFields != null) {
var keywordMatcher = new KeywordMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings);

var keywordFieldMapping = (Map<String, Object>) multiFields.get("kwd");
var keywordFieldMapping = (Map<String, Object>) multiFields.get("subfield_keyword");
var keywordMatchResult = keywordMatcher.match(actual, expected, keywordFieldMapping, keywordFieldMapping);
if (keywordMatchResult.isMatch()) {
return MatchResult.match();
Expand All @@ -656,7 +661,7 @@ public MatchResult match(
actualSettings,
expectedMappings,
expectedSettings,
"Values of type [text] don't match, " + prettyPrintCollections(actual, expected)
"Values of type [" + type() + "] don't match, " + prettyPrintCollections(actual, expected)
)
);
}
Expand All @@ -670,6 +675,22 @@ private Set<Object> normalize(List<Object> values) {
}
}

/**
 * Matcher for {@code match_only_text} fields. All matching logic is inherited from
 * {@link TextMatcher}; only the type name returned by {@link #type()} differs.
 */
class MatchOnlyTextMatcher extends TextMatcher {
    private static final String TYPE_NAME = "match_only_text";

    /** Passes the mappings and settings straight through to {@link TextMatcher}. */
    MatchOnlyTextMatcher(
        XContentBuilder actualMappings,
        Settings.Builder actualSettings,
        XContentBuilder expectedMappings,
        Settings.Builder expectedSettings
    ) {
        super(actualMappings, actualSettings, expectedMappings, expectedSettings);
    }

    /** @return the type name {@code "match_only_text"} */
    @Override
    public String type() {
        return TYPE_NAME;
    }
}

class IpMatcher extends GenericMappingAwareMatcher {
IpMatcher(
XContentBuilder actualMappings,
Expand Down
Loading