Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
fc524df
Add random tests for match_only_text with multi-field
parkertimmins Aug 1, 2025
0861dcc
Skip phrase query for match_only_text in nested
parkertimmins Aug 1, 2025
f4ce957
Simplify phrase query generation
parkertimmins Aug 1, 2025
63098b0
Tune down number of ignove_above and multi-fields
parkertimmins Aug 1, 2025
770ad5d
Some cleanup and tune params
parkertimmins Aug 3, 2025
f96a1a8
[CI] Auto commit changes from spotless
Aug 4, 2025
560d937
Use text matcher for match_only_text
parkertimmins Aug 4, 2025
934b764
Merge branch 'main' into parker/random-tests-match-only-text-multifield
parkertimmins Aug 4, 2025
5446ac4
Merge branch 'main' into parker/random-tests-match-only-text-multifield
parkertimmins Aug 8, 2025
789222b
Remove wildcard as a subfield option
parkertimmins Aug 8, 2025
be6da22
Merge branch 'main' into parker/random-tests-match-only-text-multifield
parkertimmins Aug 11, 2025
501f385
Dont add wildcard and match_only_text to block loader tests
parkertimmins Aug 12, 2025
787f4ae
Merge branch 'main' into parker/random-tests-match-only-text-multifield
parkertimmins Aug 12, 2025
4c2843c
Update multi-field name to subfield_keyword
parkertimmins Aug 12, 2025
1760919
Move multifield into separate data source handler
parkertimmins Aug 13, 2025
f136489
[CI] Auto commit changes from spotless
Aug 13, 2025
401c9e8
Small fix
parkertimmins Aug 13, 2025
7715c83
Change set to list to avoid non-determinism
parkertimmins Aug 14, 2025
5ce0810
improve naming
parkertimmins Aug 14, 2025
0037c8f
Improvements from review
parkertimmins Aug 14, 2025
cafc130
Merge branch 'main' into parker/random-tests-match-only-text-multifield
parkertimmins Aug 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ public DataSourceResponse.LeafMappingParametersGenerator handle(DataSourceReques
"_field",
FieldType.KEYWORD.toString(),
request.eligibleCopyToFields(),
request.dynamicMapping()
request.dynamicMapping(),
request.includePluginTypesInMultiFields()
)
).mappingGenerator().get();

Expand All @@ -78,7 +79,8 @@ public DataSourceResponse.LeafMappingParametersGenerator handle(DataSourceReques
"_field",
FieldType.TEXT.toString(),
request.eligibleCopyToFields(),
request.dynamicMapping()
request.dynamicMapping(),
request.includePluginTypesInMultiFields()
)
).mappingGenerator().get();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,16 @@
* @param nestedFieldsLimit how many total nested fields can be present in a produced mapping
* @param fullyDynamicMapping if the mapping is fully dynamic, meaning none of the fields are mapped (essentially mapping is empty)
* @param predefinedFields predefined fields that must be present in mapping and documents. Only top level fields are supported.
* @param includePluginTypes whether types defined in plugins should be added to mapping
*/
public record DataGeneratorSpecification(
DataSource dataSource,
int maxFieldCountPerLevel,
int maxObjectDepth,
int nestedFieldsLimit,
boolean fullyDynamicMapping,
List<PredefinedField> predefinedFields
List<PredefinedField> predefinedFields,
boolean includePluginTypes
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@lkts I added this because BlockLoaderTestCase tests were failing because I was adding wildcard and match_only_text multi-fields. Since these types are defined in plugins they cannot (I think) be added to BlockLoader tests in test. But I'm not sure this belongs up in the main specification. Since the data generation code provides lots of knobs for overriding behavior, I'm guessing there's a better way to do this.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the ping. We already use geo_shape and shape in DataGenerationHelper that are defined in plugins. Can you replicate that?

This is the configuration in DataGenerationHelper and you need to add a Gradle dependency on the plugin.

.withDataSourceHandlers(List.of(new GeoShapeDataSourceHandler(), new ShapeDataSourceHandler()))

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it makes sense to keep match_only_text in DefaultMappingParametersHandler. counted_keyword and wildcard types are from xpack already and don't pose a problem. Instead, I tried moving the creation of multi-fields into its own handler, which leaves the choice of multi-field types to the code that adds the handler.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, if you can implement it in the "main" module it's fine. geo_shape is special because it relies on test helpers defined inside geo module.

) {

public static Builder builder() {
Expand All @@ -51,6 +53,7 @@ public static class Builder {
private int nestedFieldsLimit;
private boolean fullyDynamicMapping;
private List<PredefinedField> predefinedFields;
private boolean includePluginTypes;

public Builder() {
this.dataSourceHandlers = new ArrayList<>();
Expand All @@ -61,6 +64,7 @@ public Builder() {
this.nestedFieldsLimit = 50;
fullyDynamicMapping = false;
this.predefinedFields = new ArrayList<>();
this.includePluginTypes = true;
}

public Builder withDataSourceHandlers(Collection<DataSourceHandler> handlers) {
Expand Down Expand Up @@ -93,14 +97,20 @@ public Builder withPredefinedFields(List<PredefinedField> predefinedFields) {
return this;
}

/**
 * Sets whether types defined in plugins should be added to the generated mapping.
 *
 * @param includePluginTypes flag stored on this builder and handed to the specification by {@link #build()}
 * @return this builder, so calls can be chained
 */
public Builder withIncludePluginTypes(boolean includePluginTypes) {
this.includePluginTypes = includePluginTypes;
return this;
}

public DataGeneratorSpecification build() {
return new DataGeneratorSpecification(
new DataSource(dataSourceHandlers),
maxFieldCountPerLevel,
maxObjectDepth,
nestedFieldsLimit,
fullyDynamicMapping,
predefinedFields
predefinedFields,
includePluginTypes
);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.elasticsearch.datageneration.fields.leaf.IpFieldDataGenerator;
import org.elasticsearch.datageneration.fields.leaf.KeywordFieldDataGenerator;
import org.elasticsearch.datageneration.fields.leaf.LongFieldDataGenerator;
import org.elasticsearch.datageneration.fields.leaf.MatchOnlyTextFieldDataGenerator;
import org.elasticsearch.datageneration.fields.leaf.ScaledFloatFieldDataGenerator;
import org.elasticsearch.datageneration.fields.leaf.ShortFieldDataGenerator;
import org.elasticsearch.datageneration.fields.leaf.TextFieldDataGenerator;
Expand Down Expand Up @@ -50,7 +51,8 @@ public enum FieldType {
TEXT("text"),
IP("ip"),
CONSTANT_KEYWORD("constant_keyword"),
WILDCARD("wildcard");
WILDCARD("wildcard"),
MATCH_ONLY_TEXT("match_only_text");

private final String name;

Expand Down Expand Up @@ -78,6 +80,7 @@ public FieldDataGenerator generator(String fieldName, DataSource dataSource) {
case IP -> new IpFieldDataGenerator(dataSource);
case CONSTANT_KEYWORD -> new ConstantKeywordFieldDataGenerator();
case WILDCARD -> new WildcardFieldDataGenerator(dataSource);
case MATCH_ONLY_TEXT -> new MatchOnlyTextFieldDataGenerator(dataSource);
};
}

Expand All @@ -101,6 +104,7 @@ public static FieldType tryParse(String name) {
case "ip" -> FieldType.IP;
case "constant_keyword" -> FieldType.CONSTANT_KEYWORD;
case "wildcard" -> FieldType.WILDCARD;
case "match_only_text" -> FieldType.MATCH_ONLY_TEXT;
default -> null;
};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,8 @@ private void generateMapping(
fieldName,
leaf.type(),
context.eligibleCopyToDestinations(),
context.parentDynamicMapping()
context.parentDynamicMapping(),
specification.includePluginTypes()
)
)
.mappingGenerator();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,8 @@ record LeafMappingParametersGenerator(
String fieldName,
String fieldType,
Set<String> eligibleCopyToFields,
DynamicMapping dynamicMapping
DynamicMapping dynamicMapping,
boolean includePluginTypesInMultiFields
) implements DataSourceRequest<DataSourceResponse.LeafMappingParametersGenerator> {
public DataSourceResponse.LeafMappingParametersGenerator accept(DataSourceHandler handler) {
return handler.handle(this);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.function.Supplier;
Expand All @@ -37,17 +38,18 @@ public DataSourceResponse.LeafMappingParametersGenerator handle(DataSourceReques
}

return new DataSourceResponse.LeafMappingParametersGenerator(switch (fieldType) {
case KEYWORD -> keywordMapping(request);
case KEYWORD -> keywordMapping(false, request);
case LONG, INTEGER, SHORT, BYTE, DOUBLE, FLOAT, HALF_FLOAT, UNSIGNED_LONG -> numberMapping(fieldType);
case SCALED_FLOAT -> scaledFloatMapping();
case COUNTED_KEYWORD -> countedKeywordMapping();
case BOOLEAN -> booleanMapping();
case DATE -> dateMapping();
case GEO_POINT -> geoPointMapping();
case TEXT -> textMapping(request);
case TEXT -> textMapping(false, request);
case IP -> ipMapping();
case CONSTANT_KEYWORD -> constantKeywordMapping();
case WILDCARD -> wildcardMapping();
case WILDCARD -> wildcardMapping(false, request);
case MATCH_ONLY_TEXT -> matchOnlyTextMapping(false, request);
});
}

Expand Down Expand Up @@ -77,7 +79,7 @@ private Supplier<Map<String, Object>> numberMapping(FieldType fieldType) {
};
}

private Supplier<Map<String, Object>> keywordMapping(DataSourceRequest.LeafMappingParametersGenerator request) {
private Supplier<Map<String, Object>> keywordMapping(boolean hasParent, DataSourceRequest.LeafMappingParametersGenerator request) {
return () -> {
var mapping = commonMappingParameters();

Expand All @@ -96,12 +98,15 @@ private Supplier<Map<String, Object>> keywordMapping(DataSourceRequest.LeafMappi
}
}

if (ESTestCase.randomDouble() <= 0.2) {
mapping.put("ignore_above", ESTestCase.randomIntBetween(1, 100));
if (ESTestCase.randomDouble() <= 0.3) {
mapping.put("ignore_above", ESTestCase.randomIntBetween(1, 50));
}
if (ESTestCase.randomDouble() <= 0.2) {
mapping.put("null_value", ESTestCase.randomAlphaOfLengthBetween(0, 10));
}
if (hasParent == false && ESTestCase.randomBoolean()) {
mapping.put("fields", stringSubField(request));
}

return mapping;
};
Expand Down Expand Up @@ -196,19 +201,15 @@ private Supplier<Map<String, Object>> geoPointMapping() {
};
}

private Supplier<Map<String, Object>> textMapping(DataSourceRequest.LeafMappingParametersGenerator request) {
private Supplier<Map<String, Object>> textMapping(boolean hasParent, DataSourceRequest.LeafMappingParametersGenerator request) {
return () -> {
var mapping = new HashMap<String, Object>();

mapping.put("store", ESTestCase.randomBoolean());
mapping.put("index", ESTestCase.randomBoolean());

if (ESTestCase.randomDouble() <= 0.1) {
var keywordMultiFieldMapping = keywordMapping(request).get();
keywordMultiFieldMapping.put("type", "keyword");
keywordMultiFieldMapping.remove("copy_to");

mapping.put("fields", Map.of("kwd", keywordMultiFieldMapping));
if (hasParent == false && ESTestCase.randomBoolean()) {
mapping.put("fields", stringSubField(request));
}

return mapping;
Expand Down Expand Up @@ -243,21 +244,59 @@ private Supplier<Map<String, Object>> constantKeywordMapping() {
};
}

private Supplier<Map<String, Object>> wildcardMapping() {
private Supplier<Map<String, Object>> wildcardMapping(boolean hasParent, DataSourceRequest.LeafMappingParametersGenerator request) {
return () -> {
var mapping = new HashMap<String, Object>();

if (ESTestCase.randomDouble() <= 0.2) {
mapping.put("ignore_above", ESTestCase.randomIntBetween(1, 100));
if (ESTestCase.randomDouble() <= 0.3) {
mapping.put("ignore_above", ESTestCase.randomIntBetween(1, 50));
}
if (ESTestCase.randomDouble() <= 0.2) {
mapping.put("null_value", ESTestCase.randomAlphaOfLengthBetween(0, 10));
}
if (hasParent == false && ESTestCase.randomBoolean()) {
mapping.put("fields", stringSubField(request));
}

return mapping;
};
}

/**
 * Creates a supplier of mapping parameters for a {@code match_only_text} field.
 * <p>
 * The produced map is empty unless the field is a top-level field ({@code hasParent == false})
 * and a random coin flip succeeds, in which case a single randomly chosen string sub-field is
 * attached under the {@code "fields"} key.
 *
 * @param hasParent {@code true} when the mapping is generated for a sub-field, in which case no
 *                  further multi-field is added and no random value is drawn
 * @param request   the originating request, forwarded to {@code stringSubField}
 * @return a supplier that builds a fresh, mutable parameter map on every invocation
 */
private Supplier<Map<String, Object>> matchOnlyTextMapping(
    boolean hasParent,
    DataSourceRequest.LeafMappingParametersGenerator request
) {
    return () -> {
        Map<String, Object> parameters = new HashMap<>();
        // Keep the parent check first: the random draw must only happen for top-level fields.
        boolean isTopLevel = hasParent == false;
        if (isTopLevel && ESTestCase.randomBoolean()) {
            parameters.put("fields", stringSubField(request));
        }
        return parameters;
    };
}

private Map<String, Object> stringSubField(DataSourceRequest.LeafMappingParametersGenerator request) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You would have to do some adjustments here to handle plugins. Instead of directly calling matchOnlyTextMapping(..) you need to get it from DataSource. Take a look at TextFieldWithParentBlockLoaderTests.

FieldType parent = FieldType.tryParse(request.fieldType());
List<FieldType> childTypes = request.includePluginTypesInMultiFields()
? List.of(FieldType.TEXT, FieldType.KEYWORD, FieldType.WILDCARD, FieldType.MATCH_ONLY_TEXT)
: List.of(FieldType.TEXT, FieldType.KEYWORD);
var childType = ESTestCase.randomValueOtherThan(parent, () -> ESTestCase.randomFrom(childTypes));
var child = switch (childType) {
case TEXT -> textMapping(true, request).get();
case KEYWORD -> {
var mapping = keywordMapping(true, request).get();
mapping.remove("copy_to");
yield mapping;
}
case MATCH_ONLY_TEXT -> matchOnlyTextMapping(true, request).get();
case WILDCARD -> wildcardMapping(true, request).get();
default -> throw new AssertionError("unreachable");
};

child.put("type", childType.toString());
return Map.of("subfield_" + childType, child);
}

public static HashMap<String, Object> commonMappingParameters() {
var map = new HashMap<String, Object>();
map.put("store", ESTestCase.randomBoolean());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.datageneration.fields.leaf;

import org.elasticsearch.datageneration.FieldDataGenerator;
import org.elasticsearch.datageneration.datasource.DataSource;

import java.util.Map;

/**
 * Produces values for {@code match_only_text} fields.
 * <p>
 * All work is handed to a {@link TextFieldDataGenerator} created from the same {@link DataSource},
 * so generated values are whatever the text generator yields for the given field mapping.
 */
public class MatchOnlyTextFieldDataGenerator implements FieldDataGenerator {
    /** Text generator that actually produces the values. */
    private final FieldDataGenerator delegate;

    /**
     * @param dataSource data source passed through to the underlying text generator
     */
    public MatchOnlyTextFieldDataGenerator(DataSource dataSource) {
        delegate = new TextFieldDataGenerator(dataSource);
    }

    /**
     * @param fieldMapping mapping of the field a value is generated for
     * @return the value produced by the underlying text generator for {@code fieldMapping}
     */
    @Override
    public Object generateValue(Map<String, Object> fieldMapping) {
        final Object generated = delegate.generateValue(fieldMapping);
        return generated;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ static Map<String, FieldSpecificMatcher> matchers(
put("shape", new ExactMatcher("shape", actualMappings, actualSettings, expectedMappings, expectedSettings));
put("geo_point", new GeoPointMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings));
put("text", new TextMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings));
put("match_only_text", new MatchOnlyTextMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings));
put("ip", new IpMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings));
put("constant_keyword", new ConstantKeywordMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings));
put("wildcard", new WildcardMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings));
Expand Down Expand Up @@ -621,6 +622,10 @@ class TextMatcher implements FieldSpecificMatcher {
this.expectedSettings = expectedSettings;
}

/**
 * Name of the field type this matcher reports in its "Values of type [...] don't match" message.
 * {@code MatchOnlyTextMatcher} overrides it to report {@code match_only_text} instead.
 *
 * @return the type name, {@code "text"} for this matcher
 */
public String type() {
return "text";
}

@Override
@SuppressWarnings("unchecked")
public MatchResult match(
Expand All @@ -643,7 +648,7 @@ public MatchResult match(
if (multiFields != null) {
var keywordMatcher = new KeywordMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings);

var keywordFieldMapping = (Map<String, Object>) multiFields.get("kwd");
var keywordFieldMapping = (Map<String, Object>) multiFields.get("subfield_keyword");
var keywordMatchResult = keywordMatcher.match(actual, expected, keywordFieldMapping, keywordFieldMapping);
if (keywordMatchResult.isMatch()) {
return MatchResult.match();
Expand All @@ -656,7 +661,7 @@ public MatchResult match(
actualSettings,
expectedMappings,
expectedSettings,
"Values of type [text] don't match, " + prettyPrintCollections(actual, expected)
"Values of type [" + type() + "] don't match, " + prettyPrintCollections(actual, expected)
)
);
}
Expand All @@ -670,6 +675,22 @@ private Set<Object> normalize(List<Object> values) {
}
}

/**
 * Matcher for {@code match_only_text} fields.
 * <p>
 * Inherits all matching behaviour from {@link TextMatcher}; the only difference is the type name
 * returned by {@link #type()}.
 */
class MatchOnlyTextMatcher extends TextMatcher {
    /** Type name returned by {@link #type()}. */
    private static final String TYPE_NAME = "match_only_text";

    MatchOnlyTextMatcher(
        XContentBuilder actualMappings,
        Settings.Builder actualSettings,
        XContentBuilder expectedMappings,
        Settings.Builder expectedSettings
    ) {
        super(actualMappings, actualSettings, expectedMappings, expectedSettings);
    }

    /**
     * @return {@code "match_only_text"}
     */
    @Override
    public String type() {
        return TYPE_NAME;
    }
}

class IpMatcher extends GenericMappingAwareMatcher {
IpMatcher(
XContentBuilder actualMappings,
Expand Down
Loading