Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
fc524df
Add random tests for match_only_text with multi-field
parkertimmins Aug 1, 2025
0861dcc
Skip phrase query for match_only_text in nested
parkertimmins Aug 1, 2025
f4ce957
Simplify phrase query generation
parkertimmins Aug 1, 2025
63098b0
Tune down number of ignore_above and multi-fields
parkertimmins Aug 1, 2025
770ad5d
Some cleanup and tune params
parkertimmins Aug 3, 2025
f96a1a8
[CI] Auto commit changes from spotless
Aug 4, 2025
560d937
Use text matcher for match_only_text
parkertimmins Aug 4, 2025
934b764
Merge branch 'main' into parker/random-tests-match-only-text-multifield
parkertimmins Aug 4, 2025
5446ac4
Merge branch 'main' into parker/random-tests-match-only-text-multifield
parkertimmins Aug 8, 2025
789222b
Remove wildcard as a subfield option
parkertimmins Aug 8, 2025
be6da22
Merge branch 'main' into parker/random-tests-match-only-text-multifield
parkertimmins Aug 11, 2025
501f385
Don't add wildcard and match_only_text to block loader tests
parkertimmins Aug 12, 2025
787f4ae
Merge branch 'main' into parker/random-tests-match-only-text-multifield
parkertimmins Aug 12, 2025
4c2843c
Update multi-field name to subfield_keyword
parkertimmins Aug 12, 2025
1760919
Move multifield into separate data source handler
parkertimmins Aug 13, 2025
f136489
[CI] Auto commit changes from spotless
Aug 13, 2025
401c9e8
Small fix
parkertimmins Aug 13, 2025
7715c83
Change set to list to avoid non-determinism
parkertimmins Aug 14, 2025
5ce0810
improve naming
parkertimmins Aug 14, 2025
0037c8f
Improvements from review
parkertimmins Aug 14, 2025
cafc130
Merge branch 'main' into parker/random-tests-match-only-text-multifield
parkertimmins Aug 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.elasticsearch.datageneration.fields.leaf.IpFieldDataGenerator;
import org.elasticsearch.datageneration.fields.leaf.KeywordFieldDataGenerator;
import org.elasticsearch.datageneration.fields.leaf.LongFieldDataGenerator;
import org.elasticsearch.datageneration.fields.leaf.MatchOnlyTextFieldDataGenerator;
import org.elasticsearch.datageneration.fields.leaf.ScaledFloatFieldDataGenerator;
import org.elasticsearch.datageneration.fields.leaf.ShortFieldDataGenerator;
import org.elasticsearch.datageneration.fields.leaf.TextFieldDataGenerator;
Expand Down Expand Up @@ -50,7 +51,8 @@ public enum FieldType {
TEXT("text"),
IP("ip"),
CONSTANT_KEYWORD("constant_keyword"),
WILDCARD("wildcard");
WILDCARD("wildcard"),
MATCH_ONLY_TEXT("match_only_text");

private final String name;

Expand Down Expand Up @@ -78,6 +80,7 @@ public FieldDataGenerator generator(String fieldName, DataSource dataSource) {
case IP -> new IpFieldDataGenerator(dataSource);
case CONSTANT_KEYWORD -> new ConstantKeywordFieldDataGenerator();
case WILDCARD -> new WildcardFieldDataGenerator(dataSource);
case MATCH_ONLY_TEXT -> new MatchOnlyTextFieldDataGenerator(dataSource);
};
}

Expand All @@ -101,6 +104,7 @@ public static FieldType tryParse(String name) {
case "ip" -> FieldType.IP;
case "constant_keyword" -> FieldType.CONSTANT_KEYWORD;
case "wildcard" -> FieldType.WILDCARD;
case "match_only_text" -> FieldType.MATCH_ONLY_TEXT;
default -> null;
};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.function.Supplier;
Expand All @@ -37,17 +38,18 @@ public DataSourceResponse.LeafMappingParametersGenerator handle(DataSourceReques
}

return new DataSourceResponse.LeafMappingParametersGenerator(switch (fieldType) {
case KEYWORD -> keywordMapping(request);
case KEYWORD -> keywordMapping(false, request);
case LONG, INTEGER, SHORT, BYTE, DOUBLE, FLOAT, HALF_FLOAT, UNSIGNED_LONG -> numberMapping(fieldType);
case SCALED_FLOAT -> scaledFloatMapping();
case COUNTED_KEYWORD -> countedKeywordMapping();
case BOOLEAN -> booleanMapping();
case DATE -> dateMapping();
case GEO_POINT -> geoPointMapping();
case TEXT -> textMapping(request);
case TEXT -> textMapping(false, request);
case IP -> ipMapping();
case CONSTANT_KEYWORD -> constantKeywordMapping();
case WILDCARD -> wildcardMapping();
case WILDCARD -> wildcardMapping(false, request);
case MATCH_ONLY_TEXT -> matchOnlyTextMapping(false, request);
});
}

Expand Down Expand Up @@ -77,7 +79,7 @@ private Supplier<Map<String, Object>> numberMapping(FieldType fieldType) {
};
}

private Supplier<Map<String, Object>> keywordMapping(DataSourceRequest.LeafMappingParametersGenerator request) {
private Supplier<Map<String, Object>> keywordMapping(boolean hasParent, DataSourceRequest.LeafMappingParametersGenerator request) {
return () -> {
var mapping = commonMappingParameters();

Expand All @@ -96,12 +98,15 @@ private Supplier<Map<String, Object>> keywordMapping(DataSourceRequest.LeafMappi
}
}

if (ESTestCase.randomDouble() <= 0.2) {
mapping.put("ignore_above", ESTestCase.randomIntBetween(1, 100));
if (ESTestCase.randomDouble() <= 0.3) {
mapping.put("ignore_above", ESTestCase.randomIntBetween(1, 50));
}
if (ESTestCase.randomDouble() <= 0.2) {
mapping.put("null_value", ESTestCase.randomAlphaOfLengthBetween(0, 10));
}
if (hasParent == false && ESTestCase.randomBoolean()) {
mapping.put("fields", stringSubField(request));
}

return mapping;
};
Expand Down Expand Up @@ -196,19 +201,15 @@ private Supplier<Map<String, Object>> geoPointMapping() {
};
}

private Supplier<Map<String, Object>> textMapping(DataSourceRequest.LeafMappingParametersGenerator request) {
private Supplier<Map<String, Object>> textMapping(boolean hasParent, DataSourceRequest.LeafMappingParametersGenerator request) {
return () -> {
var mapping = new HashMap<String, Object>();

mapping.put("store", ESTestCase.randomBoolean());
mapping.put("index", ESTestCase.randomBoolean());

if (ESTestCase.randomDouble() <= 0.1) {
var keywordMultiFieldMapping = keywordMapping(request).get();
keywordMultiFieldMapping.put("type", "keyword");
keywordMultiFieldMapping.remove("copy_to");

mapping.put("fields", Map.of("kwd", keywordMultiFieldMapping));
if (hasParent == false && ESTestCase.randomBoolean()) {
mapping.put("fields", stringSubField(request));
}

return mapping;
Expand Down Expand Up @@ -243,21 +244,57 @@ private Supplier<Map<String, Object>> constantKeywordMapping() {
};
}

private Supplier<Map<String, Object>> wildcardMapping() {
private Supplier<Map<String, Object>> wildcardMapping(boolean hasParent, DataSourceRequest.LeafMappingParametersGenerator request) {
return () -> {
var mapping = new HashMap<String, Object>();

if (ESTestCase.randomDouble() <= 0.2) {
mapping.put("ignore_above", ESTestCase.randomIntBetween(1, 100));
if (ESTestCase.randomDouble() <= 0.3) {
mapping.put("ignore_above", ESTestCase.randomIntBetween(1, 50));
}
if (ESTestCase.randomDouble() <= 0.2) {
mapping.put("null_value", ESTestCase.randomAlphaOfLengthBetween(0, 10));
}
if (hasParent == false && ESTestCase.randomBoolean()) {
mapping.put("fields", stringSubField(request));
}

return mapping;
};
}

/**
 * Builds a supplier of random mapping parameters for a {@code match_only_text} field.
 *
 * <p>Each call to the supplier produces a fresh mutable map. The only parameter that may be set is
 * {@code fields}, holding a single randomly chosen string sub-field.
 *
 * @param hasParent {@code true} when the field being generated is itself a sub-field; in that case
 *                  no {@code fields} entry is ever added
 * @param request   the originating request, forwarded to {@code stringSubField} when a sub-field
 *                  is generated
 * @return a supplier of mapping parameter maps
 */
private Supplier<Map<String, Object>> matchOnlyTextMapping(
    boolean hasParent,
    DataSourceRequest.LeafMappingParametersGenerator request
) {
    return () -> {
        Map<String, Object> parameters = new HashMap<>();

        // A field that already has a parent never gets sub-fields of its own; bail out before
        // drawing any random value so the random sequence stays the same as before.
        if (hasParent) {
            return parameters;
        }

        if (ESTestCase.randomBoolean()) {
            parameters.put("fields", stringSubField(request));
        }
        return parameters;
    };
}

private Map<String, Object> stringSubField(DataSourceRequest.LeafMappingParametersGenerator request) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You will need to make some adjustments here to handle plugins. Instead of calling matchOnlyTextMapping(..) directly, you need to get it from DataSource. Take a look at TextFieldWithParentBlockLoaderTests for an example.

FieldType parent = FieldType.tryParse(request.fieldType());
List<FieldType> stringTypes = List.of(FieldType.TEXT, FieldType.MATCH_ONLY_TEXT, FieldType.KEYWORD, FieldType.WILDCARD);
var childType = ESTestCase.randomValueOtherThan(parent, () -> ESTestCase.randomFrom(stringTypes));
var child = switch (childType) {
case TEXT -> textMapping(true, request).get();
case MATCH_ONLY_TEXT -> matchOnlyTextMapping(true, request).get();
case WILDCARD -> wildcardMapping(true, request).get();
case KEYWORD -> {
var mapping = keywordMapping(true, request).get();
mapping.remove("copy_to");
yield mapping;
}
default -> throw new AssertionError("unreachable");
};

child.put("type", childType.toString());
return Map.of("subfield_" + childType, child);
}

public static HashMap<String, Object> commonMappingParameters() {
var map = new HashMap<String, Object>();
map.put("store", ESTestCase.randomBoolean());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.datageneration.fields.leaf;

import org.elasticsearch.datageneration.FieldDataGenerator;
import org.elasticsearch.datageneration.datasource.DataSource;

import java.util.Map;

/**
 * Generates values for {@code match_only_text} fields.
 *
 * <p>This class has no generation logic of its own: every request is forwarded to a
 * {@link TextFieldDataGenerator} created from the same {@link DataSource}.
 */
public class MatchOnlyTextFieldDataGenerator implements FieldDataGenerator {
    /** The text generator that produces the values. */
    private final FieldDataGenerator delegate;

    /**
     * @param dataSource data source handed to the underlying {@link TextFieldDataGenerator}
     */
    public MatchOnlyTextFieldDataGenerator(DataSource dataSource) {
        delegate = new TextFieldDataGenerator(dataSource);
    }

    /**
     * @param fieldMapping mapping of the field, passed through unchanged to the text generator
     * @return the value produced by the text generator for {@code fieldMapping}
     */
    @Override
    public Object generateValue(Map<String, Object> fieldMapping) {
        return delegate.generateValue(fieldMapping);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ static Map<String, FieldSpecificMatcher> matchers(
put("shape", new ExactMatcher("shape", actualMappings, actualSettings, expectedMappings, expectedSettings));
put("geo_point", new GeoPointMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings));
put("text", new TextMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings));
put("match_only_text", new MatchOnlyTextMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings));
put("ip", new IpMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings));
put("constant_keyword", new ConstantKeywordMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings));
put("wildcard", new WildcardMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings));
Expand Down Expand Up @@ -621,6 +622,10 @@ class TextMatcher implements FieldSpecificMatcher {
this.expectedSettings = expectedSettings;
}

/**
 * Returns the field type name of this matcher. The value is embedded in the
 * "Values of type [...] don't match" message built by {@code match}; subclasses override it
 * to report their own type name.
 *
 * @return the literal {@code "text"}
 */
public String type() {
return "text";
}

@Override
@SuppressWarnings("unchecked")
public MatchResult match(
Expand All @@ -643,7 +648,7 @@ public MatchResult match(
if (multiFields != null) {
var keywordMatcher = new KeywordMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings);

var keywordFieldMapping = (Map<String, Object>) multiFields.get("kwd");
var keywordFieldMapping = (Map<String, Object>) multiFields.get("subfield_keyword");
var keywordMatchResult = keywordMatcher.match(actual, expected, keywordFieldMapping, keywordFieldMapping);
if (keywordMatchResult.isMatch()) {
return MatchResult.match();
Expand All @@ -656,7 +661,7 @@ public MatchResult match(
actualSettings,
expectedMappings,
expectedSettings,
"Values of type [text] don't match, " + prettyPrintCollections(actual, expected)
"Values of type [" + type() + "] don't match, " + prettyPrintCollections(actual, expected)
)
);
}
Expand All @@ -670,6 +675,22 @@ private Set<Object> normalize(List<Object> values) {
}
}

/**
 * Matcher for {@code match_only_text} fields.
 *
 * <p>All matching behaviour is inherited from {@link TextMatcher}; the only difference is the type
 * name returned by {@link #type()}.
 */
class MatchOnlyTextMatcher extends TextMatcher {
    /** Type name reported by this matcher. */
    private static final String TYPE_NAME = "match_only_text";

    /**
     * Creates the matcher by passing all arguments straight to the {@link TextMatcher} constructor.
     */
    MatchOnlyTextMatcher(
        XContentBuilder actualMappings,
        Settings.Builder actualSettings,
        XContentBuilder expectedMappings,
        Settings.Builder expectedSettings
    ) {
        super(actualMappings, actualSettings, expectedMappings, expectedSettings);
    }

    /**
     * @return the literal {@code "match_only_text"}
     */
    @Override
    public String type() {
        return TYPE_NAME;
    }
}

class IpMatcher extends GenericMappingAwareMatcher {
IpMatcher(
XContentBuilder actualMappings,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,7 @@
import org.elasticsearch.datageneration.FieldType;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.test.ESTestCase;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

Expand All @@ -28,7 +25,7 @@ public interface LeafQueryGenerator {
* @param type the type to build a query for
* @return a generator that can build queries for this type
*/
static LeafQueryGenerator buildForType(String type) {
static LeafQueryGenerator buildForType(String type, MappingPredicates mappingPredicates) {
LeafQueryGenerator noQueries = (Map<String, Object> fieldMapping, String path, Object value) -> List.of();

FieldType fieldType = FieldType.tryParse(type);
Expand All @@ -38,8 +35,9 @@ static LeafQueryGenerator buildForType(String type) {

return switch (fieldType) {
case KEYWORD -> new KeywordQueryGenerator();
case TEXT -> new TextQueryGenerator();
case WILDCARD -> new WildcardQueryGenerator();
case TEXT -> new TextQueryGenerator();
case MATCH_ONLY_TEXT -> new MatchOnlyTextQueryGenerator(mappingPredicates);
default -> noQueries;
};
}
Expand All @@ -53,13 +51,14 @@ public List<QueryBuilder> generate(Map<String, Object> fieldMapping, String path
return List.of();
}
}
return List.of(QueryBuilders.termQuery(path, value));
return List.of(QueryBuilders.termQuery(path, value), QueryBuilders.matchQuery(path, value));
}
}

class WildcardQueryGenerator implements LeafQueryGenerator {
public List<QueryBuilder> generate(Map<String, Object> fieldMapping, String path, Object value) {
// Queries with emojis can currently fail due to https://github.com/elastic/elasticsearch/issues/132144
// TODO remove when fixed
// queries with emojis can currently fail due to https://github.com/elastic/elasticsearch/issues/132144
if (containsHighSurrogates((String) value)) {
return List.of();
}
Expand All @@ -76,25 +75,20 @@ public List<QueryBuilder> generate(Map<String, Object> fieldMapping, String path
}
}

var results = new ArrayList<QueryBuilder>();
results.add(QueryBuilders.matchQuery(path, value));
var phraseQuery = buildPhraseQuery(path, (String) value);
if (phraseQuery != null) {
results.add(phraseQuery);
}
return results;
return List.of(QueryBuilders.matchQuery(path, value), QueryBuilders.matchPhraseQuery(path, value));
}
}

record MatchOnlyTextQueryGenerator(MappingPredicates mappingPredicates) implements LeafQueryGenerator {

private static QueryBuilder buildPhraseQuery(String path, String value) {
var tokens = Arrays.asList(value.split("[^a-zA-Z0-9]"));
if (tokens.isEmpty()) {
return null;
public List<QueryBuilder> generate(Map<String, Object> fieldMapping, String path, Object value) {
// TODO remove when fixed
// match_only_text in nested context fails for synthetic source https://github.com/elastic/elasticsearch/issues/132352
if (mappingPredicates.inNestedContext(path)) {
return List.of(QueryBuilders.matchQuery(path, value));
}

int low = ESTestCase.randomIntBetween(0, tokens.size() - 1);
int hi = ESTestCase.randomIntBetween(low + 1, tokens.size());
var phrase = String.join(" ", tokens.subList(low, hi));
return QueryBuilders.matchPhraseQuery(path, phrase);
return List.of(QueryBuilders.matchQuery(path, value), QueryBuilders.matchPhraseQuery(path, value));
}
}

Expand Down
Loading