From fc524df06f87ff8d1508f53edbd48eca6eaa2e2f Mon Sep 17 00:00:00 2001 From: Parker Timmins Date: Fri, 1 Aug 2025 09:42:14 -0500 Subject: [PATCH 01/16] Add random tests for match_only_text with multi-field --- .../datageneration/FieldType.java | 6 +- .../DefaultMappingParametersHandler.java | 67 ++++++++++++++---- .../leaf/MatchOnlyTextFieldDataGenerator.java | 28 ++++++++ .../matchers/source/FieldSpecificMatcher.java | 69 ++++++++++++++++++- .../queries/LeafQueryGenerator.java | 7 +- .../queries/QueryGenerator.java | 2 +- ...ardVersusLogsIndexModeChallengeRestIT.java | 2 + 7 files changed, 161 insertions(+), 20 deletions(-) create mode 100644 test/framework/src/main/java/org/elasticsearch/datageneration/fields/leaf/MatchOnlyTextFieldDataGenerator.java diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/FieldType.java b/test/framework/src/main/java/org/elasticsearch/datageneration/FieldType.java index eab2149019204..ef6cc21e90d8a 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/FieldType.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/FieldType.java @@ -23,6 +23,7 @@ import org.elasticsearch.datageneration.fields.leaf.IpFieldDataGenerator; import org.elasticsearch.datageneration.fields.leaf.KeywordFieldDataGenerator; import org.elasticsearch.datageneration.fields.leaf.LongFieldDataGenerator; +import org.elasticsearch.datageneration.fields.leaf.MatchOnlyTextFieldDataGenerator; import org.elasticsearch.datageneration.fields.leaf.ScaledFloatFieldDataGenerator; import org.elasticsearch.datageneration.fields.leaf.ShortFieldDataGenerator; import org.elasticsearch.datageneration.fields.leaf.TextFieldDataGenerator; @@ -50,7 +51,8 @@ public enum FieldType { TEXT("text"), IP("ip"), CONSTANT_KEYWORD("constant_keyword"), - WILDCARD("wildcard"); + WILDCARD("wildcard"), + MATCH_ONLY_TEXT("match_only_text"); private final String name; @@ -78,6 +80,7 @@ public FieldDataGenerator generator(String fieldName, 
DataSource dataSource) { case IP -> new IpFieldDataGenerator(dataSource); case CONSTANT_KEYWORD -> new ConstantKeywordFieldDataGenerator(); case WILDCARD -> new WildcardFieldDataGenerator(dataSource); + case MATCH_ONLY_TEXT -> new MatchOnlyTextFieldDataGenerator(dataSource); }; } @@ -101,6 +104,7 @@ public static FieldType tryParse(String name) { case "ip" -> FieldType.IP; case "constant_keyword" -> FieldType.CONSTANT_KEYWORD; case "wildcard" -> FieldType.WILDCARD; + case "match_only_text" -> FieldType.MATCH_ONLY_TEXT; default -> null; }; } diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultMappingParametersHandler.java b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultMappingParametersHandler.java index 2e234f8aec41c..e279e8caf5315 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultMappingParametersHandler.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultMappingParametersHandler.java @@ -22,6 +22,7 @@ import java.time.ZoneOffset; import java.time.format.DateTimeFormatter; import java.util.HashMap; +import java.util.List; import java.util.Locale; import java.util.Map; import java.util.function.Supplier; @@ -37,17 +38,18 @@ public DataSourceResponse.LeafMappingParametersGenerator handle(DataSourceReques } return new DataSourceResponse.LeafMappingParametersGenerator(switch (fieldType) { - case KEYWORD -> keywordMapping(request); + case KEYWORD -> keywordMapping(false, request); case LONG, INTEGER, SHORT, BYTE, DOUBLE, FLOAT, HALF_FLOAT, UNSIGNED_LONG -> numberMapping(fieldType); case SCALED_FLOAT -> scaledFloatMapping(); case COUNTED_KEYWORD -> countedKeywordMapping(); case BOOLEAN -> booleanMapping(); case DATE -> dateMapping(); case GEO_POINT -> geoPointMapping(); - case TEXT -> textMapping(request); + case TEXT -> textMapping(false, request); case IP -> ipMapping(); case CONSTANT_KEYWORD -> 
constantKeywordMapping(); - case WILDCARD -> wildcardMapping(); + case WILDCARD -> wildcardMapping(false, request); + case MATCH_ONLY_TEXT -> matchOnlyTextMapping(false, request); }); } @@ -77,7 +79,7 @@ private Supplier> numberMapping(FieldType fieldType) { }; } - private Supplier> keywordMapping(DataSourceRequest.LeafMappingParametersGenerator request) { + private Supplier> keywordMapping(boolean hasParent, DataSourceRequest.LeafMappingParametersGenerator request) { return () -> { var mapping = commonMappingParameters(); @@ -96,12 +98,15 @@ private Supplier> keywordMapping(DataSourceRequest.LeafMappi } } - if (ESTestCase.randomDouble() <= 0.2) { - mapping.put("ignore_above", ESTestCase.randomIntBetween(1, 100)); + if (ESTestCase.randomDouble() <= 0.5) { + mapping.put("ignore_above", ESTestCase.randomIntBetween(1, 30)); } if (ESTestCase.randomDouble() <= 0.2) { mapping.put("null_value", ESTestCase.randomAlphaOfLengthBetween(0, 10)); } + if (hasParent == false && ESTestCase.randomDouble() <= 0.5) { + mapping.put("fields", stringSubField(FieldType.KEYWORD, request)); + } return mapping; }; @@ -196,19 +201,15 @@ private Supplier> geoPointMapping() { }; } - private Supplier> textMapping(DataSourceRequest.LeafMappingParametersGenerator request) { + private Supplier> textMapping(boolean hasParent, DataSourceRequest.LeafMappingParametersGenerator request) { return () -> { var mapping = new HashMap(); mapping.put("store", ESTestCase.randomBoolean()); mapping.put("index", ESTestCase.randomBoolean()); - if (ESTestCase.randomDouble() <= 0.1) { - var keywordMultiFieldMapping = keywordMapping(request).get(); - keywordMultiFieldMapping.put("type", "keyword"); - keywordMultiFieldMapping.remove("copy_to"); - - mapping.put("fields", Map.of("kwd", keywordMultiFieldMapping)); + if (hasParent == false && ESTestCase.randomDouble() <= 0.5) { + mapping.put("fields", stringSubField(FieldType.TEXT, request)); } return mapping; @@ -243,21 +244,57 @@ private Supplier> 
constantKeywordMapping() { }; } - private Supplier> wildcardMapping() { + private Supplier> wildcardMapping(boolean hasParent, DataSourceRequest.LeafMappingParametersGenerator request) { return () -> { var mapping = new HashMap(); - if (ESTestCase.randomDouble() <= 0.2) { + if (ESTestCase.randomDouble() <= 0.3) { mapping.put("ignore_above", ESTestCase.randomIntBetween(1, 100)); } if (ESTestCase.randomDouble() <= 0.2) { mapping.put("null_value", ESTestCase.randomAlphaOfLengthBetween(0, 10)); } + if (hasParent == false && ESTestCase.randomDouble() <= 0.5) { + mapping.put("fields", stringSubField(FieldType.WILDCARD, request)); + } + + return mapping; + }; + } + private Supplier> matchOnlyTextMapping( + boolean hasParent, + DataSourceRequest.LeafMappingParametersGenerator request + ) { + return () -> { + var mapping = new HashMap(); + if (hasParent == false && ESTestCase.randomDouble() <= 0.5) { + mapping.put("fields", stringSubField(FieldType.MATCH_ONLY_TEXT, request)); + } return mapping; }; } + private Map stringSubField(FieldType parent, DataSourceRequest.LeafMappingParametersGenerator request) { + + List stringTypes = List.of(FieldType.TEXT, FieldType.MATCH_ONLY_TEXT, FieldType.KEYWORD, FieldType.WILDCARD); + var childType = ESTestCase.randomValueOtherThan(parent, () -> ESTestCase.randomFrom(stringTypes)); + var child = switch (childType) { + case TEXT -> textMapping(true, request).get(); + case MATCH_ONLY_TEXT -> matchOnlyTextMapping(true, request).get(); + case WILDCARD -> wildcardMapping(true, request).get(); + case KEYWORD -> { + var mapping = keywordMapping(true, request).get(); + mapping.remove("copy_to"); + yield mapping; + } + default -> throw new AssertionError("unreachable"); + }; + + child.put("type", childType.toString()); + return Map.of("subfield_" + childType, child); + } + public static HashMap commonMappingParameters() { var map = new HashMap(); map.put("store", ESTestCase.randomBoolean()); diff --git 
a/test/framework/src/main/java/org/elasticsearch/datageneration/fields/leaf/MatchOnlyTextFieldDataGenerator.java b/test/framework/src/main/java/org/elasticsearch/datageneration/fields/leaf/MatchOnlyTextFieldDataGenerator.java new file mode 100644 index 0000000000000..f4493fd9b4ee9 --- /dev/null +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/fields/leaf/MatchOnlyTextFieldDataGenerator.java @@ -0,0 +1,28 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.datageneration.fields.leaf; + +import org.elasticsearch.datageneration.FieldDataGenerator; +import org.elasticsearch.datageneration.datasource.DataSource; + +import java.util.Map; + +public class MatchOnlyTextFieldDataGenerator implements FieldDataGenerator { + private final FieldDataGenerator textGenerator; + + public MatchOnlyTextFieldDataGenerator(DataSource dataSource) { + this.textGenerator = new TextFieldDataGenerator(dataSource); + } + + @Override + public Object generateValue(Map fieldMapping) { + return textGenerator.generateValue(fieldMapping); + } +} diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/matchers/source/FieldSpecificMatcher.java b/test/framework/src/main/java/org/elasticsearch/datageneration/matchers/source/FieldSpecificMatcher.java index 7adf98ef9d6ee..b8a17122beb31 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/matchers/source/FieldSpecificMatcher.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/matchers/source/FieldSpecificMatcher.java @@ -63,6 
+63,7 @@ static Map matchers( put("shape", new ExactMatcher("shape", actualMappings, actualSettings, expectedMappings, expectedSettings)); put("geo_point", new GeoPointMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings)); put("text", new TextMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings)); + put("match_only_text", new MatchOnlyTextMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings)); put("ip", new IpMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings)); put("constant_keyword", new ConstantKeywordMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings)); put("wildcard", new WildcardMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings)); @@ -603,6 +604,72 @@ public MatchResult match( } } + class MatchOnlyTextMatcher implements FieldSpecificMatcher { + private final XContentBuilder actualMappings; + private final Settings.Builder actualSettings; + private final XContentBuilder expectedMappings; + private final Settings.Builder expectedSettings; + + MatchOnlyTextMatcher ( + XContentBuilder actualMappings, + Settings.Builder actualSettings, + XContentBuilder expectedMappings, + Settings.Builder expectedSettings + ) { + this.actualMappings = actualMappings; + this.actualSettings = actualSettings; + this.expectedMappings = expectedMappings; + this.expectedSettings = expectedSettings; + } + + @Override + @SuppressWarnings("unchecked") + public MatchResult match( + List actual, + List expected, + Map actualMapping, + Map expectedMapping + ) { + var expectedNormalized = normalize(expected); + var actualNormalized = normalize(actual); + + // Match simply as match_only_text first. 
+ if (actualNormalized.equals(expectedNormalized)) { + return MatchResult.match(); + } +// + var multiFields = (Map) getMappingParameter("fields", actualMapping, expectedMapping); + if (multiFields != null) { + var keywordMatcher = new KeywordMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings); + + var keywordFieldMapping = (Map) multiFields.get("subfield_keyword"); + var keywordMatchResult = keywordMatcher.match(actual, expected, keywordFieldMapping, keywordFieldMapping); + if (keywordMatchResult.isMatch()) { + return MatchResult.match(); + } + } + + return MatchResult.noMatch( + formatErrorMessage( + actualMappings, + actualSettings, + expectedMappings, + expectedSettings, + "Values of type [match_only_text] don't match, " + prettyPrintCollections(actual, expected) + ) + ); + } + + private Set normalize(List values) { + if (values == null) { + return Set.of(); + } + + return values.stream().filter(Objects::nonNull).collect(Collectors.toSet()); + } + } + + class TextMatcher implements FieldSpecificMatcher { private final XContentBuilder actualMappings; private final Settings.Builder actualSettings; @@ -643,7 +710,7 @@ public MatchResult match( if (multiFields != null) { var keywordMatcher = new KeywordMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings); - var keywordFieldMapping = (Map) multiFields.get("kwd"); + var keywordFieldMapping = (Map) multiFields.get("subfield_keyword"); var keywordMatchResult = keywordMatcher.match(actual, expected, keywordFieldMapping, keywordFieldMapping); if (keywordMatchResult.isMatch()) { return MatchResult.match(); diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/queries/LeafQueryGenerator.java b/test/framework/src/main/java/org/elasticsearch/datageneration/queries/LeafQueryGenerator.java index be26db580edf6..3c2ecb839e613 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/queries/LeafQueryGenerator.java +++ 
b/test/framework/src/main/java/org/elasticsearch/datageneration/queries/LeafQueryGenerator.java @@ -38,7 +38,7 @@ static LeafQueryGenerator buildForType(String type) { return switch (fieldType) { case KEYWORD -> new KeywordQueryGenerator(); - case TEXT -> new TextQueryGenerator(); + case TEXT, MATCH_ONLY_TEXT -> new TextQueryGenerator(); case WILDCARD -> new WildcardQueryGenerator(); default -> noQueries; }; @@ -53,7 +53,10 @@ public List generate(Map fieldMapping, String path return List.of(); } } - return List.of(QueryBuilders.termQuery(path, value)); + return List.of( + QueryBuilders.termQuery(path, value), + QueryBuilders.matchQuery(path, value) + ); } } diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/queries/QueryGenerator.java b/test/framework/src/main/java/org/elasticsearch/datageneration/queries/QueryGenerator.java index 9db0b628f85da..8b3dd1a4f2099 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/queries/QueryGenerator.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/queries/QueryGenerator.java @@ -42,7 +42,7 @@ public List generateQueries(String type, String path, Object value return leafQueries.stream().map(q -> wrapInNestedQuery(path, q)).toList(); } - private QueryBuilder wrapInNestedQuery(String path, QueryBuilder leafQuery) { + public QueryBuilder wrapInNestedQuery(String path, QueryBuilder leafQuery) { String[] parts = path.split("\\."); List nestedPaths = getNestedPathPrefixes(parts); QueryBuilder query = leafQuery; diff --git a/x-pack/plugin/logsdb/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/qa/StandardVersusLogsIndexModeChallengeRestIT.java b/x-pack/plugin/logsdb/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/qa/StandardVersusLogsIndexModeChallengeRestIT.java index 51e450c2a2da0..132ae4327473e 100644 --- a/x-pack/plugin/logsdb/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/qa/StandardVersusLogsIndexModeChallengeRestIT.java +++ 
b/x-pack/plugin/logsdb/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/qa/StandardVersusLogsIndexModeChallengeRestIT.java @@ -71,6 +71,7 @@ protected StandardVersusLogsIndexModeChallengeRestIT(DataGenerationHelper dataGe @Override public void baselineMappings(XContentBuilder builder) throws IOException { dataGenerationHelper.writeStandardMapping(builder); + int x = 5; } @Override @@ -143,6 +144,7 @@ public void testMatchAllQuery() throws IOException { assertTrue(matchResult.getMessage(), matchResult.isMatch()); } + @SuppressWarnings("unchecked") public void testRandomQueries() throws IOException { int numberOfDocuments = ESTestCase.randomIntBetween(10, 50); final List documents = generateDocuments(numberOfDocuments); From 0861dcc0f516121e05410eb6704d42a1f62a52c4 Mon Sep 17 00:00:00 2001 From: Parker Timmins Date: Fri, 1 Aug 2025 14:08:39 -0500 Subject: [PATCH 02/16] Skip phrase query for match_only_text in nested --- .../queries/LeafQueryGenerator.java | 45 ++++++++--- .../queries/MappingContextHelper.java | 74 +++++++++++++++++++ .../queries/QueryGenerator.java | 55 ++------------ 3 files changed, 113 insertions(+), 61 deletions(-) create mode 100644 test/framework/src/main/java/org/elasticsearch/datageneration/queries/MappingContextHelper.java diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/queries/LeafQueryGenerator.java b/test/framework/src/main/java/org/elasticsearch/datageneration/queries/LeafQueryGenerator.java index 3c2ecb839e613..ef4bf9e663ecb 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/queries/LeafQueryGenerator.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/queries/LeafQueryGenerator.java @@ -9,6 +9,7 @@ package org.elasticsearch.datageneration.queries; +import org.elasticsearch.common.MacAddressProvider; import org.elasticsearch.datageneration.FieldType; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; @@ -28,7 
+29,7 @@ public interface LeafQueryGenerator { * @param type the type to build a query for * @return a generator that can build queries for this type */ - static LeafQueryGenerator buildForType(String type) { + static LeafQueryGenerator buildForType(String type, MappingContextHelper mappingContextHelper) { LeafQueryGenerator noQueries = (Map fieldMapping, String path, Object value) -> List.of(); FieldType fieldType = FieldType.tryParse(type); @@ -38,8 +39,9 @@ static LeafQueryGenerator buildForType(String type) { return switch (fieldType) { case KEYWORD -> new KeywordQueryGenerator(); - case TEXT, MATCH_ONLY_TEXT -> new TextQueryGenerator(); case WILDCARD -> new WildcardQueryGenerator(); + case TEXT -> new TextQueryGenerator(); + case MATCH_ONLY_TEXT -> new MatchOnlyTextQueryGenerator(mappingContextHelper); default -> noQueries; }; } @@ -62,7 +64,8 @@ public List generate(Map fieldMapping, String path class WildcardQueryGenerator implements LeafQueryGenerator { public List generate(Map fieldMapping, String path, Object value) { - // Queries with emojis can currently fail due to https://github.com/elastic/elasticsearch/issues/132144 + // TODO remove when fixed + // queries with emojis can currently fail due to https://github.com/elastic/elasticsearch/issues/132144 if (containsHighSurrogates((String) value)) { return List.of(); } @@ -87,20 +90,40 @@ public List generate(Map fieldMapping, String path } return results; } + } + + record MatchOnlyTextQueryGenerator(MappingContextHelper mappingContextHelper) implements LeafQueryGenerator { + + public List generate(Map fieldMapping, String path, Object value) { + var results = new ArrayList(); + results.add(QueryBuilders.matchQuery(path, value)); - private static QueryBuilder buildPhraseQuery(String path, String value) { - var tokens = Arrays.asList(value.split("[^a-zA-Z0-9]")); - if (tokens.isEmpty()) { - return null; + // TODO remove when fixed + // match_only_text in nested context fails for synthetic source 
https://github.com/elastic/elasticsearch/issues/132352 + if (mappingContextHelper.inNestedContext(path)) { + return results; } - int low = ESTestCase.randomIntBetween(0, tokens.size() - 1); - int hi = ESTestCase.randomIntBetween(low + 1, tokens.size()); - var phrase = String.join(" ", tokens.subList(low, hi)); - return QueryBuilders.matchPhraseQuery(path, phrase); + var phraseQuery = buildPhraseQuery(path, (String) value); + if (phraseQuery != null) { + results.add(phraseQuery); + } + return results; } } + private static QueryBuilder buildPhraseQuery(String path, String value) { + var tokens = Arrays.asList(value.split("[^a-zA-Z0-9]")); + if (tokens.isEmpty()) { + return null; + } + + int low = ESTestCase.randomIntBetween(0, tokens.size() - 1); + int hi = ESTestCase.randomIntBetween(low + 1, tokens.size()); + var phrase = String.join(" ", tokens.subList(low, hi)); + return QueryBuilders.matchPhraseQuery(path, phrase); + } + static boolean containsHighSurrogates(String s) { for (int i = 0; i < s.length(); i++) { if (Character.isHighSurrogate(s.charAt(i))) { diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/queries/MappingContextHelper.java b/test/framework/src/main/java/org/elasticsearch/datageneration/queries/MappingContextHelper.java new file mode 100644 index 0000000000000..c18d1646f98c0 --- /dev/null +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/queries/MappingContextHelper.java @@ -0,0 +1,74 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.datageneration.queries; + +import org.elasticsearch.datageneration.Mapping; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +public class MappingContextHelper { + + private final Mapping mapping; + + public MappingContextHelper(Mapping mapping) { + this.mapping = mapping; + } + + @SuppressWarnings("unchecked") + public List getNestedPathPrefixes(String[] path) { + Map mapping = this.mapping.raw(); + mapping = (Map) mapping.get("_doc"); + mapping = (Map) mapping.get("properties"); + + var result = new ArrayList(); + for (int i = 0; i < path.length - 1; i++) { + var field = path[i]; + mapping = (Map) mapping.get(field); + + // dynamic field + if (mapping == null) { + break; + } + + boolean nested = "nested".equals(mapping.get("type")); + if (nested) { + result.add(String.join(".", Arrays.copyOfRange(path, 0, i + 1))); + } + mapping = (Map) mapping.get("properties"); + } + return result; + } + + @SuppressWarnings("unchecked") + public boolean isRuntimeField(String path) { + String[] parts = path.split("\\."); + var topLevelMapping = (Map) mapping.raw().get("_doc"); + boolean inRuntimeContext = "runtime".equals(topLevelMapping.get("dynamic")); + for (int i = 0; i < parts.length - 1; i++) { + var pathToHere = String.join(".", Arrays.copyOfRange(parts, 0, i + 1)); + Map fieldMapping = mapping.lookup().get(pathToHere); + if (fieldMapping == null) { + break; + } + if (fieldMapping.containsKey("dynamic")) { + // lower down dynamic definitions override higher up behavior + inRuntimeContext = "runtime".equals(fieldMapping.get("dynamic")); + } + } + return inRuntimeContext; + } + + public boolean inNestedContext(String path) { + return getNestedPathPrefixes(path.split("\\.")).isEmpty() == false; + } +} diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/queries/QueryGenerator.java 
b/test/framework/src/main/java/org/elasticsearch/datageneration/queries/QueryGenerator.java index 8b3dd1a4f2099..e91c36ae75bd0 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/queries/QueryGenerator.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/queries/QueryGenerator.java @@ -14,17 +14,16 @@ import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; -import java.util.ArrayList; -import java.util.Arrays; import java.util.List; -import java.util.Map; public class QueryGenerator { private final Mapping mapping; + private final MappingContextHelper mappingContextHelper; public QueryGenerator(Mapping mapping) { this.mapping = mapping; + this.mappingContextHelper = new MappingContextHelper(mapping); } public List generateQueries(String type, String path, Object value) { @@ -33,10 +32,10 @@ public List generateQueries(String type, String path, Object value return List.of(); } // Can handle dynamically mapped fields, but not runtime fields - if (isRuntimeField(path)) { + if (mappingContextHelper.isRuntimeField(path)) { return List.of(); } - var leafQueryGenerator = LeafQueryGenerator.buildForType(type); + var leafQueryGenerator = LeafQueryGenerator.buildForType(type, mappingContextHelper); var fieldMapping = mapping.lookup().get(path); var leafQueries = leafQueryGenerator.generate(fieldMapping, path, value); return leafQueries.stream().map(q -> wrapInNestedQuery(path, q)).toList(); @@ -44,55 +43,11 @@ public List generateQueries(String type, String path, Object value public QueryBuilder wrapInNestedQuery(String path, QueryBuilder leafQuery) { String[] parts = path.split("\\."); - List nestedPaths = getNestedPathPrefixes(parts); + List nestedPaths = mappingContextHelper.getNestedPathPrefixes(parts); QueryBuilder query = leafQuery; for (String nestedPath : nestedPaths.reversed()) { query = QueryBuilders.nestedQuery(nestedPath, query, ScoreMode.Max); } return query; } - - 
@SuppressWarnings("unchecked") - private List getNestedPathPrefixes(String[] path) { - Map mapping = this.mapping.raw(); - mapping = (Map) mapping.get("_doc"); - mapping = (Map) mapping.get("properties"); - - var result = new ArrayList(); - for (int i = 0; i < path.length - 1; i++) { - var field = path[i]; - mapping = (Map) mapping.get(field); - - // dynamic field - if (mapping == null) { - break; - } - - boolean nested = "nested".equals(mapping.get("type")); - if (nested) { - result.add(String.join(".", Arrays.copyOfRange(path, 0, i + 1))); - } - mapping = (Map) mapping.get("properties"); - } - return result; - } - - @SuppressWarnings("unchecked") - private boolean isRuntimeField(String path) { - String[] parts = path.split("\\."); - var topLevelMapping = (Map) mapping.raw().get("_doc"); - boolean inRuntimeContext = "runtime".equals(topLevelMapping.get("dynamic")); - for (int i = 0; i < parts.length - 1; i++) { - var pathToHere = String.join(".", Arrays.copyOfRange(parts, 0, i + 1)); - Map fieldMapping = mapping.lookup().get(pathToHere); - if (fieldMapping == null) { - break; - } - if (fieldMapping.containsKey("dynamic")) { - // lower down dynamic definitions override higher up behavior - inRuntimeContext = "runtime".equals(fieldMapping.get("dynamic")); - } - } - return inRuntimeContext; - } } From f4ce957598942dc390b610b7a959070fc1168f20 Mon Sep 17 00:00:00 2001 From: Parker Timmins Date: Fri, 1 Aug 2025 16:59:07 -0500 Subject: [PATCH 03/16] Simplify phrase query generation --- .../queries/LeafQueryGenerator.java | 37 +++++-------------- 1 file changed, 9 insertions(+), 28 deletions(-) diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/queries/LeafQueryGenerator.java b/test/framework/src/main/java/org/elasticsearch/datageneration/queries/LeafQueryGenerator.java index ef4bf9e663ecb..45c84ed4f75d6 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/queries/LeafQueryGenerator.java +++ 
b/test/framework/src/main/java/org/elasticsearch/datageneration/queries/LeafQueryGenerator.java @@ -82,46 +82,27 @@ public List generate(Map fieldMapping, String path } } - var results = new ArrayList(); - results.add(QueryBuilders.matchQuery(path, value)); - var phraseQuery = buildPhraseQuery(path, (String) value); - if (phraseQuery != null) { - results.add(phraseQuery); - } - return results; + return List.of( + QueryBuilders.matchQuery(path, value), + QueryBuilders.matchPhraseQuery(path, value) + ); } } record MatchOnlyTextQueryGenerator(MappingContextHelper mappingContextHelper) implements LeafQueryGenerator { public List generate(Map fieldMapping, String path, Object value) { - var results = new ArrayList(); - results.add(QueryBuilders.matchQuery(path, value)); - // TODO remove when fixed // match_only_text in nested context fails for synthetic source https://github.com/elastic/elasticsearch/issues/132352 if (mappingContextHelper.inNestedContext(path)) { - return results; + return List.of(QueryBuilders.matchQuery(path, value)); } - var phraseQuery = buildPhraseQuery(path, (String) value); - if (phraseQuery != null) { - results.add(phraseQuery); - } - return results; - } - } - - private static QueryBuilder buildPhraseQuery(String path, String value) { - var tokens = Arrays.asList(value.split("[^a-zA-Z0-9]")); - if (tokens.isEmpty()) { - return null; + return List.of( + QueryBuilders.matchQuery(path, value), + QueryBuilders.matchPhraseQuery(path, value) + ); } - - int low = ESTestCase.randomIntBetween(0, tokens.size() - 1); - int hi = ESTestCase.randomIntBetween(low + 1, tokens.size()); - var phrase = String.join(" ", tokens.subList(low, hi)); - return QueryBuilders.matchPhraseQuery(path, phrase); } static boolean containsHighSurrogates(String s) { From 63098b06963743009732efb0cb20d340ec75af3a Mon Sep 17 00:00:00 2001 From: Parker Timmins Date: Fri, 1 Aug 2025 17:14:09 -0500 Subject: [PATCH 04/16] Tune down number of ignore_above and multi-fields --- 
.../DefaultMappingParametersHandler.java | 28 +++++++++---------- .../queries/QueryGenerator.java | 2 +- ...ardVersusLogsIndexModeChallengeRestIT.java | 1 - 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultMappingParametersHandler.java b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultMappingParametersHandler.java index e279e8caf5315..940b8a860a02b 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultMappingParametersHandler.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultMappingParametersHandler.java @@ -98,14 +98,14 @@ private Supplier> keywordMapping(boolean hasParent, DataSour } } - if (ESTestCase.randomDouble() <= 0.5) { - mapping.put("ignore_above", ESTestCase.randomIntBetween(1, 30)); + if (ESTestCase.randomDouble() <= 0.2) { + mapping.put("ignore_above", ESTestCase.randomIntBetween(1, 50)); } if (ESTestCase.randomDouble() <= 0.2) { mapping.put("null_value", ESTestCase.randomAlphaOfLengthBetween(0, 10)); } - if (hasParent == false && ESTestCase.randomDouble() <= 0.5) { - mapping.put("fields", stringSubField(FieldType.KEYWORD, request)); + if (hasParent == false && ESTestCase.randomDouble() <= 0.3) { + mapping.put("fields", stringSubField(request)); } return mapping; @@ -208,8 +208,8 @@ private Supplier> textMapping(boolean hasParent, DataSourceR mapping.put("store", ESTestCase.randomBoolean()); mapping.put("index", ESTestCase.randomBoolean()); - if (hasParent == false && ESTestCase.randomDouble() <= 0.5) { - mapping.put("fields", stringSubField(FieldType.TEXT, request)); + if (hasParent == false && ESTestCase.randomDouble() <= 0.3) { + mapping.put("fields", stringSubField(request)); } return mapping; @@ -248,14 +248,14 @@ private Supplier> wildcardMapping(boolean hasParent, DataSou return () -> { var mapping = new HashMap(); - if 
(ESTestCase.randomDouble() <= 0.3) { - mapping.put("ignore_above", ESTestCase.randomIntBetween(1, 100)); + if (ESTestCase.randomDouble() <= 0.2) { + mapping.put("ignore_above", ESTestCase.randomIntBetween(1, 50)); } if (ESTestCase.randomDouble() <= 0.2) { mapping.put("null_value", ESTestCase.randomAlphaOfLengthBetween(0, 10)); } - if (hasParent == false && ESTestCase.randomDouble() <= 0.5) { - mapping.put("fields", stringSubField(FieldType.WILDCARD, request)); + if (hasParent == false && ESTestCase.randomDouble() <= 0.3) { + mapping.put("fields", stringSubField(request)); } return mapping; @@ -268,15 +268,15 @@ private Supplier> matchOnlyTextMapping( ) { return () -> { var mapping = new HashMap(); - if (hasParent == false && ESTestCase.randomDouble() <= 0.5) { - mapping.put("fields", stringSubField(FieldType.MATCH_ONLY_TEXT, request)); + if (hasParent == false && ESTestCase.randomDouble() <= 0.3) { + mapping.put("fields", stringSubField(request)); } return mapping; }; } - private Map stringSubField(FieldType parent, DataSourceRequest.LeafMappingParametersGenerator request) { - + private Map stringSubField(DataSourceRequest.LeafMappingParametersGenerator request) { + FieldType parent = FieldType.tryParse(request.fieldType()); List stringTypes = List.of(FieldType.TEXT, FieldType.MATCH_ONLY_TEXT, FieldType.KEYWORD, FieldType.WILDCARD); var childType = ESTestCase.randomValueOtherThan(parent, () -> ESTestCase.randomFrom(stringTypes)); var child = switch (childType) { diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/queries/QueryGenerator.java b/test/framework/src/main/java/org/elasticsearch/datageneration/queries/QueryGenerator.java index e91c36ae75bd0..73f360003576a 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/queries/QueryGenerator.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/queries/QueryGenerator.java @@ -41,7 +41,7 @@ public List generateQueries(String type, String path, Object 
value return leafQueries.stream().map(q -> wrapInNestedQuery(path, q)).toList(); } - public QueryBuilder wrapInNestedQuery(String path, QueryBuilder leafQuery) { + private QueryBuilder wrapInNestedQuery(String path, QueryBuilder leafQuery) { String[] parts = path.split("\\."); List nestedPaths = mappingContextHelper.getNestedPathPrefixes(parts); QueryBuilder query = leafQuery; diff --git a/x-pack/plugin/logsdb/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/qa/StandardVersusLogsIndexModeChallengeRestIT.java b/x-pack/plugin/logsdb/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/qa/StandardVersusLogsIndexModeChallengeRestIT.java index 132ae4327473e..c2e060a3a6912 100644 --- a/x-pack/plugin/logsdb/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/qa/StandardVersusLogsIndexModeChallengeRestIT.java +++ b/x-pack/plugin/logsdb/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/qa/StandardVersusLogsIndexModeChallengeRestIT.java @@ -71,7 +71,6 @@ protected StandardVersusLogsIndexModeChallengeRestIT(DataGenerationHelper dataGe @Override public void baselineMappings(XContentBuilder builder) throws IOException { dataGenerationHelper.writeStandardMapping(builder); - int x = 5; } @Override From 770ad5d0a771683876473fa0ed374d697d8f38f6 Mon Sep 17 00:00:00 2001 From: Parker Timmins Date: Sun, 3 Aug 2025 11:30:58 -0500 Subject: [PATCH 05/16] Some cleanup and tune params --- .../DefaultMappingParametersHandler.java | 12 ++-- .../queries/LeafQueryGenerator.java | 10 +--- ...textHelper.java => MappingPredicates.java} | 58 +++++++++---------- .../queries/QueryGenerator.java | 11 ++-- 4 files changed, 40 insertions(+), 51 deletions(-) rename test/framework/src/main/java/org/elasticsearch/datageneration/queries/{MappingContextHelper.java => MappingPredicates.java} (54%) diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultMappingParametersHandler.java 
b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultMappingParametersHandler.java index 940b8a860a02b..09ff84ace0971 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultMappingParametersHandler.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultMappingParametersHandler.java @@ -98,13 +98,13 @@ private Supplier> keywordMapping(boolean hasParent, DataSour } } - if (ESTestCase.randomDouble() <= 0.2) { + if (ESTestCase.randomDouble() <= 0.3) { mapping.put("ignore_above", ESTestCase.randomIntBetween(1, 50)); } if (ESTestCase.randomDouble() <= 0.2) { mapping.put("null_value", ESTestCase.randomAlphaOfLengthBetween(0, 10)); } - if (hasParent == false && ESTestCase.randomDouble() <= 0.3) { + if (hasParent == false && ESTestCase.randomBoolean()) { mapping.put("fields", stringSubField(request)); } @@ -208,7 +208,7 @@ private Supplier> textMapping(boolean hasParent, DataSourceR mapping.put("store", ESTestCase.randomBoolean()); mapping.put("index", ESTestCase.randomBoolean()); - if (hasParent == false && ESTestCase.randomDouble() <= 0.3) { + if (hasParent == false && ESTestCase.randomBoolean()) { mapping.put("fields", stringSubField(request)); } @@ -248,13 +248,13 @@ private Supplier> wildcardMapping(boolean hasParent, DataSou return () -> { var mapping = new HashMap(); - if (ESTestCase.randomDouble() <= 0.2) { + if (ESTestCase.randomDouble() <= 0.3) { mapping.put("ignore_above", ESTestCase.randomIntBetween(1, 50)); } if (ESTestCase.randomDouble() <= 0.2) { mapping.put("null_value", ESTestCase.randomAlphaOfLengthBetween(0, 10)); } - if (hasParent == false && ESTestCase.randomDouble() <= 0.3) { + if (hasParent == false && ESTestCase.randomBoolean()) { mapping.put("fields", stringSubField(request)); } @@ -268,7 +268,7 @@ private Supplier> matchOnlyTextMapping( ) { return () -> { var mapping = new HashMap(); - if (hasParent == false && ESTestCase.randomDouble() <= 0.3) { 
+ if (hasParent == false && ESTestCase.randomBoolean()) { mapping.put("fields", stringSubField(request)); } return mapping; diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/queries/LeafQueryGenerator.java b/test/framework/src/main/java/org/elasticsearch/datageneration/queries/LeafQueryGenerator.java index 45c84ed4f75d6..c93b28789093a 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/queries/LeafQueryGenerator.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/queries/LeafQueryGenerator.java @@ -9,14 +9,10 @@ package org.elasticsearch.datageneration.queries; -import org.elasticsearch.common.MacAddressProvider; import org.elasticsearch.datageneration.FieldType; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.test.ESTestCase; -import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import java.util.Map; @@ -29,7 +25,7 @@ public interface LeafQueryGenerator { * @param type the type to build a query for * @return a generator that can build queries for this type */ - static LeafQueryGenerator buildForType(String type, MappingContextHelper mappingContextHelper) { + static LeafQueryGenerator buildForType(String type, MappingPredicates mappingContextHelper) { LeafQueryGenerator noQueries = (Map fieldMapping, String path, Object value) -> List.of(); FieldType fieldType = FieldType.tryParse(type); @@ -89,12 +85,12 @@ public List generate(Map fieldMapping, String path } } - record MatchOnlyTextQueryGenerator(MappingContextHelper mappingContextHelper) implements LeafQueryGenerator { + record MatchOnlyTextQueryGenerator(MappingPredicates mappingPredicates) implements LeafQueryGenerator { public List generate(Map fieldMapping, String path, Object value) { // TODO remove when fixed // match_only_text in nested context fails for synthetic source https://github.com/elastic/elasticsearch/issues/132352 - if 
(mappingContextHelper.inNestedContext(path)) { + if (mappingPredicates.inNestedContext(path)) { return List.of(QueryBuilders.matchQuery(path, value)); } diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/queries/MappingContextHelper.java b/test/framework/src/main/java/org/elasticsearch/datageneration/queries/MappingPredicates.java similarity index 54% rename from test/framework/src/main/java/org/elasticsearch/datageneration/queries/MappingContextHelper.java rename to test/framework/src/main/java/org/elasticsearch/datageneration/queries/MappingPredicates.java index c18d1646f98c0..2d2c05150d3c1 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/queries/MappingContextHelper.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/queries/MappingPredicates.java @@ -16,59 +16,53 @@ import java.util.List; import java.util.Map; -public class MappingContextHelper { +public class MappingPredicates { private final Mapping mapping; - public MappingContextHelper(Mapping mapping) { + public MappingPredicates(Mapping mapping) { this.mapping = mapping; } - @SuppressWarnings("unchecked") - public List getNestedPathPrefixes(String[] path) { - Map mapping = this.mapping.raw(); - mapping = (Map) mapping.get("_doc"); - mapping = (Map) mapping.get("properties"); - - var result = new ArrayList(); - for (int i = 0; i < path.length - 1; i++) { - var field = path[i]; - mapping = (Map) mapping.get(field); + record PathMapping(String path, Map mapping) {} - // dynamic field - if (mapping == null) { + private List getPathMapping(String path) { + String[] parts = path.split("\\."); + var result = new ArrayList(); + for (int i = 0; i < parts.length; i++) { + var pathToHere = String.join(".", Arrays.copyOfRange(parts, 0, i + 1)); + Map fieldMapping = mapping.lookup().get(pathToHere); + if (fieldMapping == null) { break; } - - boolean nested = "nested".equals(mapping.get("type")); - if (nested) { - result.add(String.join(".", 
Arrays.copyOfRange(path, 0, i + 1))); - } - mapping = (Map) mapping.get("properties"); + result.add(new PathMapping(pathToHere, fieldMapping)); } return result; } + public List getNestedPathPrefixes(String fullPath) { + return getPathMapping(fullPath).stream() + .filter(pm -> "nested".equals(pm.mapping().get("type"))) + .map(PathMapping::path) + .toList(); + } + + public boolean inNestedContext(String fullPath) { + return getPathMapping(fullPath).stream() + .anyMatch(pm -> "nested".equals(pm.mapping().get("type"))); + } + @SuppressWarnings("unchecked") public boolean isRuntimeField(String path) { - String[] parts = path.split("\\."); var topLevelMapping = (Map) mapping.raw().get("_doc"); boolean inRuntimeContext = "runtime".equals(topLevelMapping.get("dynamic")); - for (int i = 0; i < parts.length - 1; i++) { - var pathToHere = String.join(".", Arrays.copyOfRange(parts, 0, i + 1)); - Map fieldMapping = mapping.lookup().get(pathToHere); - if (fieldMapping == null) { - break; - } - if (fieldMapping.containsKey("dynamic")) { + for (var pm : getPathMapping(path)) { + if (pm.mapping().containsKey("dynamic")) { // lower down dynamic definitions override higher up behavior - inRuntimeContext = "runtime".equals(fieldMapping.get("dynamic")); + inRuntimeContext = "runtime".equals(pm.mapping().get("dynamic")); } } return inRuntimeContext; } - public boolean inNestedContext(String path) { - return getNestedPathPrefixes(path.split("\\.")).isEmpty() == false; - } } diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/queries/QueryGenerator.java b/test/framework/src/main/java/org/elasticsearch/datageneration/queries/QueryGenerator.java index 73f360003576a..7630d810acdc5 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/queries/QueryGenerator.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/queries/QueryGenerator.java @@ -19,11 +19,11 @@ public class QueryGenerator { private final Mapping mapping; - private 
final MappingContextHelper mappingContextHelper; + private final MappingPredicates mappingPredicates; public QueryGenerator(Mapping mapping) { this.mapping = mapping; - this.mappingContextHelper = new MappingContextHelper(mapping); + this.mappingPredicates = new MappingPredicates(mapping); } public List generateQueries(String type, String path, Object value) { @@ -32,18 +32,17 @@ public List generateQueries(String type, String path, Object value return List.of(); } // Can handle dynamically mapped fields, but not runtime fields - if (mappingContextHelper.isRuntimeField(path)) { + if (mappingPredicates.isRuntimeField(path)) { return List.of(); } - var leafQueryGenerator = LeafQueryGenerator.buildForType(type, mappingContextHelper); + var leafQueryGenerator = LeafQueryGenerator.buildForType(type, mappingPredicates); var fieldMapping = mapping.lookup().get(path); var leafQueries = leafQueryGenerator.generate(fieldMapping, path, value); return leafQueries.stream().map(q -> wrapInNestedQuery(path, q)).toList(); } private QueryBuilder wrapInNestedQuery(String path, QueryBuilder leafQuery) { - String[] parts = path.split("\\."); - List nestedPaths = mappingContextHelper.getNestedPathPrefixes(parts); + List nestedPaths = mappingPredicates.getNestedPathPrefixes(path); QueryBuilder query = leafQuery; for (String nestedPath : nestedPaths.reversed()) { query = QueryBuilders.nestedQuery(nestedPath, query, ScoreMode.Max); From f96a1a8ff427671881cc65ff80b0978973a8d512 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 4 Aug 2025 02:58:17 +0000 Subject: [PATCH 06/16] [CI] Auto commit changes from spotless --- .../matchers/source/FieldSpecificMatcher.java | 5 ++--- .../queries/LeafQueryGenerator.java | 15 +++------------ .../datageneration/queries/MappingPredicates.java | 8 ++------ 3 files changed, 7 insertions(+), 21 deletions(-) diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/matchers/source/FieldSpecificMatcher.java 
b/test/framework/src/main/java/org/elasticsearch/datageneration/matchers/source/FieldSpecificMatcher.java index b8a17122beb31..9601f565f777d 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/matchers/source/FieldSpecificMatcher.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/matchers/source/FieldSpecificMatcher.java @@ -610,7 +610,7 @@ class MatchOnlyTextMatcher implements FieldSpecificMatcher { private final XContentBuilder expectedMappings; private final Settings.Builder expectedSettings; - MatchOnlyTextMatcher ( + MatchOnlyTextMatcher( XContentBuilder actualMappings, Settings.Builder actualSettings, XContentBuilder expectedMappings, @@ -637,7 +637,7 @@ public MatchResult match( if (actualNormalized.equals(expectedNormalized)) { return MatchResult.match(); } -// + // var multiFields = (Map) getMappingParameter("fields", actualMapping, expectedMapping); if (multiFields != null) { var keywordMatcher = new KeywordMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings); @@ -669,7 +669,6 @@ private Set normalize(List values) { } } - class TextMatcher implements FieldSpecificMatcher { private final XContentBuilder actualMappings; private final Settings.Builder actualSettings; diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/queries/LeafQueryGenerator.java b/test/framework/src/main/java/org/elasticsearch/datageneration/queries/LeafQueryGenerator.java index c93b28789093a..90297ab3f83e6 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/queries/LeafQueryGenerator.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/queries/LeafQueryGenerator.java @@ -51,10 +51,7 @@ public List generate(Map fieldMapping, String path return List.of(); } } - return List.of( - QueryBuilders.termQuery(path, value), - QueryBuilders.matchQuery(path, value) - ); + return List.of(QueryBuilders.termQuery(path, value), QueryBuilders.matchQuery(path, value)); } } 
@@ -78,10 +75,7 @@ public List generate(Map fieldMapping, String path } } - return List.of( - QueryBuilders.matchQuery(path, value), - QueryBuilders.matchPhraseQuery(path, value) - ); + return List.of(QueryBuilders.matchQuery(path, value), QueryBuilders.matchPhraseQuery(path, value)); } } @@ -94,10 +88,7 @@ public List generate(Map fieldMapping, String path return List.of(QueryBuilders.matchQuery(path, value)); } - return List.of( - QueryBuilders.matchQuery(path, value), - QueryBuilders.matchPhraseQuery(path, value) - ); + return List.of(QueryBuilders.matchQuery(path, value), QueryBuilders.matchPhraseQuery(path, value)); } } diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/queries/MappingPredicates.java b/test/framework/src/main/java/org/elasticsearch/datageneration/queries/MappingPredicates.java index 2d2c05150d3c1..a1500e6612c0f 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/queries/MappingPredicates.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/queries/MappingPredicates.java @@ -41,15 +41,11 @@ private List getPathMapping(String path) { } public List getNestedPathPrefixes(String fullPath) { - return getPathMapping(fullPath).stream() - .filter(pm -> "nested".equals(pm.mapping().get("type"))) - .map(PathMapping::path) - .toList(); + return getPathMapping(fullPath).stream().filter(pm -> "nested".equals(pm.mapping().get("type"))).map(PathMapping::path).toList(); } public boolean inNestedContext(String fullPath) { - return getPathMapping(fullPath).stream() - .anyMatch(pm -> "nested".equals(pm.mapping().get("type"))); + return getPathMapping(fullPath).stream().anyMatch(pm -> "nested".equals(pm.mapping().get("type"))); } @SuppressWarnings("unchecked") From 560d937fb913742ebfa548f6fbad0f6cae93d0a7 Mon Sep 17 00:00:00 2001 From: Parker Timmins Date: Sun, 3 Aug 2025 22:01:49 -0500 Subject: [PATCH 07/16] Use text matcher for match_only_text --- .../matchers/source/FieldSpecificMatcher.java | 
77 ++++--------------- .../queries/LeafQueryGenerator.java | 4 +- 2 files changed, 18 insertions(+), 63 deletions(-) diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/matchers/source/FieldSpecificMatcher.java b/test/framework/src/main/java/org/elasticsearch/datageneration/matchers/source/FieldSpecificMatcher.java index 9601f565f777d..1503cbd8e9adb 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/matchers/source/FieldSpecificMatcher.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/matchers/source/FieldSpecificMatcher.java @@ -604,13 +604,13 @@ public MatchResult match( } } - class MatchOnlyTextMatcher implements FieldSpecificMatcher { + class TextMatcher implements FieldSpecificMatcher { private final XContentBuilder actualMappings; private final Settings.Builder actualSettings; private final XContentBuilder expectedMappings; private final Settings.Builder expectedSettings; - MatchOnlyTextMatcher( + TextMatcher( XContentBuilder actualMappings, Settings.Builder actualSettings, XContentBuilder expectedMappings, @@ -622,6 +622,10 @@ class MatchOnlyTextMatcher implements FieldSpecificMatcher { this.expectedSettings = expectedSettings; } + public String type() { + return "text"; + } + @Override @SuppressWarnings("unchecked") public MatchResult match( @@ -633,11 +637,13 @@ public MatchResult match( var expectedNormalized = normalize(expected); var actualNormalized = normalize(actual); - // Match simply as match_only_text first. + // Match simply as text first. if (actualNormalized.equals(expectedNormalized)) { return MatchResult.match(); } - // + + // In some cases synthetic source for text fields is synthesized using the keyword multi field. + // So in this case it's appropriate to match it using keyword matching logic (mainly to cover `null_value`). 
var multiFields = (Map) getMappingParameter("fields", actualMapping, expectedMapping); if (multiFields != null) { var keywordMatcher = new KeywordMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings); @@ -655,7 +661,7 @@ public MatchResult match( actualSettings, expectedMappings, expectedSettings, - "Values of type [match_only_text] don't match, " + prettyPrintCollections(actual, expected) + "Values of type [" + type() + "] don't match, " + prettyPrintCollections(actual, expected) ) ); } @@ -669,70 +675,19 @@ private Set normalize(List values) { } } - class TextMatcher implements FieldSpecificMatcher { - private final XContentBuilder actualMappings; - private final Settings.Builder actualSettings; - private final XContentBuilder expectedMappings; - private final Settings.Builder expectedSettings; - - TextMatcher( + class MatchOnlyTextMatcher extends TextMatcher { + MatchOnlyTextMatcher( XContentBuilder actualMappings, Settings.Builder actualSettings, XContentBuilder expectedMappings, Settings.Builder expectedSettings ) { - this.actualMappings = actualMappings; - this.actualSettings = actualSettings; - this.expectedMappings = expectedMappings; - this.expectedSettings = expectedSettings; + super(actualMappings, actualSettings, expectedMappings, expectedSettings); } @Override - @SuppressWarnings("unchecked") - public MatchResult match( - List actual, - List expected, - Map actualMapping, - Map expectedMapping - ) { - var expectedNormalized = normalize(expected); - var actualNormalized = normalize(actual); - - // Match simply as text first. - if (actualNormalized.equals(expectedNormalized)) { - return MatchResult.match(); - } - - // In some cases synthetic source for text fields is synthesized using the keyword multi field. - // So in this case it's appropriate to match it using keyword matching logic (mainly to cover `null_value`). 
- var multiFields = (Map) getMappingParameter("fields", actualMapping, expectedMapping); - if (multiFields != null) { - var keywordMatcher = new KeywordMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings); - - var keywordFieldMapping = (Map) multiFields.get("subfield_keyword"); - var keywordMatchResult = keywordMatcher.match(actual, expected, keywordFieldMapping, keywordFieldMapping); - if (keywordMatchResult.isMatch()) { - return MatchResult.match(); - } - } - - return MatchResult.noMatch( - formatErrorMessage( - actualMappings, - actualSettings, - expectedMappings, - expectedSettings, - "Values of type [text] don't match, " + prettyPrintCollections(actual, expected) - ) - ); - } - - private Set normalize(List values) { - if (values == null) { - return Set.of(); - } - - return values.stream().filter(Objects::nonNull).collect(Collectors.toSet()); + public String type() { + return "match_only_text"; } } diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/queries/LeafQueryGenerator.java b/test/framework/src/main/java/org/elasticsearch/datageneration/queries/LeafQueryGenerator.java index 90297ab3f83e6..cb102e185c9b5 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/queries/LeafQueryGenerator.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/queries/LeafQueryGenerator.java @@ -25,7 +25,7 @@ public interface LeafQueryGenerator { * @param type the type to build a query for * @return a generator that can build queries for this type */ - static LeafQueryGenerator buildForType(String type, MappingPredicates mappingContextHelper) { + static LeafQueryGenerator buildForType(String type, MappingPredicates mappingPredicates) { LeafQueryGenerator noQueries = (Map fieldMapping, String path, Object value) -> List.of(); FieldType fieldType = FieldType.tryParse(type); @@ -37,7 +37,7 @@ static LeafQueryGenerator buildForType(String type, MappingPredicates mappingCon case KEYWORD -> new 
KeywordQueryGenerator(); case WILDCARD -> new WildcardQueryGenerator(); case TEXT -> new TextQueryGenerator(); - case MATCH_ONLY_TEXT -> new MatchOnlyTextQueryGenerator(mappingContextHelper); + case MATCH_ONLY_TEXT -> new MatchOnlyTextQueryGenerator(mappingPredicates); default -> noQueries; }; } From 789222bc8b6edbc1ebbde5546c2fce24423db8ba Mon Sep 17 00:00:00 2001 From: Parker Timmins Date: Fri, 8 Aug 2025 14:18:56 -0500 Subject: [PATCH 08/16] Remove wildcard as a subfield option Ideally, wildcard would be tested as a subfield. But data generation code is used in server, and wildcard type is in xpack. With some better wiring this could be fixed, but that will have to wait for the future. --- .../datasource/DefaultMappingParametersHandler.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultMappingParametersHandler.java b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultMappingParametersHandler.java index 09ff84ace0971..fbb274b4a98cb 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultMappingParametersHandler.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultMappingParametersHandler.java @@ -277,12 +277,11 @@ private Supplier> matchOnlyTextMapping( private Map stringSubField(DataSourceRequest.LeafMappingParametersGenerator request) { FieldType parent = FieldType.tryParse(request.fieldType()); - List stringTypes = List.of(FieldType.TEXT, FieldType.MATCH_ONLY_TEXT, FieldType.KEYWORD, FieldType.WILDCARD); - var childType = ESTestCase.randomValueOtherThan(parent, () -> ESTestCase.randomFrom(stringTypes)); + List childTypes = List.of(FieldType.TEXT, FieldType.MATCH_ONLY_TEXT, FieldType.KEYWORD); + var childType = ESTestCase.randomValueOtherThan(parent, () -> ESTestCase.randomFrom(childTypes)); var child = switch (childType) { case TEXT -> textMapping(true, 
request).get(); case MATCH_ONLY_TEXT -> matchOnlyTextMapping(true, request).get(); - case WILDCARD -> wildcardMapping(true, request).get(); case KEYWORD -> { var mapping = keywordMapping(true, request).get(); mapping.remove("copy_to"); From 501f385600eafb8ce66430b08c5298e6206c2a07 Mon Sep 17 00:00:00 2001 From: Parker Timmins Date: Tue, 12 Aug 2025 14:43:38 -0500 Subject: [PATCH 09/16] Dont add wildcard and match_only_text to block loader tests --- .../TextFieldWithParentBlockLoaderTests.java | 6 ++++-- .../datageneration/DataGeneratorSpecification.java | 14 ++++++++++++-- .../datageneration/MappingGenerator.java | 3 ++- .../datasource/DataSourceRequest.java | 3 ++- .../DefaultMappingParametersHandler.java | 7 +++++-- .../index/mapper/BlockLoaderTestCase.java | 4 +++- 6 files changed, 28 insertions(+), 9 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/index/mapper/blockloader/TextFieldWithParentBlockLoaderTests.java b/server/src/test/java/org/elasticsearch/index/mapper/blockloader/TextFieldWithParentBlockLoaderTests.java index 6343aeea2d9de..9345d024439f6 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/blockloader/TextFieldWithParentBlockLoaderTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/blockloader/TextFieldWithParentBlockLoaderTests.java @@ -68,7 +68,8 @@ public DataSourceResponse.LeafMappingParametersGenerator handle(DataSourceReques "_field", FieldType.KEYWORD.toString(), request.eligibleCopyToFields(), - request.dynamicMapping() + request.dynamicMapping(), + request.includePluginTypesInMultiFields() ) ).mappingGenerator().get(); @@ -78,7 +79,8 @@ public DataSourceResponse.LeafMappingParametersGenerator handle(DataSourceReques "_field", FieldType.TEXT.toString(), request.eligibleCopyToFields(), - request.dynamicMapping() + request.dynamicMapping(), + request.includePluginTypesInMultiFields() ) ).mappingGenerator().get(); diff --git 
a/test/framework/src/main/java/org/elasticsearch/datageneration/DataGeneratorSpecification.java b/test/framework/src/main/java/org/elasticsearch/datageneration/DataGeneratorSpecification.java index 96ab809ff048c..ac5984d1f4a56 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/DataGeneratorSpecification.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/DataGeneratorSpecification.java @@ -26,6 +26,7 @@ * @param nestedFieldsLimit how many total nested fields can be present in a produced mapping * @param fullyDynamicMapping if the mapping is fully dynamic, meaning none of the fields are mapped (essentially mapping is empty) * @param predefinedFields predefined fields that must be present in mapping and documents. Only top level fields are supported. + * @param includePluginTypes whether types defined in plugins should be added to mapping */ public record DataGeneratorSpecification( DataSource dataSource, @@ -33,7 +34,8 @@ public record DataGeneratorSpecification( int maxObjectDepth, int nestedFieldsLimit, boolean fullyDynamicMapping, - List predefinedFields + List predefinedFields, + boolean includePluginTypes ) { public static Builder builder() { @@ -51,6 +53,7 @@ public static class Builder { private int nestedFieldsLimit; private boolean fullyDynamicMapping; private List predefinedFields; + private boolean includePluginTypes; public Builder() { this.dataSourceHandlers = new ArrayList<>(); @@ -61,6 +64,7 @@ public Builder() { this.nestedFieldsLimit = 50; fullyDynamicMapping = false; this.predefinedFields = new ArrayList<>(); + this.includePluginTypes = true; } public Builder withDataSourceHandlers(Collection handlers) { @@ -93,6 +97,11 @@ public Builder withPredefinedFields(List predefinedFields) { return this; } + public Builder withIncludePluginTypes(boolean includePluginTypes) { + this.includePluginTypes = includePluginTypes; + return this; + } + public DataGeneratorSpecification build() { return new 
DataGeneratorSpecification( new DataSource(dataSourceHandlers), @@ -100,7 +109,8 @@ public DataGeneratorSpecification build() { maxObjectDepth, nestedFieldsLimit, fullyDynamicMapping, - predefinedFields + predefinedFields, + includePluginTypes ); } } diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/MappingGenerator.java b/test/framework/src/main/java/org/elasticsearch/datageneration/MappingGenerator.java index 795302e0972c7..7eeb7c0625b69 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/MappingGenerator.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/MappingGenerator.java @@ -108,7 +108,8 @@ private void generateMapping( fieldName, leaf.type(), context.eligibleCopyToDestinations(), - context.parentDynamicMapping() + context.parentDynamicMapping(), + specification.includePluginTypes() ) ) .mappingGenerator(); diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DataSourceRequest.java b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DataSourceRequest.java index 1323fa23d226e..57692ccbf6aa5 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DataSourceRequest.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DataSourceRequest.java @@ -203,7 +203,8 @@ record LeafMappingParametersGenerator( String fieldName, String fieldType, Set eligibleCopyToFields, - DynamicMapping dynamicMapping + DynamicMapping dynamicMapping, + boolean includePluginTypesInMultiFields ) implements DataSourceRequest { public DataSourceResponse.LeafMappingParametersGenerator accept(DataSourceHandler handler) { return handler.handle(this); diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultMappingParametersHandler.java b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultMappingParametersHandler.java index 
fbb274b4a98cb..bb28914b903be 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultMappingParametersHandler.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultMappingParametersHandler.java @@ -277,16 +277,19 @@ private Supplier> matchOnlyTextMapping( private Map stringSubField(DataSourceRequest.LeafMappingParametersGenerator request) { FieldType parent = FieldType.tryParse(request.fieldType()); - List childTypes = List.of(FieldType.TEXT, FieldType.MATCH_ONLY_TEXT, FieldType.KEYWORD); + List childTypes = request.includePluginTypesInMultiFields() + ? List.of(FieldType.TEXT, FieldType.KEYWORD, FieldType.WILDCARD, FieldType.MATCH_ONLY_TEXT) + : List.of(FieldType.TEXT, FieldType.KEYWORD); var childType = ESTestCase.randomValueOtherThan(parent, () -> ESTestCase.randomFrom(childTypes)); var child = switch (childType) { case TEXT -> textMapping(true, request).get(); - case MATCH_ONLY_TEXT -> matchOnlyTextMapping(true, request).get(); case KEYWORD -> { var mapping = keywordMapping(true, request).get(); mapping.remove("copy_to"); yield mapping; } + case MATCH_ONLY_TEXT -> matchOnlyTextMapping(true, request).get(); + case WILDCARD -> wildcardMapping(true, request).get(); default -> throw new AssertionError("unreachable"); }; diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/BlockLoaderTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/BlockLoaderTestCase.java index 3b6c4ec5e0123..244004fdafacb 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/BlockLoaderTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/BlockLoaderTestCase.java @@ -176,7 +176,8 @@ public DataSourceResponse.LeafMappingParametersGenerator handle(DataSourceReques "_field", request.fieldType(), request.eligibleCopyToFields(), - request.dynamicMapping() + request.dynamicMapping(), + request.includePluginTypesInMultiFields() ) 
).mappingGenerator().get(); @@ -210,6 +211,7 @@ public DataSourceResponse.LeafMappingParametersGenerator handle(DataSourceReques public static DataGeneratorSpecification buildSpecification(Collection customHandlers) { return DataGeneratorSpecification.builder() + .withIncludePluginTypes(false) .withFullyDynamicMapping(false) // Disable dynamic mapping and disabled objects .withDataSourceHandlers(List.of(new DataSourceHandler() { From 4c2843cd876eace699001c1685717629d2df438b Mon Sep 17 00:00:00 2001 From: Parker Timmins Date: Tue, 12 Aug 2025 15:45:41 -0500 Subject: [PATCH 10/16] Update multi-field name to subfield_keyword --- .../index/mapper/blockloader/TextFieldBlockLoaderTests.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/test/java/org/elasticsearch/index/mapper/blockloader/TextFieldBlockLoaderTests.java b/server/src/test/java/org/elasticsearch/index/mapper/blockloader/TextFieldBlockLoaderTests.java index ce5482b15b0ee..d78c7eba351e2 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/blockloader/TextFieldBlockLoaderTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/blockloader/TextFieldBlockLoaderTests.java @@ -41,7 +41,7 @@ public static Object expectedValue(Map fieldMapping, Object valu var fields = (Map) fieldMapping.get("fields"); if (fields != null) { - var keywordMultiFieldMapping = (Map) fields.get("kwd"); + var keywordMultiFieldMapping = (Map) fields.get("subfield_keyword"); Object normalizer = fields.get("normalizer"); boolean docValues = hasDocValues(keywordMultiFieldMapping, true); boolean store = keywordMultiFieldMapping.getOrDefault("store", false).equals(true); From 1760919cb72b3c2e8190a0a142e16eb02d558b34 Mon Sep 17 00:00:00 2001 From: Parker Timmins Date: Wed, 13 Aug 2025 16:11:34 -0500 Subject: [PATCH 11/16] Move multifield into separate data source handler --- .../TextFieldWithParentBlockLoaderTests.java | 57 ++---------- .../DataGeneratorSpecification.java | 14 +-- 
.../datageneration/MappingGenerator.java | 3 +- .../datasource/DataSourceRequest.java | 3 +- .../DefaultMappingParametersHandler.java | 60 ++----------- .../datasource/MultifieldAddonHandler.java | 90 +++++++++++++++++++ .../index/mapper/BlockLoaderTestCase.java | 5 +- .../xpack/logsdb/qa/DataGenerationHelper.java | 4 +- 8 files changed, 113 insertions(+), 123 deletions(-) create mode 100644 test/framework/src/main/java/org/elasticsearch/datageneration/datasource/MultifieldAddonHandler.java diff --git a/server/src/test/java/org/elasticsearch/index/mapper/blockloader/TextFieldWithParentBlockLoaderTests.java b/server/src/test/java/org/elasticsearch/index/mapper/blockloader/TextFieldWithParentBlockLoaderTests.java index 9345d024439f6..c74e133611071 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/blockloader/TextFieldWithParentBlockLoaderTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/blockloader/TextFieldWithParentBlockLoaderTests.java @@ -15,9 +15,7 @@ import org.elasticsearch.datageneration.FieldType; import org.elasticsearch.datageneration.MappingGenerator; import org.elasticsearch.datageneration.Template; -import org.elasticsearch.datageneration.datasource.DataSourceHandler; -import org.elasticsearch.datageneration.datasource.DataSourceRequest; -import org.elasticsearch.datageneration.datasource.DataSourceResponse; +import org.elasticsearch.datageneration.datasource.MultifieldAddonHandler; import org.elasticsearch.index.mapper.BlockLoaderTestCase; import org.elasticsearch.index.mapper.BlockLoaderTestRunner; import org.elasticsearch.index.mapper.MapperServiceTestCase; @@ -49,53 +47,8 @@ public TextFieldWithParentBlockLoaderTests(BlockLoaderTestCase.Params params) { // of text multi field in a keyword field. 
public void testBlockLoaderOfParentField() throws IOException { var template = new Template(Map.of("parent", new Template.Leaf("parent", FieldType.KEYWORD.toString()))); - var specification = buildSpecification(List.of(new DataSourceHandler() { - @Override - public DataSourceResponse.LeafMappingParametersGenerator handle(DataSourceRequest.LeafMappingParametersGenerator request) { - // This is a bit tricky meta-logic. - // We want to customize mapping but to do this we need the mapping for the same field type - // so we use name to untangle this. - if (request.fieldName().equals("parent") == false) { - return null; - } - - return new DataSourceResponse.LeafMappingParametersGenerator(() -> { - var dataSource = request.dataSource(); - - var keywordParentMapping = dataSource.get( - new DataSourceRequest.LeafMappingParametersGenerator( - dataSource, - "_field", - FieldType.KEYWORD.toString(), - request.eligibleCopyToFields(), - request.dynamicMapping(), - request.includePluginTypesInMultiFields() - ) - ).mappingGenerator().get(); - - var textMultiFieldMapping = dataSource.get( - new DataSourceRequest.LeafMappingParametersGenerator( - dataSource, - "_field", - FieldType.TEXT.toString(), - request.eligibleCopyToFields(), - request.dynamicMapping(), - request.includePluginTypesInMultiFields() - ) - ).mappingGenerator().get(); - - // we don't need this here - keywordParentMapping.remove("copy_to"); - - textMultiFieldMapping.put("type", "text"); - textMultiFieldMapping.remove("fields"); - - keywordParentMapping.put("fields", Map.of("mf", textMultiFieldMapping)); - - return keywordParentMapping; - }); - } - })); + var specification = buildSpecification(List.of(new MultifieldAddonHandler(Map.of(FieldType.KEYWORD, List.of(FieldType.TEXT)), 1f))); + var mapping = new MappingGenerator(specification).generate(template); var fieldMapping = mapping.lookup().get("parent"); @@ -108,7 +61,7 @@ public DataSourceResponse.LeafMappingParametersGenerator handle(DataSourceReques ? 
createSytheticSourceMapperService(mappingXContent) : createMapperService(mappingXContent); - runner.runTest(mapperService, document, expected, "parent.mf"); + runner.runTest(mapperService, document, expected, "parent.subfield_text"); } @SuppressWarnings("unchecked") @@ -125,7 +78,7 @@ private Object expected(Map fieldMapping, Object value, BlockLoa } // we are using block loader of the text field itself - var textFieldMapping = (Map) ((Map) fieldMapping.get("fields")).get("mf"); + var textFieldMapping = (Map) ((Map) fieldMapping.get("fields")).get("subfield_text"); return TextFieldBlockLoaderTests.expectedValue(textFieldMapping, value, params, testContext); } } diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/DataGeneratorSpecification.java b/test/framework/src/main/java/org/elasticsearch/datageneration/DataGeneratorSpecification.java index ac5984d1f4a56..96ab809ff048c 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/DataGeneratorSpecification.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/DataGeneratorSpecification.java @@ -26,7 +26,6 @@ * @param nestedFieldsLimit how many total nested fields can be present in a produced mapping * @param fullyDynamicMapping if the mapping is fully dynamic, meaning none of the fields are mapped (essentially mapping is empty) * @param predefinedFields predefined fields that must be present in mapping and documents. Only top level fields are supported. 
- * @param includePluginTypes whether types defined in plugins should be added to mapping */ public record DataGeneratorSpecification( DataSource dataSource, @@ -34,8 +33,7 @@ public record DataGeneratorSpecification( int maxObjectDepth, int nestedFieldsLimit, boolean fullyDynamicMapping, - List predefinedFields, - boolean includePluginTypes + List predefinedFields ) { public static Builder builder() { @@ -53,7 +51,6 @@ public static class Builder { private int nestedFieldsLimit; private boolean fullyDynamicMapping; private List predefinedFields; - private boolean includePluginTypes; public Builder() { this.dataSourceHandlers = new ArrayList<>(); @@ -64,7 +61,6 @@ public Builder() { this.nestedFieldsLimit = 50; fullyDynamicMapping = false; this.predefinedFields = new ArrayList<>(); - this.includePluginTypes = true; } public Builder withDataSourceHandlers(Collection handlers) { @@ -97,11 +93,6 @@ public Builder withPredefinedFields(List predefinedFields) { return this; } - public Builder withIncludePluginTypes(boolean includePluginTypes) { - this.includePluginTypes = includePluginTypes; - return this; - } - public DataGeneratorSpecification build() { return new DataGeneratorSpecification( new DataSource(dataSourceHandlers), @@ -109,8 +100,7 @@ public DataGeneratorSpecification build() { maxObjectDepth, nestedFieldsLimit, fullyDynamicMapping, - predefinedFields, - includePluginTypes + predefinedFields ); } } diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/MappingGenerator.java b/test/framework/src/main/java/org/elasticsearch/datageneration/MappingGenerator.java index 7eeb7c0625b69..795302e0972c7 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/MappingGenerator.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/MappingGenerator.java @@ -108,8 +108,7 @@ private void generateMapping( fieldName, leaf.type(), context.eligibleCopyToDestinations(), - context.parentDynamicMapping(), - 
specification.includePluginTypes() + context.parentDynamicMapping() ) ) .mappingGenerator(); diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DataSourceRequest.java b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DataSourceRequest.java index 57692ccbf6aa5..1323fa23d226e 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DataSourceRequest.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DataSourceRequest.java @@ -203,8 +203,7 @@ record LeafMappingParametersGenerator( String fieldName, String fieldType, Set eligibleCopyToFields, - DynamicMapping dynamicMapping, - boolean includePluginTypesInMultiFields + DynamicMapping dynamicMapping ) implements DataSourceRequest { public DataSourceResponse.LeafMappingParametersGenerator accept(DataSourceHandler handler) { return handler.handle(this); diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultMappingParametersHandler.java b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultMappingParametersHandler.java index bb28914b903be..8e759946f2ee4 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultMappingParametersHandler.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultMappingParametersHandler.java @@ -22,7 +22,6 @@ import java.time.ZoneOffset; import java.time.format.DateTimeFormatter; import java.util.HashMap; -import java.util.List; import java.util.Locale; import java.util.Map; import java.util.function.Supplier; @@ -38,18 +37,18 @@ public DataSourceResponse.LeafMappingParametersGenerator handle(DataSourceReques } return new DataSourceResponse.LeafMappingParametersGenerator(switch (fieldType) { - case KEYWORD -> keywordMapping(false, request); + case KEYWORD -> keywordMapping(request); case LONG, INTEGER, SHORT, BYTE, DOUBLE, FLOAT, HALF_FLOAT, 
UNSIGNED_LONG -> numberMapping(fieldType); case SCALED_FLOAT -> scaledFloatMapping(); case COUNTED_KEYWORD -> countedKeywordMapping(); case BOOLEAN -> booleanMapping(); case DATE -> dateMapping(); case GEO_POINT -> geoPointMapping(); - case TEXT -> textMapping(false, request); + case TEXT -> textMapping(); case IP -> ipMapping(); case CONSTANT_KEYWORD -> constantKeywordMapping(); - case WILDCARD -> wildcardMapping(false, request); - case MATCH_ONLY_TEXT -> matchOnlyTextMapping(false, request); + case WILDCARD -> wildcardMapping(); + case MATCH_ONLY_TEXT -> matchOnlyTextMapping(); }); } @@ -79,7 +78,7 @@ private Supplier> numberMapping(FieldType fieldType) { }; } - private Supplier> keywordMapping(boolean hasParent, DataSourceRequest.LeafMappingParametersGenerator request) { + private Supplier> keywordMapping(DataSourceRequest.LeafMappingParametersGenerator request) { return () -> { var mapping = commonMappingParameters(); @@ -104,9 +103,6 @@ private Supplier> keywordMapping(boolean hasParent, DataSour if (ESTestCase.randomDouble() <= 0.2) { mapping.put("null_value", ESTestCase.randomAlphaOfLengthBetween(0, 10)); } - if (hasParent == false && ESTestCase.randomBoolean()) { - mapping.put("fields", stringSubField(request)); - } return mapping; }; @@ -201,17 +197,13 @@ private Supplier> geoPointMapping() { }; } - private Supplier> textMapping(boolean hasParent, DataSourceRequest.LeafMappingParametersGenerator request) { + private Supplier> textMapping() { return () -> { var mapping = new HashMap(); mapping.put("store", ESTestCase.randomBoolean()); mapping.put("index", ESTestCase.randomBoolean()); - if (hasParent == false && ESTestCase.randomBoolean()) { - mapping.put("fields", stringSubField(request)); - } - return mapping; }; } @@ -244,7 +236,7 @@ private Supplier> constantKeywordMapping() { }; } - private Supplier> wildcardMapping(boolean hasParent, DataSourceRequest.LeafMappingParametersGenerator request) { + private Supplier> wildcardMapping() { return () -> { var 
mapping = new HashMap(); @@ -254,47 +246,13 @@ private Supplier> wildcardMapping(boolean hasParent, DataSou if (ESTestCase.randomDouble() <= 0.2) { mapping.put("null_value", ESTestCase.randomAlphaOfLengthBetween(0, 10)); } - if (hasParent == false && ESTestCase.randomBoolean()) { - mapping.put("fields", stringSubField(request)); - } - - return mapping; - }; - } - private Supplier> matchOnlyTextMapping( - boolean hasParent, - DataSourceRequest.LeafMappingParametersGenerator request - ) { - return () -> { - var mapping = new HashMap(); - if (hasParent == false && ESTestCase.randomBoolean()) { - mapping.put("fields", stringSubField(request)); - } return mapping; }; } - private Map stringSubField(DataSourceRequest.LeafMappingParametersGenerator request) { - FieldType parent = FieldType.tryParse(request.fieldType()); - List childTypes = request.includePluginTypesInMultiFields() - ? List.of(FieldType.TEXT, FieldType.KEYWORD, FieldType.WILDCARD, FieldType.MATCH_ONLY_TEXT) - : List.of(FieldType.TEXT, FieldType.KEYWORD); - var childType = ESTestCase.randomValueOtherThan(parent, () -> ESTestCase.randomFrom(childTypes)); - var child = switch (childType) { - case TEXT -> textMapping(true, request).get(); - case KEYWORD -> { - var mapping = keywordMapping(true, request).get(); - mapping.remove("copy_to"); - yield mapping; - } - case MATCH_ONLY_TEXT -> matchOnlyTextMapping(true, request).get(); - case WILDCARD -> wildcardMapping(true, request).get(); - default -> throw new AssertionError("unreachable"); - }; - - child.put("type", childType.toString()); - return Map.of("subfield_" + childType, child); + private Supplier> matchOnlyTextMapping() { + return HashMap::new; } public static HashMap commonMappingParameters() { diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/MultifieldAddonHandler.java b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/MultifieldAddonHandler.java new file mode 100644 index 
0000000000000..a8aa93e52401b --- /dev/null +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/MultifieldAddonHandler.java @@ -0,0 +1,90 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.datageneration.datasource; + +import org.elasticsearch.datageneration.FieldType; +import org.elasticsearch.test.ESTestCase; + +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +public class MultifieldAddonHandler implements DataSourceHandler { + + private static final String PLACEHOLDER_NAME = "_an_improbably_placeholder_name"; + private static final float DEFAULT_CHANCE_OF_CHILD_FIELD = 0.5f; + private final Map> subfieldTypes; + private final float chanceOfChildField; + + private static final Set STRING_TYPES = Set.of(FieldType.TEXT, FieldType.KEYWORD, FieldType.MATCH_ONLY_TEXT, FieldType.WILDCARD); + public static MultifieldAddonHandler ALL_STRING_TYPES = new MultifieldAddonHandler(STRING_TYPES.stream().collect(Collectors.toMap(t -> t, t -> STRING_TYPES.stream().filter(s -> s != t).toList()))); + + public MultifieldAddonHandler(Map> subfieldTypes, float chanceOfChildField) { + this.subfieldTypes = subfieldTypes; + this.chanceOfChildField = chanceOfChildField; + } + + public MultifieldAddonHandler(Map> subfieldTypes) { + this(subfieldTypes, DEFAULT_CHANCE_OF_CHILD_FIELD); + } + + @Override + public DataSourceResponse.LeafMappingParametersGenerator handle(DataSourceRequest.LeafMappingParametersGenerator request) { + + // Need to delegate creation 
of the same type of field to other handlers. So skip request + // if it's for the placeholder name used when creating the child and parent fields. + if (request.fieldName().equals(PLACEHOLDER_NAME)) { + return null; + } + + FieldType parentType = FieldType.tryParse(request.fieldType()); + List childTypes = subfieldTypes.get(parentType); + if (childTypes == null) { + return null; + } + + return new DataSourceResponse.LeafMappingParametersGenerator(() -> { + assert parentType != null; + var parent = getMappingForType(parentType, request); + if (ESTestCase.randomFloat() > chanceOfChildField) { + return parent; + } + + var childType = ESTestCase.randomFrom(childTypes); + var child = getChildMappingForType(childType, request); + + child.put("type", childType.toString()); + String childName = "subfield_" + childType; + parent.put("fields", Map.of(childName, child)); + return parent; + }); + } + + private static Map getChildMappingForType(FieldType type, DataSourceRequest.LeafMappingParametersGenerator request) { + Map mapping = getMappingForType(type, request); + if (type == FieldType.KEYWORD) { + mapping.remove("copy_to"); + } + return mapping; + } + + private static Map getMappingForType(FieldType type, DataSourceRequest.LeafMappingParametersGenerator request) { + return request.dataSource().get( + new DataSourceRequest.LeafMappingParametersGenerator( + request.dataSource(), + PLACEHOLDER_NAME, + type.toString(), + request.eligibleCopyToFields(), + request.dynamicMapping() + ) + ).mappingGenerator().get(); + } +} diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/BlockLoaderTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/BlockLoaderTestCase.java index 244004fdafacb..2e628493daceb 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/BlockLoaderTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/BlockLoaderTestCase.java @@ -18,6 +18,7 @@ import 
org.elasticsearch.datageneration.datasource.DataSourceHandler; import org.elasticsearch.datageneration.datasource.DataSourceRequest; import org.elasticsearch.datageneration.datasource.DataSourceResponse; +import org.elasticsearch.datageneration.datasource.MultifieldAddonHandler; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentType; @@ -176,8 +177,7 @@ public DataSourceResponse.LeafMappingParametersGenerator handle(DataSourceReques "_field", request.fieldType(), request.eligibleCopyToFields(), - request.dynamicMapping(), - request.includePluginTypesInMultiFields() + request.dynamicMapping() ) ).mappingGenerator().get(); @@ -211,7 +211,6 @@ public DataSourceResponse.LeafMappingParametersGenerator handle(DataSourceReques public static DataGeneratorSpecification buildSpecification(Collection customHandlers) { return DataGeneratorSpecification.builder() - .withIncludePluginTypes(false) .withFullyDynamicMapping(false) // Disable dynamic mapping and disabled objects .withDataSourceHandlers(List.of(new DataSourceHandler() { diff --git a/x-pack/plugin/logsdb/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/qa/DataGenerationHelper.java b/x-pack/plugin/logsdb/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/qa/DataGenerationHelper.java index 86c435ba2a4e8..d5952f45a687d 100644 --- a/x-pack/plugin/logsdb/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/qa/DataGenerationHelper.java +++ b/x-pack/plugin/logsdb/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/qa/DataGenerationHelper.java @@ -18,6 +18,7 @@ import org.elasticsearch.datageneration.datasource.DataSourceHandler; import org.elasticsearch.datageneration.datasource.DataSourceRequest; import org.elasticsearch.datageneration.datasource.DataSourceResponse; +import org.elasticsearch.datageneration.datasource.MultifieldAddonHandler; import org.elasticsearch.datageneration.fields.PredefinedField; import org.elasticsearch.index.mapper.Mapper; import 
org.elasticsearch.test.ESTestCase; @@ -105,7 +106,8 @@ public DataSourceResponse.FieldTypeGenerator.FieldTypeInfo get() { } }); } - })); + })) + .withDataSourceHandlers(List.of(MultifieldAddonHandler.ALL_STRING_TYPES)); // Customize builder if necessary builderConfigurator.accept(specificationBuilder); From f136489e2c41cd09c8ed81e7bb7a325f38e712cd Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 13 Aug 2025 21:19:28 +0000 Subject: [PATCH 12/16] [CI] Auto commit changes from spotless --- .../datasource/MultifieldAddonHandler.java | 30 ++++++++++++------- .../index/mapper/BlockLoaderTestCase.java | 1 - 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/MultifieldAddonHandler.java b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/MultifieldAddonHandler.java index a8aa93e52401b..af28920cba2e4 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/MultifieldAddonHandler.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/MultifieldAddonHandler.java @@ -24,8 +24,15 @@ public class MultifieldAddonHandler implements DataSourceHandler { private final Map> subfieldTypes; private final float chanceOfChildField; - private static final Set STRING_TYPES = Set.of(FieldType.TEXT, FieldType.KEYWORD, FieldType.MATCH_ONLY_TEXT, FieldType.WILDCARD); - public static MultifieldAddonHandler ALL_STRING_TYPES = new MultifieldAddonHandler(STRING_TYPES.stream().collect(Collectors.toMap(t -> t, t -> STRING_TYPES.stream().filter(s -> s != t).toList()))); + private static final Set STRING_TYPES = Set.of( + FieldType.TEXT, + FieldType.KEYWORD, + FieldType.MATCH_ONLY_TEXT, + FieldType.WILDCARD + ); + public static MultifieldAddonHandler ALL_STRING_TYPES = new MultifieldAddonHandler( + STRING_TYPES.stream().collect(Collectors.toMap(t -> t, t -> STRING_TYPES.stream().filter(s -> s != t).toList())) + ); public 
MultifieldAddonHandler(Map> subfieldTypes, float chanceOfChildField) { this.subfieldTypes = subfieldTypes; @@ -77,14 +84,17 @@ private static Map getChildMappingForType(FieldType type, DataSo } private static Map getMappingForType(FieldType type, DataSourceRequest.LeafMappingParametersGenerator request) { - return request.dataSource().get( - new DataSourceRequest.LeafMappingParametersGenerator( - request.dataSource(), - PLACEHOLDER_NAME, - type.toString(), - request.eligibleCopyToFields(), - request.dynamicMapping() + return request.dataSource() + .get( + new DataSourceRequest.LeafMappingParametersGenerator( + request.dataSource(), + PLACEHOLDER_NAME, + type.toString(), + request.eligibleCopyToFields(), + request.dynamicMapping() + ) ) - ).mappingGenerator().get(); + .mappingGenerator() + .get(); } } diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/BlockLoaderTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/BlockLoaderTestCase.java index 2e628493daceb..3b6c4ec5e0123 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/BlockLoaderTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/BlockLoaderTestCase.java @@ -18,7 +18,6 @@ import org.elasticsearch.datageneration.datasource.DataSourceHandler; import org.elasticsearch.datageneration.datasource.DataSourceRequest; import org.elasticsearch.datageneration.datasource.DataSourceResponse; -import org.elasticsearch.datageneration.datasource.MultifieldAddonHandler; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentType; From 401c9e8d13e902574dc8e7061c7adff24a3945b7 Mon Sep 17 00:00:00 2001 From: Parker Timmins Date: Wed, 13 Aug 2025 16:44:41 -0500 Subject: [PATCH 13/16] Small fix --- .../datageneration/datasource/MultifieldAddonHandler.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/MultifieldAddonHandler.java b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/MultifieldAddonHandler.java index af28920cba2e4..e6f4e12d0a7cc 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/MultifieldAddonHandler.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/MultifieldAddonHandler.java @@ -19,7 +19,7 @@ public class MultifieldAddonHandler implements DataSourceHandler { - private static final String PLACEHOLDER_NAME = "_an_improbably_placeholder_name"; + private static final String PLACEHOLDER_NAME = "_an_improbable_placeholder_name"; private static final float DEFAULT_CHANCE_OF_CHILD_FIELD = 0.5f; private final Map> subfieldTypes; private final float chanceOfChildField; From 7715c830759ce9153ce7fca28b00e5bd08cbc5fc Mon Sep 17 00:00:00 2001 From: Parker Timmins Date: Thu, 14 Aug 2025 00:17:16 -0500 Subject: [PATCH 14/16] Change set to list to avoid non-determinism --- .../datageneration/datasource/MultifieldAddonHandler.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/MultifieldAddonHandler.java b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/MultifieldAddonHandler.java index e6f4e12d0a7cc..648046fd39f3c 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/MultifieldAddonHandler.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/MultifieldAddonHandler.java @@ -24,7 +24,7 @@ public class MultifieldAddonHandler implements DataSourceHandler { private final Map> subfieldTypes; private final float chanceOfChildField; - private static final Set STRING_TYPES = Set.of( + private static final List STRING_TYPES = List.of( FieldType.TEXT, FieldType.KEYWORD, FieldType.MATCH_ONLY_TEXT, From 
5ce0810c140c2c92639ae89a742c69da03e7ded4 Mon Sep 17 00:00:00 2001 From: Parker Timmins Date: Thu, 14 Aug 2025 09:11:07 -0500 Subject: [PATCH 15/16] improve naming --- .../datageneration/datasource/MultifieldAddonHandler.java | 3 +-- .../elasticsearch/xpack/logsdb/qa/DataGenerationHelper.java | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/MultifieldAddonHandler.java b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/MultifieldAddonHandler.java index 648046fd39f3c..8dad162406a7d 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/MultifieldAddonHandler.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/MultifieldAddonHandler.java @@ -14,7 +14,6 @@ import java.util.List; import java.util.Map; -import java.util.Set; import java.util.stream.Collectors; public class MultifieldAddonHandler implements DataSourceHandler { @@ -30,7 +29,7 @@ public class MultifieldAddonHandler implements DataSourceHandler { FieldType.MATCH_ONLY_TEXT, FieldType.WILDCARD ); - public static MultifieldAddonHandler ALL_STRING_TYPES = new MultifieldAddonHandler( + public static MultifieldAddonHandler STRING_TYPE_HANDLER = new MultifieldAddonHandler( STRING_TYPES.stream().collect(Collectors.toMap(t -> t, t -> STRING_TYPES.stream().filter(s -> s != t).toList())) ); diff --git a/x-pack/plugin/logsdb/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/qa/DataGenerationHelper.java b/x-pack/plugin/logsdb/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/qa/DataGenerationHelper.java index d5952f45a687d..257689c8aa558 100644 --- a/x-pack/plugin/logsdb/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/qa/DataGenerationHelper.java +++ b/x-pack/plugin/logsdb/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/qa/DataGenerationHelper.java @@ -107,7 +107,7 @@ public DataSourceResponse.FieldTypeGenerator.FieldTypeInfo 
get() { }); } })) - .withDataSourceHandlers(List.of(MultifieldAddonHandler.ALL_STRING_TYPES)); + .withDataSourceHandlers(List.of(MultifieldAddonHandler.STRING_TYPE_HANDLER)); // Customize builder if necessary builderConfigurator.accept(specificationBuilder); From 0037c8fe359c5aa56227e233ae1585a20cf01486 Mon Sep 17 00:00:00 2001 From: Parker Timmins Date: Thu, 14 Aug 2025 10:44:57 -0500 Subject: [PATCH 16/16] Improvements from review --- .../datasource/DefaultObjectGenerationHandler.java | 11 +++++++++++ .../datasource/MultifieldAddonHandler.java | 10 ++++------ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultObjectGenerationHandler.java b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultObjectGenerationHandler.java index bf660779186ca..0938e59903099 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultObjectGenerationHandler.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultObjectGenerationHandler.java @@ -20,6 +20,14 @@ import static org.elasticsearch.test.ESTestCase.randomRealisticUnicodeOfCodepointLengthBetween; public class DefaultObjectGenerationHandler implements DataSourceHandler { + + /** + * Field names will not be generated which start with `_reserved_`. Handlers can safely + * create field names starting with this prefix without the concern of randomly generated + * fields having the same name. 
+ */ + public static final String RESERVED_FIELD_NAME_PREFIX = "_reserved_"; + @Override public DataSourceResponse.ChildFieldGenerator handle(DataSourceRequest.ChildFieldGenerator request) { return new DataSourceResponse.ChildFieldGenerator() { @@ -57,6 +65,9 @@ public String generateFieldName() { if (fieldName.indexOf('.') != -1) { continue; } + if (fieldName.startsWith(RESERVED_FIELD_NAME_PREFIX)) { + continue; + } return fieldName; } diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/MultifieldAddonHandler.java b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/MultifieldAddonHandler.java index 8dad162406a7d..886629beaf9d2 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/MultifieldAddonHandler.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/MultifieldAddonHandler.java @@ -18,7 +18,7 @@ public class MultifieldAddonHandler implements DataSourceHandler { - private static final String PLACEHOLDER_NAME = "_an_improbable_placeholder_name"; + private static final String PLACEHOLDER = DefaultObjectGenerationHandler.RESERVED_FIELD_NAME_PREFIX + "multifield"; private static final float DEFAULT_CHANCE_OF_CHILD_FIELD = 0.5f; private final Map> subfieldTypes; private final float chanceOfChildField; @@ -47,7 +47,7 @@ public DataSourceResponse.LeafMappingParametersGenerator handle(DataSourceReques // Need to delegate creation of the same type of field to other handlers. So skip request // if it's for the placeholder name used when creating the child and parent fields. 
- if (request.fieldName().equals(PLACEHOLDER_NAME)) { + if (request.fieldName().equals(PLACEHOLDER)) { return null; } @@ -76,9 +76,7 @@ public DataSourceResponse.LeafMappingParametersGenerator handle(DataSourceReques private static Map getChildMappingForType(FieldType type, DataSourceRequest.LeafMappingParametersGenerator request) { Map mapping = getMappingForType(type, request); - if (type == FieldType.KEYWORD) { - mapping.remove("copy_to"); - } + mapping.remove("copy_to"); return mapping; } @@ -87,7 +85,7 @@ private static Map getMappingForType(FieldType type, DataSourceR .get( new DataSourceRequest.LeafMappingParametersGenerator( request.dataSource(), - PLACEHOLDER_NAME, + PLACEHOLDER, type.toString(), request.eligibleCopyToFields(), request.dynamicMapping()