Skip to content

Commit 061920b

Browse files
Counted keyword randomized testing (#121462)
This patch adds the needed data generator and source matcher to include counted_keyword fields in our randomized testing. This patch also updates the source matcher such that field-specific matchers are checked before the generic matcher is used. It seems that this is the correct behavior, and the only reason the generic matcher was checked first was as a workaround for issue #111916, which is now closed.
1 parent 21218c3 commit 061920b

File tree

12 files changed

+182
-14
lines changed

12 files changed

+182
-14
lines changed

test/framework/build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ dependencies {
3333
api "org.elasticsearch:mocksocket:${versions.mocksocket}"
3434

3535
testImplementation project(':x-pack:plugin:mapper-unsigned-long')
36+
testImplementation project(':x-pack:plugin:mapper-counted-keyword')
3637
testImplementation project(":modules:mapper-extras")
3738
}
3839

test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/FieldType.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
import org.elasticsearch.logsdb.datageneration.datasource.DataSource;
1313
import org.elasticsearch.logsdb.datageneration.fields.leaf.ByteFieldDataGenerator;
14+
import org.elasticsearch.logsdb.datageneration.fields.leaf.CountedKeywordFieldDataGenerator;
1415
import org.elasticsearch.logsdb.datageneration.fields.leaf.DoubleFieldDataGenerator;
1516
import org.elasticsearch.logsdb.datageneration.fields.leaf.FloatFieldDataGenerator;
1617
import org.elasticsearch.logsdb.datageneration.fields.leaf.HalfFloatFieldDataGenerator;
@@ -34,7 +35,8 @@ public enum FieldType {
3435
DOUBLE("double"),
3536
FLOAT("float"),
3637
HALF_FLOAT("half_float"),
37-
SCALED_FLOAT("scaled_float");
38+
SCALED_FLOAT("scaled_float"),
39+
COUNTED_KEYWORD("counted_keyword");
3840

3941
private final String name;
4042

@@ -54,6 +56,7 @@ public FieldDataGenerator generator(String fieldName, DataSource dataSource) {
5456
case FLOAT -> new FloatFieldDataGenerator(fieldName, dataSource);
5557
case HALF_FLOAT -> new HalfFloatFieldDataGenerator(fieldName, dataSource);
5658
case SCALED_FLOAT -> new ScaledFloatFieldDataGenerator(fieldName, dataSource);
59+
case COUNTED_KEYWORD -> new CountedKeywordFieldDataGenerator(fieldName, dataSource);
5760
};
5861
}
5962

test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DataSourceHandler.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,10 @@ default DataSourceResponse.ArrayWrapper handle(DataSourceRequest.ArrayWrapper re
5454
return null;
5555
}
5656

57+
/**
 * Handles a request for a {@link DataSourceResponse.RepeatingWrapper}.
 * This default implementation returns {@code null}.
 */
// NOTE(review): presumably null means "this handler does not serve this request" - confirm against the code that dispatches to handlers.
default DataSourceResponse.RepeatingWrapper handle(DataSourceRequest.RepeatingWrapper request) {
58+
return null;
59+
}
60+
5761
default DataSourceResponse.ChildFieldGenerator handle(DataSourceRequest.ChildFieldGenerator request) {
5862
return null;
5963
}

test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DataSourceRequest.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,12 @@ public DataSourceResponse.ArrayWrapper accept(DataSourceHandler handler) {
8585
}
8686
}
8787

88+
/**
 * Parameterless request whose response type is {@link DataSourceResponse.RepeatingWrapper}.
 */
record RepeatingWrapper() implements DataSourceRequest<DataSourceResponse.RepeatingWrapper> {
89+
// Dispatches to the handler overload that accepts this request type.
public DataSourceResponse.RepeatingWrapper accept(DataSourceHandler handler) {
90+
return handler.handle(this);
91+
}
92+
}
93+
8894
record ChildFieldGenerator(DataGeneratorSpecification specification)
8995
implements
9096
DataSourceRequest<DataSourceResponse.ChildFieldGenerator> {

test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DataSourceResponse.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ record NullWrapper(Function<Supplier<Object>, Supplier<Object>> wrapper) impleme
3939

4040
record ArrayWrapper(Function<Supplier<Object>, Supplier<Object>> wrapper) implements DataSourceResponse {}
4141

42+
/**
 * Response holding a function that turns one value supplier into another value supplier.
 */
// NOTE(review): judging by the name, the returned supplier is expected to sometimes repeat earlier values - confirm against the handler that builds it.
record RepeatingWrapper(Function<Supplier<Object>, Supplier<Object>> wrapper) implements DataSourceResponse {}
43+
4244
interface ChildFieldGenerator extends DataSourceResponse {
4345
int generateChildFieldCount();
4446

test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultMappingParametersHandler.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ public DataSourceResponse.LeafMappingParametersGenerator handle(DataSourceReques
3333
case KEYWORD -> keywordMapping(request, map);
3434
case LONG, INTEGER, SHORT, BYTE, DOUBLE, FLOAT, HALF_FLOAT, UNSIGNED_LONG -> plain(map);
3535
case SCALED_FLOAT -> scaledFloatMapping(map);
36+
case COUNTED_KEYWORD -> plain(Map.of("index", ESTestCase.randomBoolean()));
3637
});
3738
}
3839

test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultWrappersHandler.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
import org.elasticsearch.test.ESTestCase;
1313

14+
import java.util.HashSet;
1415
import java.util.function.Function;
1516
import java.util.function.Supplier;
1617
import java.util.stream.IntStream;
@@ -26,6 +27,11 @@ public DataSourceResponse.ArrayWrapper handle(DataSourceRequest.ArrayWrapper ign
2627
return new DataSourceResponse.ArrayWrapper(wrapInArray());
2728
}
2829

30+
/**
 * Answers a repeating-wrapper request with a wrapper built by {@link #repeatValues()}.
 * The request itself carries no data that is used here.
 */
@Override
31+
public DataSourceResponse.RepeatingWrapper handle(DataSourceRequest.RepeatingWrapper ignored) {
32+
return new DataSourceResponse.RepeatingWrapper(repeatValues());
33+
}
34+
2935
private static Function<Supplier<Object>, Supplier<Object>> injectNulls() {
3036
// Inject some nulls but majority of data should be non-null (as it likely is in reality).
3137
return (values) -> () -> ESTestCase.randomDouble() <= 0.05 ? null : values.get();
@@ -41,4 +47,19 @@ private static Function<Supplier<Object>, Supplier<Object>> wrapInArray() {
4147
return values.get();
4248
};
4349
}
50+
51+
private static Function<Supplier<Object>, Supplier<Object>> repeatValues() {
52+
return (values) -> {
53+
HashSet<Object> previousValues = new HashSet<>();
54+
return () -> {
55+
if (previousValues.size() > 0 && ESTestCase.randomBoolean()) {
56+
return ESTestCase.randomFrom(previousValues);
57+
} else {
58+
var value = values.get();
59+
previousValues.add(value);
60+
return value;
61+
}
62+
};
63+
};
64+
}
4465
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.logsdb.datageneration.fields.leaf;
11+
12+
import org.elasticsearch.logsdb.datageneration.FieldDataGenerator;
13+
import org.elasticsearch.logsdb.datageneration.datasource.DataSource;
14+
import org.elasticsearch.logsdb.datageneration.datasource.DataSourceRequest;
15+
16+
import java.util.HashSet;
17+
import java.util.Set;
18+
import java.util.function.Supplier;
19+
20+
public class CountedKeywordFieldDataGenerator implements FieldDataGenerator {
21+
private final Supplier<Object> valueGenerator;
22+
private final Set<String> previousStrings = new HashSet<>();
23+
24+
public CountedKeywordFieldDataGenerator(String fieldName, DataSource dataSource) {
25+
var strings = dataSource.get(new DataSourceRequest.StringGenerator());
26+
var nulls = dataSource.get(new DataSourceRequest.NullWrapper());
27+
var arrays = dataSource.get(new DataSourceRequest.ArrayWrapper());
28+
var repeats = dataSource.get(new DataSourceRequest.RepeatingWrapper());
29+
30+
this.valueGenerator = arrays.wrapper().compose(nulls.wrapper().compose(repeats.wrapper())).apply(() -> strings.generator().get());
31+
}
32+
33+
@Override
34+
public Object generateValue() {
35+
return valueGenerator.get();
36+
}
37+
}

test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/matchers/source/FieldSpecificMatcher.java

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import org.elasticsearch.xcontent.XContentBuilder;
1616

1717
import java.math.BigInteger;
18+
import java.util.HashMap;
1819
import java.util.List;
1920
import java.util.Map;
2021
import java.util.Objects;
@@ -223,4 +224,68 @@ private static BigInteger toBigInteger(Object value) {
223224
return (BigInteger) value;
224225
}
225226
}
227+
228+
class CountedKeywordMatcher implements FieldSpecificMatcher {
229+
private final XContentBuilder actualMappings;
230+
private final Settings.Builder actualSettings;
231+
private final XContentBuilder expectedMappings;
232+
private final Settings.Builder expectedSettings;
233+
234+
CountedKeywordMatcher(
235+
XContentBuilder actualMappings,
236+
Settings.Builder actualSettings,
237+
XContentBuilder expectedMappings,
238+
Settings.Builder expectedSettings
239+
) {
240+
this.actualMappings = actualMappings;
241+
this.actualSettings = actualSettings;
242+
this.expectedMappings = expectedMappings;
243+
this.expectedSettings = expectedSettings;
244+
}
245+
246+
private static List<String> normalize(List<Object> values) {
247+
return values.stream().filter(Objects::nonNull).map(it -> (String) it).toList();
248+
}
249+
250+
private static boolean matchCountsEqualExact(List<String> actualNormalized, List<String> expectedNormalized) {
251+
HashMap<String, Integer> counts = new HashMap<>();
252+
for (String value : actualNormalized) {
253+
counts.put(value, counts.getOrDefault(value, 0) + 1);
254+
}
255+
for (String value : expectedNormalized) {
256+
int newCount = counts.getOrDefault(value, 0) - 1;
257+
if (newCount == 0) {
258+
counts.remove(value);
259+
} else {
260+
counts.put(value, newCount);
261+
}
262+
}
263+
264+
return counts.isEmpty();
265+
}
266+
267+
@Override
268+
public MatchResult match(
269+
List<Object> actual,
270+
List<Object> expected,
271+
Map<String, Object> actualMapping,
272+
Map<String, Object> expectedMapping
273+
) {
274+
var actualNormalized = normalize(actual);
275+
var expectedNormalized = normalize(expected);
276+
277+
return matchCountsEqualExact(actualNormalized, expectedNormalized)
278+
? MatchResult.match()
279+
: MatchResult.noMatch(
280+
formatErrorMessage(
281+
actualMappings,
282+
actualSettings,
283+
expectedMappings,
284+
expectedSettings,
285+
"Values of type [counted_keyword] don't match after normalization, normalized"
286+
+ prettyPrintCollections(actualNormalized, expectedNormalized)
287+
)
288+
);
289+
}
290+
}
226291
}

test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/matchers/source/SourceMatcher.java

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,9 @@ public SourceMatcher(
5757
"scaled_float",
5858
new FieldSpecificMatcher.ScaledFloatMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings),
5959
"unsigned_long",
60-
new FieldSpecificMatcher.UnsignedLongMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings)
60+
new FieldSpecificMatcher.UnsignedLongMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings),
61+
"counted_keyword",
62+
new FieldSpecificMatcher.CountedKeywordMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings)
6163
);
6264
this.dynamicFieldMatcher = new DynamicFieldMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings);
6365
}
@@ -100,17 +102,8 @@ private MatchResult compareSource(Map<String, List<Object>> actual, Map<String,
100102
var actualValues = actual.get(name);
101103
var expectedValues = expectedFieldEntry.getValue();
102104

103-
// There are cases when field values are stored in ignored source
104-
// so we try to match them as is first and then apply field specific matcher.
105-
// This is temporary, we should be able to tell when source is exact using mappings.
106-
// See #111916.
107-
var genericMatchResult = matchWithGenericMatcher(actualValues, expectedValues);
108-
if (genericMatchResult.isMatch()) {
109-
continue;
110-
}
111-
112-
var matchIncludingFieldSpecificMatchers = matchWithFieldSpecificMatcher(name, actualValues, expectedValues).orElse(
113-
genericMatchResult
105+
var matchIncludingFieldSpecificMatchers = matchWithFieldSpecificMatcher(name, actualValues, expectedValues).orElseGet(
106+
() -> matchWithGenericMatcher(actualValues, expectedValues)
114107
);
115108
if (matchIncludingFieldSpecificMatchers.isMatch() == false) {
116109
var message = "Source documents don't match for field [" + name + "]: " + matchIncludingFieldSpecificMatchers.getMessage();

0 commit comments

Comments
 (0)