Skip to content

Commit d52be5f

Browse files
authored
Add ignore_malformed and null_values to test data generation (#121983)
1 parent 743e5d4 commit d52be5f

24 files changed

+528
-248
lines changed

test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/DocumentGenerator.java

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,10 @@ public DocumentGenerator(DataGeneratorSpecification specification) {
4040
public Map<String, Object> generate(Template template, Mapping mapping) {
4141
var documentMap = new TreeMap<String, Object>();
4242
for (var predefinedField : specification.predefinedFields()) {
43-
documentMap.put(predefinedField.name(), predefinedField.generator(specification.dataSource()).generateValue());
43+
documentMap.put(
44+
predefinedField.name(),
45+
predefinedField.generator(specification.dataSource()).generateValue(predefinedField.mapping())
46+
);
4447
}
4548

4649
generateFields(documentMap, template.template(), new Context("", mapping.lookup()));
@@ -53,16 +56,18 @@ private void generateFields(Map<String, Object> document, Map<String, Template.E
5356
Template.Entry templateEntry = entry.getValue();
5457

5558
if (templateEntry instanceof Template.Leaf leaf) {
59+
var fieldMapping = context.mappingLookup().get(context.pathTo(fieldName));
60+
5661
// Unsigned long does not play well when dynamically mapped because
5762
// it gets mapped as just long and large values fail to index.
5863
// Just skip it.
59-
if (leaf.type() == FieldType.UNSIGNED_LONG && context.mappingLookup().get(context.pathTo(fieldName)) == null) {
64+
if (leaf.type() == FieldType.UNSIGNED_LONG && fieldMapping == null) {
6065
continue;
6166
}
6267

6368
var generator = leaf.type().generator(fieldName, specification.dataSource());
6469

65-
document.put(fieldName, generator.generateValue());
70+
document.put(fieldName, generator.generateValue(fieldMapping));
6671
} else if (templateEntry instanceof Template.Object object) {
6772
Optional<Integer> arrayLength = objectArrayGenerator.lengthGenerator().get();
6873

test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/FieldDataGenerator.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,13 @@
99

1010
package org.elasticsearch.logsdb.datageneration;
1111

12+
import java.util.Map;
13+
1214
/**
1315
* Entity responsible for generating a valid value for a field.
1416
*
1517
* Generator is expected to produce a different value on every call.
1618
*/
1719
public interface FieldDataGenerator {
18-
Object generateValue();
20+
Object generateValue(Map<String, Object> fieldMapping);
1921
}

test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DataSourceHandler.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,10 @@ default DataSourceResponse.RepeatingWrapper handle(DataSourceRequest.RepeatingWr
5858
return null;
5959
}
6060

61+
/**
 * Default handling of a {@link DataSourceRequest.MalformedWrapper} request.
 *
 * This default implementation produces no response and returns {@code null}.
 * NOTE(review): presumably {@code null} means "not handled by this handler",
 * as with the neighbouring default methods - confirm against the dispatching code.
 */
default DataSourceResponse.MalformedWrapper handle(DataSourceRequest.MalformedWrapper request) {
62+
return null;
63+
}
64+
6165
default DataSourceResponse.ChildFieldGenerator handle(DataSourceRequest.ChildFieldGenerator request) {
6266
return null;
6367
}

test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DataSourceRequest.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import org.elasticsearch.logsdb.datageneration.fields.DynamicMapping;
1616

1717
import java.util.Set;
18+
import java.util.function.Supplier;
1819

1920
public interface DataSourceRequest<TResponse extends DataSourceResponse> {
2021
TResponse accept(DataSourceHandler handler);
@@ -91,6 +92,12 @@ public DataSourceResponse.RepeatingWrapper accept(DataSourceHandler handler) {
9192
}
9293
}
9394

95+
/**
 * Request for a wrapper that can mix malformed values into generated data.
 *
 * @param malformedValues supplier the handler may draw malformed values from
 */
record MalformedWrapper(Supplier<Object> malformedValues) implements DataSourceRequest<DataSourceResponse.MalformedWrapper> {
96+
// Dispatches this request to the handler overload that accepts a MalformedWrapper.
public DataSourceResponse.MalformedWrapper accept(DataSourceHandler handler) {
97+
return handler.handle(this);
98+
}
99+
}
100+
94101
record ChildFieldGenerator(DataGeneratorSpecification specification)
95102
implements
96103
DataSourceRequest<DataSourceResponse.ChildFieldGenerator> {

test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DataSourceResponse.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ record ArrayWrapper(Function<Supplier<Object>, Supplier<Object>> wrapper) implem
4141

4242
record RepeatingWrapper(Function<Supplier<Object>, Supplier<Object>> wrapper) implements DataSourceResponse {}
4343

44+
/**
 * Response to a malformed-wrapper request.
 *
 * @param wrapper function that takes a supplier of values and returns a new supplier decorating it
 */
record MalformedWrapper(Function<Supplier<Object>, Supplier<Object>> wrapper) implements DataSourceResponse {}
45+
4446
interface ChildFieldGenerator extends DataSourceResponse {
4547
int generateChildFieldCount();
4648

test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultMappingParametersHandler.java

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
import org.elasticsearch.index.mapper.Mapper;
1313
import org.elasticsearch.index.mapper.ObjectMapper;
14+
import org.elasticsearch.logsdb.datageneration.FieldType;
1415
import org.elasticsearch.test.ESTestCase;
1516

1617
import java.util.HashMap;
@@ -31,7 +32,7 @@ public DataSourceResponse.LeafMappingParametersGenerator handle(DataSourceReques
3132

3233
return new DataSourceResponse.LeafMappingParametersGenerator(switch (request.fieldType()) {
3334
case KEYWORD -> keywordMapping(request, map);
34-
case LONG, INTEGER, SHORT, BYTE, DOUBLE, FLOAT, HALF_FLOAT, UNSIGNED_LONG -> plain(map);
35+
case LONG, INTEGER, SHORT, BYTE, DOUBLE, FLOAT, HALF_FLOAT, UNSIGNED_LONG -> numberMapping(map, request.fieldType());
3536
case SCALED_FLOAT -> scaledFloatMapping(map);
3637
case COUNTED_KEYWORD -> plain(Map.of("index", ESTestCase.randomBoolean()));
3738
});
@@ -41,6 +42,30 @@ private Supplier<Map<String, Object>> plain(Map<String, Object> injected) {
4142
return () -> injected;
4243
}
4344

45+
/**
 * Builds a supplier of mapping parameters for a numeric field.
 *
 * Each call to the returned supplier may add {@code ignore_malformed} and/or
 * {@code null_value} entries to {@code injected} and then returns that same map.
 *
 * NOTE: the supplier mutates and returns the map passed in rather than a copy, so
 * entries added by one invocation remain visible to later invocations.
 *
 * @param injected  mapping parameters to extend; modified in place
 * @param fieldType numeric field type, used to pick a {@code null_value} of a matching Java type
 * @return supplier yielding {@code injected} after the random additions
 */
private Supplier<Map<String, Object>> numberMapping(Map<String, Object> injected, FieldType fieldType) {
46+
return () -> {
47+
// Only sometimes set ignore_malformed explicitly; when set, its value is itself random.
if (ESTestCase.randomBoolean()) {
48+
injected.put("ignore_malformed", ESTestCase.randomBoolean());
49+
}
50+
// Add a null_value only when the random draw is at most 0.2.
if (ESTestCase.randomDouble() <= 0.2) {
51+
// Pick a random number using the ESTestCase helper that corresponds to the field type.
Number value = switch (fieldType) {
52+
case LONG -> ESTestCase.randomLong();
53+
case UNSIGNED_LONG -> ESTestCase.randomNonNegativeLong();
54+
case INTEGER -> ESTestCase.randomInt();
55+
case SHORT -> ESTestCase.randomShort();
56+
case BYTE -> ESTestCase.randomByte();
57+
case DOUBLE -> ESTestCase.randomDouble();
58+
case FLOAT, HALF_FLOAT -> ESTestCase.randomFloat();
59+
// Any other field type is not expected to reach this method.
default -> throw new IllegalStateException("Unexpected field type");
60+
};
61+
62+
injected.put("null_value", value);
63+
}
64+
65+
return injected;
66+
};
67+
}
68+
4469
private Supplier<Map<String, Object>> keywordMapping(
4570
DataSourceRequest.LeafMappingParametersGenerator request,
4671
Map<String, Object> injected
@@ -75,6 +100,15 @@ private Supplier<Map<String, Object>> keywordMapping(
75100
/**
 * Builds a supplier of mapping parameters for a scaled_float field.
 *
 * Each call to the returned supplier sets {@code scaling_factor}, may add
 * {@code null_value} and/or {@code ignore_malformed}, and returns {@code injected}.
 *
 * NOTE: the supplier mutates and returns the map passed in rather than a copy, so
 * entries added by one invocation remain visible to later invocations.
 *
 * @param injected mapping parameters to extend; modified in place
 * @return supplier yielding {@code injected} after the additions
 */
private Supplier<Map<String, Object>> scaledFloatMapping(Map<String, Object> injected) {
76101
return () -> {
77102
// scaling_factor is always set, chosen from a fixed list that includes a non-integer factor.
injected.put("scaling_factor", ESTestCase.randomFrom(10, 1000, 100000, 100.5));
103+
104+
// Add a random float null_value only when the random draw is at most 0.2.
if (ESTestCase.randomDouble() <= 0.2) {
105+
injected.put("null_value", ESTestCase.randomFloat());
106+
}
107+
108+
// Only sometimes set ignore_malformed explicitly; when set, its value is itself random.
if (ESTestCase.randomBoolean()) {
109+
injected.put("ignore_malformed", ESTestCase.randomBoolean());
110+
}
111+
78112
return injected;
79113
};
80114
}

test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultWrappersHandler.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@ public DataSourceResponse.RepeatingWrapper handle(DataSourceRequest.RepeatingWra
3232
return new DataSourceResponse.RepeatingWrapper(repeatValues());
3333
}
3434

35+
/**
 * Answers a malformed-wrapper request with a wrapper built by
 * {@code injectMalformed} from the request's supplier of malformed values.
 */
@Override
36+
public DataSourceResponse.MalformedWrapper handle(DataSourceRequest.MalformedWrapper request) {
37+
return new DataSourceResponse.MalformedWrapper(injectMalformed(request.malformedValues()));
38+
}
39+
3540
private static Function<Supplier<Object>, Supplier<Object>> injectNulls() {
3641
// Inject some nulls but majority of data should be non-null (as it likely is in reality).
3742
return (values) -> () -> ESTestCase.randomDouble() <= 0.05 ? null : values.get();
@@ -62,4 +67,8 @@ private static Function<Supplier<Object>, Supplier<Object>> repeatValues() {
6267
};
6368
};
6469
}
70+
71+
/**
 * Creates a wrapper that occasionally substitutes a malformed value for a regular one.
 *
 * @param malformedValues supplier of the values to substitute
 * @return function turning a supplier of regular values into a supplier that, on each call,
 *         draws from {@code malformedValues} when the random draw is at most 0.1 and from
 *         the wrapped supplier otherwise
 */
private static Function<Supplier<Object>, Supplier<Object>> injectMalformed(Supplier<Object> malformedValues) {
72+
// The random draw happens on every get(), so the choice is made independently per generated value.
return (values) -> () -> ESTestCase.randomDouble() <= 0.1 ? malformedValues.get() : values.get();
73+
}
6574
}

test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/ByteFieldDataGenerator.java

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,21 +13,28 @@
1313
import org.elasticsearch.logsdb.datageneration.datasource.DataSource;
1414
import org.elasticsearch.logsdb.datageneration.datasource.DataSourceRequest;
1515

16+
import java.util.Map;
1617
import java.util.function.Supplier;
1718

1819
public class ByteFieldDataGenerator implements FieldDataGenerator {
1920
private final Supplier<Object> valueGenerator;
21+
private final Supplier<Object> valueGeneratorWithMalformed;
2022

2123
public ByteFieldDataGenerator(String fieldName, DataSource dataSource) {
22-
var bytes = dataSource.get(new DataSourceRequest.ByteGenerator());
23-
var nulls = dataSource.get(new DataSourceRequest.NullWrapper());
24-
var arrays = dataSource.get(new DataSourceRequest.ArrayWrapper());
24+
var bytes = dataSource.get(new DataSourceRequest.ByteGenerator()).generator();
2525

26-
this.valueGenerator = arrays.wrapper().compose(nulls.wrapper()).apply(() -> bytes.generator().get());
26+
this.valueGenerator = Wrappers.defaults(bytes::get, dataSource);
27+
28+
var strings = dataSource.get(new DataSourceRequest.StringGenerator()).generator();
29+
this.valueGeneratorWithMalformed = Wrappers.defaultsWithMalformed(bytes::get, strings::get, dataSource);
2730
}
2831

2932
@Override
30-
public Object generateValue() {
33+
public Object generateValue(Map<String, Object> fieldMapping) {
34+
if (fieldMapping != null && (Boolean) fieldMapping.getOrDefault("ignore_malformed", false)) {
35+
return valueGeneratorWithMalformed.get();
36+
}
37+
3138
return valueGenerator.get();
3239
}
3340
}

test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/CountedKeywordFieldDataGenerator.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import org.elasticsearch.logsdb.datageneration.datasource.DataSourceRequest;
1515

1616
import java.util.HashSet;
17+
import java.util.Map;
1718
import java.util.Set;
1819
import java.util.function.Supplier;
1920

@@ -31,7 +32,7 @@ public CountedKeywordFieldDataGenerator(String fieldName, DataSource dataSource)
3132
}
3233

3334
@Override
34-
public Object generateValue() {
35+
public Object generateValue(Map<String, Object> fieldMapping) {
3536
return valueGenerator.get();
3637
}
3738
}

test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/DoubleFieldDataGenerator.java

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,21 +13,28 @@
1313
import org.elasticsearch.logsdb.datageneration.datasource.DataSource;
1414
import org.elasticsearch.logsdb.datageneration.datasource.DataSourceRequest;
1515

16+
import java.util.Map;
1617
import java.util.function.Supplier;
1718

1819
public class DoubleFieldDataGenerator implements FieldDataGenerator {
1920
private final Supplier<Object> valueGenerator;
21+
private final Supplier<Object> valueGeneratorWithMalformed;
2022

2123
public DoubleFieldDataGenerator(String fieldName, DataSource dataSource) {
22-
var doubles = dataSource.get(new DataSourceRequest.DoubleGenerator());
23-
var nulls = dataSource.get(new DataSourceRequest.NullWrapper());
24-
var arrays = dataSource.get(new DataSourceRequest.ArrayWrapper());
24+
var doubles = dataSource.get(new DataSourceRequest.DoubleGenerator()).generator();
2525

26-
this.valueGenerator = arrays.wrapper().compose(nulls.wrapper()).apply(() -> doubles.generator().get());
26+
this.valueGenerator = Wrappers.defaults(doubles::get, dataSource);
27+
28+
var strings = dataSource.get(new DataSourceRequest.StringGenerator()).generator();
29+
this.valueGeneratorWithMalformed = Wrappers.defaultsWithMalformed(doubles::get, strings::get, dataSource);
2730
}
2831

2932
@Override
30-
public Object generateValue() {
33+
public Object generateValue(Map<String, Object> fieldMapping) {
34+
if (fieldMapping != null && (Boolean) fieldMapping.getOrDefault("ignore_malformed", false)) {
35+
return valueGeneratorWithMalformed.get();
36+
}
37+
3138
return valueGenerator.get();
3239
}
3340
}

0 commit comments

Comments
 (0)