Skip to content

Commit c401a71

Browse files
authored
Make mapping a distinct concept in logsdb data generation (#114370)
1 parent 16864e9 commit c401a71

35 files changed

+728
-1157
lines changed

modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/DataGenerationHelper.java

Lines changed: 43 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -10,23 +10,30 @@
1010
package org.elasticsearch.datastreams.logsdb.qa;
1111

1212
import org.elasticsearch.common.settings.Settings;
13-
import org.elasticsearch.core.CheckedConsumer;
1413
import org.elasticsearch.index.mapper.Mapper;
15-
import org.elasticsearch.logsdb.datageneration.DataGenerator;
1614
import org.elasticsearch.logsdb.datageneration.DataGeneratorSpecification;
17-
import org.elasticsearch.logsdb.datageneration.FieldDataGenerator;
15+
import org.elasticsearch.logsdb.datageneration.DocumentGenerator;
16+
import org.elasticsearch.logsdb.datageneration.FieldType;
17+
import org.elasticsearch.logsdb.datageneration.Mapping;
18+
import org.elasticsearch.logsdb.datageneration.MappingGenerator;
19+
import org.elasticsearch.logsdb.datageneration.Template;
20+
import org.elasticsearch.logsdb.datageneration.TemplateGenerator;
1821
import org.elasticsearch.logsdb.datageneration.fields.PredefinedField;
1922
import org.elasticsearch.test.ESTestCase;
2023
import org.elasticsearch.xcontent.XContentBuilder;
2124

2225
import java.io.IOException;
2326
import java.util.List;
27+
import java.util.Map;
2428
import java.util.function.Consumer;
2529

2630
public class DataGenerationHelper {
2731
private final boolean keepArraySource;
2832

29-
private final DataGenerator dataGenerator;
33+
private final DocumentGenerator documentGenerator;
34+
35+
private final Template template;
36+
private final Mapping mapping;
3037

3138
public DataGenerationHelper() {
3239
this(b -> {});
@@ -40,67 +47,59 @@ public DataGenerationHelper(Consumer<DataGeneratorSpecification.Builder> builder
4047
.withPredefinedFields(
4148
List.of(
4249
// Customized because it always needs doc_values for aggregations.
43-
new PredefinedField.WithGenerator("host.name", new FieldDataGenerator() {
44-
@Override
45-
public CheckedConsumer<XContentBuilder, IOException> mappingWriter() {
46-
return b -> b.startObject().field("type", "keyword").endObject();
47-
}
48-
49-
@Override
50-
public CheckedConsumer<XContentBuilder, IOException> fieldValueGenerator() {
51-
return b -> b.value(ESTestCase.randomAlphaOfLength(5));
52-
}
53-
}),
50+
new PredefinedField.WithGenerator(
51+
"host.name",
52+
FieldType.KEYWORD,
53+
Map.of("type", "keyword"),
54+
() -> ESTestCase.randomAlphaOfLength(5)
55+
),
5456
// Needed for terms query
55-
new PredefinedField.WithGenerator("method", new FieldDataGenerator() {
56-
@Override
57-
public CheckedConsumer<XContentBuilder, IOException> mappingWriter() {
58-
return b -> b.startObject().field("type", "keyword").endObject();
59-
}
60-
61-
@Override
62-
public CheckedConsumer<XContentBuilder, IOException> fieldValueGenerator() {
63-
return b -> b.value(ESTestCase.randomFrom("put", "post", "get"));
64-
}
65-
}),
57+
new PredefinedField.WithGenerator(
58+
"method",
59+
FieldType.KEYWORD,
60+
Map.of("type", "keyword"),
61+
() -> ESTestCase.randomFrom("put", "post", "get")
62+
),
6663

6764
// Needed for histogram aggregation
68-
new PredefinedField.WithGenerator("memory_usage_bytes", new FieldDataGenerator() {
69-
@Override
70-
public CheckedConsumer<XContentBuilder, IOException> mappingWriter() {
71-
return b -> b.startObject().field("type", "long").endObject();
72-
}
73-
74-
@Override
75-
public CheckedConsumer<XContentBuilder, IOException> fieldValueGenerator() {
76-
// We can generate this using standard long field but we would get "too many buckets"
77-
return b -> b.value(ESTestCase.randomLongBetween(1000, 2000));
78-
}
79-
})
65+
new PredefinedField.WithGenerator(
66+
"memory_usage_bytes",
67+
FieldType.LONG,
68+
Map.of("type", "long"),
69+
() -> ESTestCase.randomLongBetween(1000, 2000)
70+
)
8071
)
8172
);
8273

8374
// Customize builder if necessary
8475
builderConfigurator.accept(specificationBuilder);
8576

86-
this.dataGenerator = new DataGenerator(specificationBuilder.build());
87-
}
77+
var specification = specificationBuilder.build();
78+
79+
this.documentGenerator = new DocumentGenerator(specification);
8880

89-
DataGenerator getDataGenerator() {
90-
return dataGenerator;
81+
this.template = new TemplateGenerator(specification).generate();
82+
this.mapping = new MappingGenerator(specification).generate(template);
9183
}
9284

9385
void logsDbMapping(XContentBuilder builder) throws IOException {
94-
dataGenerator.writeMapping(builder);
86+
builder.map(mapping.raw());
9587
}
9688

9789
void standardMapping(XContentBuilder builder) throws IOException {
98-
dataGenerator.writeMapping(builder);
90+
builder.map(mapping.raw());
9991
}
10092

10193
void logsDbSettings(Settings.Builder builder) {
10294
if (keepArraySource) {
10395
builder.put(Mapper.SYNTHETIC_SOURCE_KEEP_INDEX_SETTING.getKey(), "arrays");
10496
}
10597
}
98+
99+
void generateDocument(XContentBuilder document, Map<String, Object> additionalFields) throws IOException {
100+
var generated = documentGenerator.generate(template, mapping);
101+
generated.putAll(additionalFields);
102+
103+
document.map(generated);
104+
}
106105
}

modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/StandardVersusLogsIndexModeChallengeRestIT.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@
5353
* This test uses simple mapping and document structure in order to allow easier debugging of the test itself.
5454
*/
5555
public class StandardVersusLogsIndexModeChallengeRestIT extends AbstractChallengeRestTest {
56-
private final int numShards = randomBoolean() ? randomIntBetween(2, 5) : 0;
56+
private final int numShards = randomBoolean() ? randomIntBetween(2, 4) : 0;
5757
private final int numReplicas = randomBoolean() ? randomIntBetween(1, 3) : 0;
5858
private final boolean fullyDynamicMapping = randomBoolean();
5959

modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/StandardVersusLogsIndexModeRandomDataChallengeRestIT.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
import java.io.IOException;
1919
import java.time.Instant;
20+
import java.util.Map;
2021

2122
/**
2223
* Challenge test (see {@link StandardVersusLogsIndexModeChallengeRestIT}) that uses randomly generated
@@ -53,10 +54,10 @@ public void contenderSettings(Settings.Builder builder) {
5354
@Override
5455
protected XContentBuilder generateDocument(final Instant timestamp) throws IOException {
5556
var document = XContentFactory.jsonBuilder();
56-
dataGenerationHelper.getDataGenerator().generateDocument(document, doc -> {
57-
doc.field("@timestamp", DateFormatter.forPattern(FormatNames.STRICT_DATE_OPTIONAL_TIME.getName()).format(timestamp));
58-
});
59-
57+
dataGenerationHelper.generateDocument(
58+
document,
59+
Map.of("@timestamp", DateFormatter.forPattern(FormatNames.STRICT_DATE_OPTIONAL_TIME.getName()).format(timestamp))
60+
);
6061
return document;
6162
}
6263
}

test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/DataGenerator.java

Lines changed: 0 additions & 75 deletions
This file was deleted.

test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSpecification.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
import java.util.List;
1919

2020
/**
21-
* Allows configuring behavior of {@link DataGenerator}.
21+
* Allows configuring behavior of data generation components.
2222
* @param dataSource source of generated data
2323
* @param maxFieldCountPerLevel maximum number of fields that an individual object in mapping has.
2424
* Applies to subobjects.
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.logsdb.datageneration;
11+
12+
import org.elasticsearch.logsdb.datageneration.datasource.DataSourceRequest;
13+
import org.elasticsearch.logsdb.datageneration.datasource.DataSourceResponse;
14+
15+
import java.util.ArrayList;
16+
import java.util.Map;
17+
import java.util.Optional;
18+
import java.util.TreeMap;
19+
20+
/**
21+
* Generates a valid random document that follows the structure of the provided {@link Template}.
22+
*/
23+
public class DocumentGenerator {
24+
private final DataGeneratorSpecification specification;
25+
26+
private final DataSourceResponse.ObjectArrayGenerator objectArrayGenerator;
27+
28+
public DocumentGenerator(DataGeneratorSpecification specification) {
29+
this.specification = specification;
30+
31+
this.objectArrayGenerator = specification.dataSource().get(new DataSourceRequest.ObjectArrayGenerator());
32+
}
33+
34+
/**
35+
* Generates a valid random document following the provided template.
36+
* @param template template for the document
37+
* @param mapping generated mapping that will be applied to the destination index of this document
38+
* @return document as a map where subobjects are represented as nested maps
39+
*/
40+
public Map<String, Object> generate(Template template, Mapping mapping) {
41+
var documentMap = new TreeMap<String, Object>();
42+
for (var predefinedField : specification.predefinedFields()) {
43+
documentMap.put(predefinedField.name(), predefinedField.generator(specification.dataSource()).generateValue());
44+
}
45+
46+
generateFields(documentMap, template.template(), new Context("", mapping.lookup()));
47+
return documentMap;
48+
}
49+
50+
private void generateFields(Map<String, Object> document, Map<String, Template.Entry> template, Context context) {
51+
for (var entry : template.entrySet()) {
52+
String fieldName = entry.getKey();
53+
Template.Entry templateEntry = entry.getValue();
54+
55+
if (templateEntry instanceof Template.Leaf leaf) {
56+
// Unsigned long does not play well when dynamically mapped because
57+
// it gets mapped as just long and large values fail to index.
58+
// Skip such a field when the mapping has no explicit entry for it.
59+
if (leaf.type() == FieldType.UNSIGNED_LONG && context.mappingLookup().get(context.pathTo(fieldName)) == null) {
60+
continue;
61+
}
62+
63+
var generator = leaf.type().generator(fieldName, specification.dataSource());
64+
65+
document.put(fieldName, generator.generateValue());
66+
} else if (templateEntry instanceof Template.Object object) {
67+
Optional<Integer> arrayLength = objectArrayGenerator.lengthGenerator().get();
68+
69+
if (arrayLength.isPresent()) {
70+
var children = new ArrayList<>(arrayLength.get());
71+
document.put(object.name(), children);
72+
73+
for (int i = 0; i < arrayLength.get(); i++) {
74+
children.add(generateObject(object, context));
75+
}
76+
} else {
77+
document.put(object.name(), generateObject(object, context));
78+
}
79+
}
80+
}
81+
}
82+
83+
private Map<String, Object> generateObject(Template.Object object, Context context) {
84+
var children = new TreeMap<String, Object>();
85+
generateFields(children, object.children(), context.stepIntoObject(object.name()));
86+
return children;
87+
}
88+
89+
record Context(String path, Map<String, Map<String, Object>> mappingLookup) {
90+
Context stepIntoObject(String name) {
91+
return new Context(pathTo(name), mappingLookup);
92+
}
93+
94+
String pathTo(String leafFieldName) {
95+
return path.isEmpty() ? leafFieldName : path + "." + leafFieldName;
96+
}
97+
}
98+
}

test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/FieldDataGenerator.java

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,21 +9,11 @@
99

1010
package org.elasticsearch.logsdb.datageneration;
1111

12-
import org.elasticsearch.core.CheckedConsumer;
13-
import org.elasticsearch.xcontent.XContentBuilder;
14-
15-
import java.io.IOException;
16-
1712
/**
18-
* Entity responsible for generating a valid randomized mapping for a field
19-
* and a generator of field values valid for this mapping.
13+
* Entity responsible for generating a valid value for a field.
2014
*
21-
* Generator is expected to produce the same mapping per instance of generator.
22-
* Function returned by {@link FieldDataGenerator#fieldValueGenerator() } is expected
23-
* to produce a randomized value each time.
15+
* Generator is expected to produce a new randomized value on every call.
2416
*/
2517
public interface FieldDataGenerator {
26-
CheckedConsumer<XContentBuilder, IOException> mappingWriter();
27-
28-
CheckedConsumer<XContentBuilder, IOException> fieldValueGenerator();
18+
Object generateValue();
2919
}

0 commit comments

Comments
 (0)