A random-random test for time-series data #132556

Open

pabloem wants to merge 23 commits into main from pem-randomrandom-testing

Commits (changes shown from 9 of 23 commits):
613bda8  First test case in prototype messy test file (pabloem, Aug 7, 2025)
2d70cef  [CI] Auto commit changes from spotless (Aug 8, 2025)
19e2f8a  First two randomized test cases (pabloem, Aug 9, 2025)
14b4c17  smol cleanup (pabloem, Aug 9, 2025)
836d49a  Merge branch 'main' into pem-randomrandom-testing (pabloem, Aug 9, 2025)
64624be  [CI] Auto commit changes from spotless (Aug 9, 2025)
77a368b  cleanup and ready for first check (pabloem, Aug 11, 2025)
f53f904  Merge branch 'main' into pem-randomrandom-testing (pabloem, Aug 11, 2025)
afdac85  Address comments (pabloem, Aug 12, 2025)
c384e0c  addressing comments (pabloem, Aug 12, 2025)
f5bfbf1  more addressing comments (pabloem, Aug 12, 2025)
820d950  Merge branch 'main' into pem-randomrandom-testing (pabloem, Aug 13, 2025)
3d9325a  include values check that guarantees avg,count are off (pabloem, Aug 15, 2025)
0c02d2a  Merge branch 'main' into pem-randomrandom-testing (pabloem, Aug 15, 2025)
9fc14e1  Merge branch 'main' into pem-randomrandom-testing (pabloem, Aug 15, 2025)
0e2de20  fixed computation (pabloem, Aug 18, 2025)
51b4083  fixed computation (pabloem, Aug 18, 2025)
46c3feb  Merge branch 'main' into pem-randomrandom-testing (pabloem, Aug 18, 2025)
82a17e9  Merge branch 'main' into pem-randomrandom-testing (pabloem, Aug 18, 2025)
2349873  Merge branch 'main' into pem-randomrandom-testing (pabloem, Aug 18, 2025)
079f73a  fixup (pabloem, Aug 19, 2025)
5249814  Merge branch 'main' into pem-randomrandom-testing (pabloem, Aug 19, 2025)
b36fcf3  Merge branch 'main' into pem-randomrandom-testing (pabloem, Aug 19, 2025)
@@ -50,6 +50,7 @@ public enum FieldType {
TEXT("text"),
IP("ip"),
CONSTANT_KEYWORD("constant_keyword"),
PASSTHROUGH("passthrough"), // For now this field type does not have default generators.
WILDCARD("wildcard");

private final String name;
@@ -78,6 +79,7 @@ public FieldDataGenerator generator(String fieldName, DataSource dataSource) {
case IP -> new IpFieldDataGenerator(dataSource);
case CONSTANT_KEYWORD -> new ConstantKeywordFieldDataGenerator();
case WILDCARD -> new WildcardFieldDataGenerator(dataSource);
case PASSTHROUGH -> throw new IllegalArgumentException("Passthrough field type does not have a default generator");
};
}

@@ -101,7 +103,8 @@ public static FieldType tryParse(String name) {
case "ip" -> FieldType.IP;
case "constant_keyword" -> FieldType.CONSTANT_KEYWORD;
case "wildcard" -> FieldType.WILDCARD;
default -> null;
case "passthrough" -> FieldType.PASSTHROUGH;
default -> throw new IllegalArgumentException("Unknown field type: " + name);
};
}

@@ -64,10 +64,8 @@ public Mapping generate(Template template) {

rawMapping.put("_doc", topLevelMappingParameters);

if (specification.fullyDynamicMapping()) {
// Has to be "true" for fully dynamic mapping
if (specification.fullyDynamicMapping() == false) {
topLevelMappingParameters.remove("dynamic");

return new Mapping(rawMapping, lookup);
}

@@ -46,7 +46,12 @@ public <T extends DataSourceResponse> T get(DataSourceRequest<T> request) {
return response;
}
}

throw new IllegalStateException("Request is not supported by data source");
throw new IllegalStateException(
"Request is not supported by data source. Request: "
+ request.toString()
+ "\n"
+ "Available handlers: "
+ handlers.stream().map(Object::getClass).map(Class::getName).toList().toString()
);
}
}
@@ -48,6 +48,7 @@ public DataSourceResponse.LeafMappingParametersGenerator handle(DataSourceRequest
case IP -> ipMapping();
case CONSTANT_KEYWORD -> constantKeywordMapping();
case WILDCARD -> wildcardMapping();
case PASSTHROUGH -> throw new IllegalArgumentException("Unsupported field type: " + fieldType);
});
}

@@ -12,8 +12,10 @@
import org.elasticsearch.datageneration.FieldType;
import org.elasticsearch.test.ESTestCase;

import java.util.Arrays;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;

import static org.elasticsearch.test.ESTestCase.randomDouble;
import static org.elasticsearch.test.ESTestCase.randomIntBetween;
@@ -66,13 +68,18 @@ public String generateFieldName() {

// UNSIGNED_LONG is excluded because it is mapped as long
// and values larger than long fail to parse.
private static final Set<FieldType> EXCLUDED_FROM_DYNAMIC_MAPPING = Set.of(FieldType.UNSIGNED_LONG);
private static final Set<FieldType> EXCLUDED_FROM_DYNAMIC_MAPPING = Set.of(FieldType.UNSIGNED_LONG, FieldType.PASSTHROUGH);

@Override
public DataSourceResponse.FieldTypeGenerator handle(DataSourceRequest.FieldTypeGenerator request) {
return new DataSourceResponse.FieldTypeGenerator(
() -> new DataSourceResponse.FieldTypeGenerator.FieldTypeInfo(ESTestCase.randomFrom(FieldType.values()).toString())
);
return new DataSourceResponse.FieldTypeGenerator(() -> {
// All field types minus the excluded ones.
var fieldTypes = Arrays.stream(FieldType.values())
.filter(fieldType -> EXCLUDED_FROM_DYNAMIC_MAPPING.contains(fieldType) == false)
.collect(Collectors.toSet());
var fieldType = ESTestCase.randomFrom(fieldTypes);
return new DataSourceResponse.FieldTypeGenerator.FieldTypeInfo(fieldType.toString());
});
}

@Override
2 changes: 2 additions & 0 deletions x-pack/plugin/esql/build.gradle
@@ -35,6 +35,8 @@ dependencies {
compileOnly project(':modules:lang-painless:spi')
compileOnly project(xpackModule('esql-core'))
compileOnly project(xpackModule('ml'))
compileOnly project(path: xpackModule('mapper-aggregate-metric'))
compileOnly project(path: xpackModule('downsample'))
implementation project(xpackModule('kql'))
implementation project('compute')
implementation project('compute:ann')
@@ -0,0 +1,258 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.esql.action;

import org.elasticsearch.Build;
import org.elasticsearch.action.DocWriteRequest;
import org.elasticsearch.action.admin.indices.template.put.TransportPutComposableIndexTemplateAction;
import org.elasticsearch.cluster.metadata.ComposableIndexTemplate;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.datastreams.DataStreamsPlugin;
import org.elasticsearch.index.IndexMode;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentFactory;
import org.elasticsearch.xcontent.XContentType;
import org.elasticsearch.xpack.aggregatemetric.AggregateMetricMapperPlugin;
import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin;
import org.elasticsearch.xpack.esql.plugin.EsqlPlugin;
import org.junit.Before;

import java.io.IOException;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.stream.Collectors;

import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.hamcrest.Matchers.closeTo;
import static org.hamcrest.Matchers.equalTo;

public class GenerativeTSIT extends AbstractEsqlIntegTestCase {

private static final Long NUM_DOCS = 1000L;
private static final String DATASTREAM_NAME = "tsit_ds";
private List<XContentBuilder> documents = null;
private TSDataGenerationHelper dataGenerationHelper;

Map<List<String>, List<Map<String, Object>>> groupedRows(
List<XContentBuilder> docs,
List<String> groupingAttributes,
int secondsInWindow
) {
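// Group the generated documents by the values of the given grouping attributes plus the start of
// the time window containing their @timestamp. Each key is the list of "attribute:value" strings
// followed by the window start (epoch seconds) rendered as a string, mirroring the keys built by getRowKey.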
Map<List<String>, List<Map<String, Object>>> groupedMap = new HashMap<>();
for (XContentBuilder doc : docs) {
Map<String, Object> docMap = XContentHelper.convertToMap(BytesReference.bytes(doc), false, XContentType.JSON).v2();
@SuppressWarnings("unchecked")
List<String> groupingPairs = groupingAttributes.stream()
.map(
attr -> Tuple.tuple(
attr,
((Map<String, Object>) docMap.getOrDefault("attributes", Map.of())).getOrDefault(attr, "").toString()
)
)
.filter(val -> val.v2().isEmpty() == false) // Filter out empty values
.map(tup -> tup.v1() + ":" + tup.v2())
.toList();
long timeBucketStart = windowStart(docMap.get("@timestamp"), secondsInWindow);
var keyList = new ArrayList<>(groupingPairs);
keyList.add(Long.toString(timeBucketStart));
groupedMap.computeIfAbsent(keyList, k -> new ArrayList<>()).add(docMap);
}
return groupedMap;
}

static Long windowStart(Object timestampCell, int secondsInWindow) {
// Truncate the timestamp (an ISO-8601 string) down to the start of its time window, in epoch seconds.
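// For example, with secondsInWindow = 60, a document at epoch second 125 falls into the window
// starting at 125 / 60 * 60 = 120, i.e. all documents in [120, 180) share a bucket.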
return Instant.parse((String) timestampCell).toEpochMilli() / 1000 / secondsInWindow * secondsInWindow;
}

static List<String> getRowKey(List<Object> row, List<String> groupingAttributes) {
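// The STATS queries below produce the aggregated values in columns 0-2, the time bucket in column 3,
// and the grouping dimensions in columns 4 and up; the key is the list of "attribute:value" pairs
// followed by the bucket start in epoch seconds.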
List<String> rowKey = new ArrayList<>();
for (int i = 0; i < groupingAttributes.size(); i++) {
Object value = row.get(i + 4); // Skip the first four columns
if (value != null) {
rowKey.add(groupingAttributes.get(i) + ":" + value);
}
}
rowKey.add(Long.toString(Instant.parse((String) row.get(3)).toEpochMilli() / 1000));
return rowKey;
}

@Override
public EsqlQueryResponse run(EsqlQueryRequest request) {
assumeTrue("time series available in snapshot builds only", Build.current().isSnapshot());
return super.run(request);
}

@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
return List.of(
DataStreamsPlugin.class,
LocalStateCompositeXPackPlugin.class,
// Downsample.class, // TODO(pabloem): What are these
AggregateMetricMapperPlugin.class,
EsqlPlugin.class
);
}

void putTSDBIndexTemplate(List<String> patterns, @Nullable String mappingString) throws IOException {
Settings.Builder settingsBuilder = Settings.builder();
// Ensure it will be a TSDB data stream
settingsBuilder.put(IndexSettings.MODE.getKey(), IndexMode.TIME_SERIES);
settingsBuilder.putList("index.routing_path", List.of("attributes.*"));
CompressedXContent mappings = mappingString == null ? null : CompressedXContent.fromJSON(mappingString);
// Build and install the composable index template for the test data stream.
TransportPutComposableIndexTemplateAction.Request request = new TransportPutComposableIndexTemplateAction.Request(
GenerativeTSIT.DATASTREAM_NAME
);
request.indexTemplate(
ComposableIndexTemplate.builder()
.indexPatterns(patterns)
.template(org.elasticsearch.cluster.metadata.Template.builder().settings(settingsBuilder).mappings(mappings))
.metadata(null)
.dataStreamTemplate(new ComposableIndexTemplate.DataStreamTemplate())
.build()
);
assertAcked(client().execute(TransportPutComposableIndexTemplateAction.TYPE, request));
}

@Before
public void populateIndex() throws IOException {
dataGenerationHelper = new TSDataGenerationHelper(NUM_DOCS);
final XContentBuilder builder = XContentFactory.jsonBuilder();
builder.map(dataGenerationHelper.mapping.raw());
final String jsonMappings = Strings.toString(builder);

putTSDBIndexTemplate(List.of(DATASTREAM_NAME + "*"), jsonMappings);
// Now we can push data into the data stream.
for (int i = 0; i < NUM_DOCS; i++) {
var document = dataGenerationHelper.generateDocument(Map.of());
if (documents == null) {
documents = new ArrayList<>();
}
documents.add(document);
var indexRequest = client().prepareIndex(DATASTREAM_NAME).setOpType(DocWriteRequest.OpType.CREATE).setSource(document);
indexRequest.setRefreshPolicy(org.elasticsearch.action.support.WriteRequest.RefreshPolicy.IMMEDIATE);
indexRequest.get();
}
}

/**
* This test validates Gauge metrics aggregation with grouping by time bucket and a subset of dimensions.
* The subset of dimensions is a random subset of the dimensions present in the data.
* The test reads the max, min, and avg values of the gauge metric from the query results, calculates
* the same values from the documents in each group, and checks that they match.
*/
public void testGroupBySubset() {
var dimensions = ESTestCase.randomNonEmptySubsetOf(dataGenerationHelper.attributesForMetrics);
var dimensionsStr = dimensions.stream().map(d -> "attributes." + d).collect(Collectors.joining(", "));
try (EsqlQueryResponse resp = run(String.format(Locale.ROOT, """
TS %s
| STATS max(max_over_time(metrics.gauge_hdd.bytes.used)),
min(min_over_time(metrics.gauge_hdd.bytes.used)),
avg(avg_over_time(metrics.gauge_hdd.bytes.used))
BY tbucket=bucket(@timestamp, 1 minute), %s
| SORT tbucket
| LIMIT 1000""", DATASTREAM_NAME, dimensionsStr))) {
var groups = groupedRows(documents, dimensions, 60);
List<List<Object>> rows = new ArrayList<>();
resp.rows().forEach(rowIter -> {
List<Object> row = new ArrayList<>();
rowIter.forEach(row::add);
rows.add(row);
});
for (List<Object> row : rows) {
var rowKey = getRowKey(row, dimensions);
List<Map<String, Object>> pointsInGroup = groups.get(rowKey);
@SuppressWarnings("unchecked")
var docValues = pointsInGroup.stream()
.map(doc -> ((Map<String, Integer>) doc.get("metrics")).get("gauge_hdd.bytes.used"))
.toList();
// Verify that the first column is the max value (the query gets max, min, avg in that order)
docValues.stream().max(Integer::compareTo).ifPresentOrElse(maxValue -> {
var res = ((Long) row.getFirst()).intValue();
assertThat(res, equalTo(maxValue));
}, () -> { throw new AssertionError("No values found for group: " + rowKey); });
// Verify that the second column is the min value (thus why row.get(1))
docValues.stream().min(Integer::compareTo).ifPresentOrElse(minValue -> {
var res = ((Long) row.get(1)).intValue();
assertThat(res, equalTo(minValue));
}, () -> { throw new AssertionError("No values found for group: " + rowKey); });
// Verify that the third column is the avg value (thus why row.get(2))
docValues.stream().mapToDouble(Integer::doubleValue).average().ifPresentOrElse(avgValue -> {
var res = (Double) row.get(2);
assertThat(res, closeTo(avgValue, res * 0.5));
Contributor:

Why do we need the 0.5 factor?

Contributor Author:

That was a mistake (I meant to do 5%, not 50%).
However, the average calculation does seem to have up to 20-25% error between ES and the test-framework numbers. Should I check whether that's a bug and how to deal with it?

Contributor:

There should be no error here (Double.compare() should return 0), so there's a bug somewhere. Let's investigate separately.

}, () -> { throw new AssertionError("No values found for group: " + rowKey); });
}
}
}
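// Note: a tighter version of the average check discussed in the review thread above could swap the
// res * 0.5 relative tolerance for a small absolute epsilon once the discrepancy is resolved
// (the 1e-6 value below is only an illustration, not part of this change):
//
//   double expectedAvg = docValues.stream().mapToDouble(Integer::doubleValue).average().orElseThrow();
//   assertThat((Double) row.get(2), closeTo(expectedAvg, 1e-6));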

/**
* This test validates Gauge metrics aggregation with grouping by time bucket only.
* The test reads the max, min, and avg values of the gauge metric from the query results, calculates
* the same values from the documents in each group, and checks that they match.
* Because there is no grouping by dimensions, there is only one metric group per time bucket.
*/
public void testGroupByNothing() {
try (EsqlQueryResponse resp = run(String.format(Locale.ROOT, """
TS %s
| STATS
max(max_over_time(metrics.gauge_hdd.bytes.used)),
avg(avg_over_time(metrics.gauge_hdd.bytes.used)),
min(min_over_time(metrics.gauge_hdd.bytes.used)) BY tbucket=bucket(@timestamp, 1 minute)
| SORT tbucket
| LIMIT 1000""", DATASTREAM_NAME))) {
List<List<Object>> rows = new ArrayList<>();
resp.rows().forEach(rowIter -> {
List<Object> row = new ArrayList<>();
rowIter.forEach(row::add);
rows.add(row);
});
var groups = groupedRows(documents, List.of(), 60);
for (List<Object> row : rows) {
var windowStart = windowStart(row.get(3), 60);
List<Map<String, Object>> windowDataPoints = groups.get(List.of(Long.toString(windowStart)));
@SuppressWarnings("unchecked")
var docValues = windowDataPoints.stream()
.map(doc -> ((Map<String, Integer>) doc.get("metrics")).get("gauge_hdd.bytes.used"))
.toList();
// Verify that the first column is the max value (the query gets max, avg, min in that order)
docValues.stream().max(Integer::compareTo).ifPresentOrElse(maxValue -> {
var res = ((Long) row.getFirst()).intValue();
assertThat(res, equalTo(maxValue));
}, () -> { throw new AssertionError("No values found for window starting at " + windowStart); });
// Verify that the second column is the avg value (thus why row.get(1))
docValues.stream().mapToDouble(Integer::doubleValue).average().ifPresentOrElse(avgValue -> {
var res = (Double) row.get(1);
assertThat(res, closeTo(avgValue, res * 0.5));
}, () -> { throw new AssertionError("No values found for window starting at " + windowStart); });
// Verify that the third column is the min value (thus why row.get(2))
docValues.stream().min(Integer::compareTo).ifPresentOrElse(minValue -> {
var res = ((Long) row.get(2)).intValue();
assertThat(res, equalTo(minValue));
}, () -> { throw new AssertionError("No values found for window starting at " + windowStart); });
}
}
}
}