Skip to content

Commit a857290

Browse files
Add random queries to logsdb data generation tests (#132109)
The existing logsdb vs. standard tests query fields with known static values. This change adds a test that queries fields using values taken from the randomly generated documents. Currently, queries are only generated for keyword, text, and wildcard fields. Multiple queries can be provided per value, and each query must return at least one document.
1 parent dd3e3c9 commit a857290

File tree

5 files changed

+283
-7
lines changed

5 files changed

+283
-7
lines changed

test/framework/src/main/java/org/elasticsearch/datageneration/matchers/source/SourceTransforms.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
import java.util.function.Function;
1919
import java.util.stream.Collectors;
2020

21-
class SourceTransforms {
21+
public class SourceTransforms {
2222
/**
2323
* This preprocessing step makes it easier to match the document using a unified structure.
2424
* It performs following modifications:
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.datageneration.queries;
11+
12+
import org.elasticsearch.datageneration.FieldType;
13+
import org.elasticsearch.index.query.QueryBuilder;
14+
import org.elasticsearch.index.query.QueryBuilders;
15+
import org.elasticsearch.test.ESTestCase;
16+
17+
import java.util.ArrayList;
18+
import java.util.Arrays;
19+
import java.util.List;
20+
import java.util.Map;
21+
22+
public interface LeafQueryGenerator {
23+
24+
List<QueryBuilder> generate(Map<String, Object> fieldMapping, String path, Object value);
25+
26+
/**
27+
* Build a query for a specific type. If the field is nested, this query will need to be wrapped in nested queries.
28+
* @param type the type to build a query for
29+
* @return a generator that can build queries for this type
30+
*/
31+
static LeafQueryGenerator buildForType(String type) {
32+
LeafQueryGenerator noQueries = (Map<String, Object> fieldMapping, String path, Object value) -> List.of();
33+
34+
FieldType fieldType = FieldType.tryParse(type);
35+
if (fieldType == null) {
36+
return noQueries;
37+
}
38+
39+
return switch (fieldType) {
40+
case KEYWORD -> new KeywordQueryGenerator();
41+
case TEXT -> new TextQueryGenerator();
42+
case WILDCARD -> new WildcardQueryGenerator();
43+
default -> noQueries;
44+
};
45+
}
46+
47+
class KeywordQueryGenerator implements LeafQueryGenerator {
48+
public List<QueryBuilder> generate(Map<String, Object> fieldMapping, String path, Object value) {
49+
if (fieldMapping != null) {
50+
boolean isIndexed = (Boolean) fieldMapping.getOrDefault("index", true);
51+
boolean hasDocValues = (Boolean) fieldMapping.getOrDefault("doc_values", true);
52+
if (isIndexed == false && hasDocValues == false) {
53+
return List.of();
54+
}
55+
}
56+
return List.of(QueryBuilders.termQuery(path, value));
57+
}
58+
}
59+
60+
class WildcardQueryGenerator implements LeafQueryGenerator {
61+
public List<QueryBuilder> generate(Map<String, Object> fieldMapping, String path, Object value) {
62+
// Queries with emojis can currently fail due to https://github.com/elastic/elasticsearch/issues/132144
63+
if (containsHighSurrogates((String) value)) {
64+
return List.of();
65+
}
66+
return List.of(QueryBuilders.termQuery(path, value), QueryBuilders.wildcardQuery(path, value + "*"));
67+
}
68+
}
69+
70+
class TextQueryGenerator implements LeafQueryGenerator {
71+
public List<QueryBuilder> generate(Map<String, Object> fieldMapping, String path, Object value) {
72+
if (fieldMapping != null) {
73+
boolean isIndexed = (Boolean) fieldMapping.getOrDefault("index", true);
74+
if (isIndexed == false) {
75+
return List.of();
76+
}
77+
}
78+
79+
var results = new ArrayList<QueryBuilder>();
80+
results.add(QueryBuilders.matchQuery(path, value));
81+
var phraseQuery = buildPhraseQuery(path, (String) value);
82+
if (phraseQuery != null) {
83+
results.add(phraseQuery);
84+
}
85+
return results;
86+
}
87+
88+
private static QueryBuilder buildPhraseQuery(String path, String value) {
89+
var tokens = Arrays.asList(value.split("[^a-zA-Z0-9]"));
90+
if (tokens.isEmpty()) {
91+
return null;
92+
}
93+
94+
int low = ESTestCase.randomIntBetween(0, tokens.size() - 1);
95+
int hi = ESTestCase.randomIntBetween(low + 1, tokens.size());
96+
var phrase = String.join(" ", tokens.subList(low, hi));
97+
return QueryBuilders.matchPhraseQuery(path, phrase);
98+
}
99+
}
100+
101+
static boolean containsHighSurrogates(String s) {
102+
for (int i = 0; i < s.length(); i++) {
103+
if (Character.isHighSurrogate(s.charAt(i))) {
104+
return true;
105+
}
106+
}
107+
return false;
108+
}
109+
}
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.datageneration.queries;
11+
12+
import org.apache.lucene.search.join.ScoreMode;
13+
import org.elasticsearch.datageneration.Mapping;
14+
import org.elasticsearch.index.query.QueryBuilder;
15+
import org.elasticsearch.index.query.QueryBuilders;
16+
17+
import java.util.ArrayList;
18+
import java.util.Arrays;
19+
import java.util.List;
20+
import java.util.Map;
21+
22+
public class QueryGenerator {
23+
24+
private final Mapping mapping;
25+
26+
public QueryGenerator(Mapping mapping) {
27+
this.mapping = mapping;
28+
}
29+
30+
public List<QueryBuilder> generateQueries(String type, String path, Object value) {
31+
// This query generator cannot handle fields with periods in the name.
32+
if (path.equals("host.name")) {
33+
return List.of();
34+
}
35+
// Can handle dynamically mapped fields, but not runtime fields
36+
if (isRuntimeField(path)) {
37+
return List.of();
38+
}
39+
var leafQueryGenerator = LeafQueryGenerator.buildForType(type);
40+
var fieldMapping = mapping.lookup().get(path);
41+
var leafQueries = leafQueryGenerator.generate(fieldMapping, path, value);
42+
return leafQueries.stream().map(q -> wrapInNestedQuery(path, q)).toList();
43+
}
44+
45+
private QueryBuilder wrapInNestedQuery(String path, QueryBuilder leafQuery) {
46+
String[] parts = path.split("\\.");
47+
List<String> nestedPaths = getNestedPathPrefixes(parts);
48+
QueryBuilder query = leafQuery;
49+
for (String nestedPath : nestedPaths.reversed()) {
50+
query = QueryBuilders.nestedQuery(nestedPath, query, ScoreMode.Max);
51+
}
52+
return query;
53+
}
54+
55+
@SuppressWarnings("unchecked")
56+
private List<String> getNestedPathPrefixes(String[] path) {
57+
Map<String, Object> mapping = this.mapping.raw();
58+
mapping = (Map<String, Object>) mapping.get("_doc");
59+
mapping = (Map<String, Object>) mapping.get("properties");
60+
61+
var result = new ArrayList<String>();
62+
for (int i = 0; i < path.length - 1; i++) {
63+
var field = path[i];
64+
mapping = (Map<String, Object>) mapping.get(field);
65+
66+
// dynamic field
67+
if (mapping == null) {
68+
break;
69+
}
70+
71+
boolean nested = "nested".equals(mapping.get("type"));
72+
if (nested) {
73+
result.add(String.join(".", Arrays.copyOfRange(path, 0, i + 1)));
74+
}
75+
mapping = (Map<String, Object>) mapping.get("properties");
76+
}
77+
return result;
78+
}
79+
80+
@SuppressWarnings("unchecked")
81+
private boolean isRuntimeField(String path) {
82+
String[] parts = path.split("\\.");
83+
var topLevelMapping = (Map<String, Object>) mapping.raw().get("_doc");
84+
boolean inRuntimeContext = "runtime".equals(topLevelMapping.get("dynamic"));
85+
for (int i = 0; i < parts.length - 1; i++) {
86+
var pathToHere = String.join(".", Arrays.copyOfRange(parts, 0, i + 1));
87+
Map<String, Object> fieldMapping = mapping.lookup().get(pathToHere);
88+
if (fieldMapping == null) {
89+
break;
90+
}
91+
if (fieldMapping.containsKey("dynamic")) {
92+
// lower down dynamic definitions override higher up behavior
93+
inRuntimeContext = "runtime".equals(fieldMapping.get("dynamic"));
94+
}
95+
}
96+
return inRuntimeContext;
97+
}
98+
}

x-pack/plugin/logsdb/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/qa/DataGenerationHelper.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import java.util.Arrays;
3030
import java.util.List;
3131
import java.util.Map;
32+
import java.util.TreeMap;
3233
import java.util.function.Consumer;
3334
import java.util.function.Supplier;
3435
import java.util.stream.Collectors;
@@ -121,6 +122,26 @@ Mapping mapping() {
121122
return this.mapping;
122123
}
123124

125+
public Map<String, String> getTemplateFieldTypes() {
126+
Map<String, String> allPaths = new TreeMap<>();
127+
gatherFieldTypes(allPaths, "", template.template());
128+
return allPaths;
129+
}
130+
131+
private static void gatherFieldTypes(Map<String, String> paths, String pathToHere, Map<String, Template.Entry> template) {
132+
for (var entry : template.entrySet()) {
133+
var field = entry.getKey();
134+
var child = entry.getValue();
135+
var pathToChild = pathToHere.isEmpty() ? field : pathToHere + "." + field;
136+
if (child instanceof Template.Object object) {
137+
gatherFieldTypes(paths, pathToChild, object.children());
138+
} else {
139+
var leaf = (Template.Leaf) child;
140+
paths.put(pathToChild, leaf.type());
141+
}
142+
}
143+
}
144+
124145
void writeLogsDbMapping(XContentBuilder builder) throws IOException {
125146
builder.map(mapping.raw());
126147
}

x-pack/plugin/logsdb/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/qa/StandardVersusLogsIndexModeChallengeRestIT.java

Lines changed: 54 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,16 @@
1111
import org.elasticsearch.client.Response;
1212
import org.elasticsearch.client.ResponseException;
1313
import org.elasticsearch.client.RestClient;
14+
import org.elasticsearch.common.Strings;
1415
import org.elasticsearch.common.settings.Settings;
1516
import org.elasticsearch.common.time.DateFormatter;
1617
import org.elasticsearch.common.time.FormatNames;
1718
import org.elasticsearch.common.xcontent.XContentHelper;
1819
import org.elasticsearch.datageneration.matchers.MatchResult;
1920
import org.elasticsearch.datageneration.matchers.Matcher;
21+
import org.elasticsearch.datageneration.matchers.source.SourceTransforms;
22+
import org.elasticsearch.datageneration.queries.QueryGenerator;
23+
import org.elasticsearch.index.query.QueryBuilder;
2024
import org.elasticsearch.index.query.QueryBuilders;
2125
import org.elasticsearch.search.aggregations.AggregationBuilders;
2226
import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval;
@@ -37,6 +41,7 @@
3741
import java.util.Comparator;
3842
import java.util.List;
3943
import java.util.Map;
44+
import java.util.Objects;
4045
import java.util.TreeMap;
4146

4247
import static org.hamcrest.Matchers.equalTo;
@@ -132,12 +137,52 @@ public void testMatchAllQuery() throws IOException {
132137
final MatchResult matchResult = Matcher.matchSource()
133138
.mappings(dataGenerationHelper.mapping().lookup(), getContenderMappings(), getBaselineMappings())
134139
.settings(getContenderSettings(), getBaselineSettings())
135-
.expected(getQueryHits(queryBaseline(searchSourceBuilder)))
140+
.expected(getQueryHits(queryBaseline(searchSourceBuilder), true))
136141
.ignoringSort(true)
137-
.isEqualTo(getQueryHits(queryContender(searchSourceBuilder)));
142+
.isEqualTo(getQueryHits(queryContender(searchSourceBuilder), true));
138143
assertTrue(matchResult.getMessage(), matchResult.isMatch());
139144
}
140145

146+
public void testRandomQueries() throws IOException {
147+
int numberOfDocuments = ESTestCase.randomIntBetween(10, 50);
148+
final List<XContentBuilder> documents = generateDocuments(numberOfDocuments);
149+
var mappingLookup = dataGenerationHelper.mapping().lookup();
150+
final List<Map<String, List<Object>>> docsNormalized = documents.stream().map(d -> {
151+
var document = XContentHelper.convertToMap(XContentType.JSON.xContent(), Strings.toString(d), true);
152+
return SourceTransforms.normalize(document, mappingLookup);
153+
}).toList();
154+
155+
indexDocuments(documents);
156+
157+
QueryGenerator queryGenerator = new QueryGenerator(dataGenerationHelper.mapping());
158+
Map<String, String> fieldsTypes = dataGenerationHelper.getTemplateFieldTypes();
159+
for (var e : fieldsTypes.entrySet()) {
160+
var path = e.getKey();
161+
var type = e.getValue();
162+
var docsWithFields = docsNormalized.stream().filter(d -> d.containsKey(path)).toList();
163+
if (docsWithFields.isEmpty() == false) {
164+
var doc = randomFrom(docsWithFields);
165+
List<Object> values = doc.get(path).stream().filter(Objects::nonNull).toList();
166+
if (values.isEmpty() == false) {
167+
Object value = randomFrom(values);
168+
List<QueryBuilder> queries = queryGenerator.generateQueries(type, path, value);
169+
for (var query : queries) {
170+
logger.info("Querying for field [{}] with value [{}]", path, value);
171+
172+
final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(query).size(numberOfDocuments);
173+
final MatchResult matchResult = Matcher.matchSource()
174+
.mappings(dataGenerationHelper.mapping().lookup(), getContenderMappings(), getBaselineMappings())
175+
.settings(getContenderSettings(), getBaselineSettings())
176+
.expected(getQueryHits(queryBaseline(searchSourceBuilder), false))
177+
.ignoringSort(true)
178+
.isEqualTo(getQueryHits(queryContender(searchSourceBuilder), false));
179+
assertTrue(matchResult.getMessage(), matchResult.isMatch());
180+
}
181+
}
182+
}
183+
}
184+
}
185+
141186
public void testTermsQuery() throws IOException {
142187
int numberOfDocuments = ESTestCase.randomIntBetween(20, 80);
143188
final List<XContentBuilder> documents = generateDocuments(numberOfDocuments);
@@ -150,9 +195,9 @@ public void testTermsQuery() throws IOException {
150195
final MatchResult matchResult = Matcher.matchSource()
151196
.mappings(dataGenerationHelper.mapping().lookup(), getContenderMappings(), getBaselineMappings())
152197
.settings(getContenderSettings(), getBaselineSettings())
153-
.expected(getQueryHits(queryBaseline(searchSourceBuilder)))
198+
.expected(getQueryHits(queryBaseline(searchSourceBuilder), true))
154199
.ignoringSort(true)
155-
.isEqualTo(getQueryHits(queryContender(searchSourceBuilder)));
200+
.isEqualTo(getQueryHits(queryContender(searchSourceBuilder), true));
156201
assertTrue(matchResult.getMessage(), matchResult.isMatch());
157202
}
158203

@@ -291,12 +336,15 @@ protected XContentBuilder generateDocument(final Instant timestamp) throws IOExc
291336
}
292337

293338
@SuppressWarnings("unchecked")
294-
private static List<Map<String, Object>> getQueryHits(final Response response) throws IOException {
339+
private static List<Map<String, Object>> getQueryHits(final Response response, final boolean requireResults) throws IOException {
295340
final Map<String, Object> map = XContentHelper.convertToMap(XContentType.JSON.xContent(), response.getEntity().getContent(), true);
296341
final Map<String, Object> hitsMap = (Map<String, Object>) map.get("hits");
297342

298343
final List<Map<String, Object>> hitsList = (List<Map<String, Object>>) hitsMap.get("hits");
299-
assertThat(hitsList.size(), greaterThan(0));
344+
345+
if (requireResults) {
346+
assertThat(hitsList.size(), greaterThan(0));
347+
}
300348

301349
return hitsList.stream()
302350
.sorted(Comparator.comparing((Map<String, Object> hit) -> ((String) hit.get("_id"))))

0 commit comments

Comments
 (0)