-
Notifications
You must be signed in to change notification settings - Fork 25.6k
Add random queries to logsdb data generation tests #132109
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
parkertimmins
merged 16 commits into
elastic:main
from
parkertimmins:parker/logsdb-random-test-querying
Jul 31, 2025
Merged
Changes from all commits
Commits
Show all changes
16 commits
Select commit
Hold shift + click to select a range
3f348aa
Phrase query that uses random field from docs
parkertimmins f11085e
Working, but still quite messy
parkertimmins 2abe636
Move query generation into separate classes
parkertimmins 253f714
Remove unused code
parkertimmins f489d14
[CI] Auto commit changes from spotless
e25b09c
Merge branch 'main' into parker/logsdb-random-test-querying
parkertimmins 7b8176c
Wildcard term query has incorrect assertion
parkertimmins a5ef2e6
Merge branch 'main' into parker/logsdb-random-test-querying
parkertimmins 08aa2b9
Remove forbidden api
parkertimmins 5385327
Allow empty results set
parkertimmins da338e0
Dont use emojis queries
parkertimmins e1efabc
For each type decide if needs to be indexed or have doc values
parkertimmins 9990ca7
Query dynamic fields
parkertimmins 1c1f258
text should have phrase query for dynamic mappings
parkertimmins 6a86c34
Dont test runtime fields
parkertimmins a746998
Merge branch 'main' into parker/logsdb-random-test-querying
parkertimmins File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
109 changes: 109 additions & 0 deletions
109
.../framework/src/main/java/org/elasticsearch/datageneration/queries/LeafQueryGenerator.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the "Elastic License | ||
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side | ||
* Public License v 1"; you may not use this file except in compliance with, at | ||
* your election, the "Elastic License 2.0", the "GNU Affero General Public | ||
* License v3.0 only", or the "Server Side Public License, v 1". | ||
*/ | ||
|
||
package org.elasticsearch.datageneration.queries; | ||
|
||
import org.elasticsearch.datageneration.FieldType; | ||
import org.elasticsearch.index.query.QueryBuilder; | ||
import org.elasticsearch.index.query.QueryBuilders; | ||
import org.elasticsearch.test.ESTestCase; | ||
|
||
import java.util.ArrayList; | ||
import java.util.Arrays; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
||
public interface LeafQueryGenerator { | ||
|
||
List<QueryBuilder> generate(Map<String, Object> fieldMapping, String path, Object value); | ||
|
||
/** | ||
* Build a query for a specific type. If the field is nested, this query will need to be wrapped in nested queries. | ||
* @param type the type to build a query for | ||
* @return a generator that can build queries for this type | ||
*/ | ||
static LeafQueryGenerator buildForType(String type) { | ||
LeafQueryGenerator noQueries = (Map<String, Object> fieldMapping, String path, Object value) -> List.of(); | ||
|
||
FieldType fieldType = FieldType.tryParse(type); | ||
if (fieldType == null) { | ||
return noQueries; | ||
} | ||
|
||
return switch (fieldType) { | ||
case KEYWORD -> new KeywordQueryGenerator(); | ||
case TEXT -> new TextQueryGenerator(); | ||
case WILDCARD -> new WildcardQueryGenerator(); | ||
default -> noQueries; | ||
}; | ||
} | ||
|
||
class KeywordQueryGenerator implements LeafQueryGenerator { | ||
public List<QueryBuilder> generate(Map<String, Object> fieldMapping, String path, Object value) { | ||
if (fieldMapping != null) { | ||
boolean isIndexed = (Boolean) fieldMapping.getOrDefault("index", true); | ||
boolean hasDocValues = (Boolean) fieldMapping.getOrDefault("doc_values", true); | ||
if (isIndexed == false && hasDocValues == false) { | ||
return List.of(); | ||
} | ||
} | ||
return List.of(QueryBuilders.termQuery(path, value)); | ||
} | ||
} | ||
|
||
class WildcardQueryGenerator implements LeafQueryGenerator { | ||
public List<QueryBuilder> generate(Map<String, Object> fieldMapping, String path, Object value) { | ||
// Queries with emojis can currently fail due to https://github.com/elastic/elasticsearch/issues/132144 | ||
if (containsHighSurrogates((String) value)) { | ||
return List.of(); | ||
} | ||
return List.of(QueryBuilders.termQuery(path, value), QueryBuilders.wildcardQuery(path, value + "*")); | ||
} | ||
} | ||
|
||
class TextQueryGenerator implements LeafQueryGenerator { | ||
public List<QueryBuilder> generate(Map<String, Object> fieldMapping, String path, Object value) { | ||
if (fieldMapping != null) { | ||
boolean isIndexed = (Boolean) fieldMapping.getOrDefault("index", true); | ||
if (isIndexed == false) { | ||
return List.of(); | ||
} | ||
} | ||
|
||
var results = new ArrayList<QueryBuilder>(); | ||
results.add(QueryBuilders.matchQuery(path, value)); | ||
var phraseQuery = buildPhraseQuery(path, (String) value); | ||
if (phraseQuery != null) { | ||
results.add(phraseQuery); | ||
} | ||
return results; | ||
} | ||
|
||
private static QueryBuilder buildPhraseQuery(String path, String value) { | ||
var tokens = Arrays.asList(value.split("[^a-zA-Z0-9]")); | ||
if (tokens.isEmpty()) { | ||
return null; | ||
} | ||
|
||
int low = ESTestCase.randomIntBetween(0, tokens.size() - 1); | ||
int hi = ESTestCase.randomIntBetween(low + 1, tokens.size()); | ||
var phrase = String.join(" ", tokens.subList(low, hi)); | ||
return QueryBuilders.matchPhraseQuery(path, phrase); | ||
} | ||
} | ||
|
||
static boolean containsHighSurrogates(String s) { | ||
for (int i = 0; i < s.length(); i++) { | ||
if (Character.isHighSurrogate(s.charAt(i))) { | ||
return true; | ||
} | ||
} | ||
return false; | ||
} | ||
} |
98 changes: 98 additions & 0 deletions
98
test/framework/src/main/java/org/elasticsearch/datageneration/queries/QueryGenerator.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the "Elastic License | ||
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side | ||
* Public License v 1"; you may not use this file except in compliance with, at | ||
* your election, the "Elastic License 2.0", the "GNU Affero General Public | ||
* License v3.0 only", or the "Server Side Public License, v 1". | ||
*/ | ||
|
||
package org.elasticsearch.datageneration.queries; | ||
|
||
import org.apache.lucene.search.join.ScoreMode; | ||
import org.elasticsearch.datageneration.Mapping; | ||
import org.elasticsearch.index.query.QueryBuilder; | ||
import org.elasticsearch.index.query.QueryBuilders; | ||
|
||
import java.util.ArrayList; | ||
import java.util.Arrays; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
||
public class QueryGenerator { | ||
|
||
private final Mapping mapping; | ||
|
||
public QueryGenerator(Mapping mapping) { | ||
this.mapping = mapping; | ||
} | ||
|
||
public List<QueryBuilder> generateQueries(String type, String path, Object value) { | ||
// This query generator cannot handle fields with periods in the name. | ||
if (path.equals("host.name")) { | ||
return List.of(); | ||
} | ||
// Can handle dynamically mapped fields, but not runtime fields | ||
if (isRuntimeField(path)) { | ||
return List.of(); | ||
} | ||
var leafQueryGenerator = LeafQueryGenerator.buildForType(type); | ||
var fieldMapping = mapping.lookup().get(path); | ||
var leafQueries = leafQueryGenerator.generate(fieldMapping, path, value); | ||
return leafQueries.stream().map(q -> wrapInNestedQuery(path, q)).toList(); | ||
} | ||
|
||
private QueryBuilder wrapInNestedQuery(String path, QueryBuilder leafQuery) { | ||
String[] parts = path.split("\\."); | ||
List<String> nestedPaths = getNestedPathPrefixes(parts); | ||
QueryBuilder query = leafQuery; | ||
for (String nestedPath : nestedPaths.reversed()) { | ||
query = QueryBuilders.nestedQuery(nestedPath, query, ScoreMode.Max); | ||
} | ||
return query; | ||
} | ||
|
||
@SuppressWarnings("unchecked") | ||
private List<String> getNestedPathPrefixes(String[] path) { | ||
Map<String, Object> mapping = this.mapping.raw(); | ||
mapping = (Map<String, Object>) mapping.get("_doc"); | ||
mapping = (Map<String, Object>) mapping.get("properties"); | ||
|
||
var result = new ArrayList<String>(); | ||
for (int i = 0; i < path.length - 1; i++) { | ||
var field = path[i]; | ||
mapping = (Map<String, Object>) mapping.get(field); | ||
|
||
// dynamic field | ||
if (mapping == null) { | ||
break; | ||
} | ||
|
||
boolean nested = "nested".equals(mapping.get("type")); | ||
if (nested) { | ||
result.add(String.join(".", Arrays.copyOfRange(path, 0, i + 1))); | ||
} | ||
mapping = (Map<String, Object>) mapping.get("properties"); | ||
} | ||
return result; | ||
} | ||
|
||
@SuppressWarnings("unchecked") | ||
private boolean isRuntimeField(String path) { | ||
String[] parts = path.split("\\."); | ||
var topLevelMapping = (Map<String, Object>) mapping.raw().get("_doc"); | ||
boolean inRuntimeContext = "runtime".equals(topLevelMapping.get("dynamic")); | ||
for (int i = 0; i < parts.length - 1; i++) { | ||
var pathToHere = String.join(".", Arrays.copyOfRange(parts, 0, i + 1)); | ||
Map<String, Object> fieldMapping = mapping.lookup().get(pathToHere); | ||
if (fieldMapping == null) { | ||
break; | ||
} | ||
if (fieldMapping.containsKey("dynamic")) { | ||
// lower down dynamic definitions override higher up behavior | ||
inRuntimeContext = "runtime".equals(fieldMapping.get("dynamic")); | ||
} | ||
} | ||
return inRuntimeContext; | ||
} | ||
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.