Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import java.util.function.Function;
import java.util.stream.Collectors;

class SourceTransforms {
public class SourceTransforms {
/**
* This preprocessing step makes it easier to match the document using a unified structure.
* It performs the following modifications:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.datageneration.queries;

import org.elasticsearch.datageneration.FieldType;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.test.ESTestCase;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

/**
 * Generates leaf queries for a single value of a single field.
 * Use {@link #buildForType(String)} to obtain the generator for a field type.
 */
public interface LeafQueryGenerator {

    /**
     * Builds the queries for one field value.
     *
     * @param fieldMapping mapping parameters of the field; may be {@code null}, in which case the
     *                     implementations in this file fall back to the default parameter values
     * @param path         full path of the field to query
     * @param value        the value to query for
     * @return the generated queries; empty when no query is generated for this field
     */
    List<QueryBuilder> generate(Map<String, Object> fieldMapping, String path, Object value);

    /**
     * Build a query for a specific type. If the field is nested, this query will need to be wrapped in nested queries.
     * @param type the type to build a query for
     * @return a generator that can build queries for this type; for unparseable or unsupported types
     *         a generator that never produces queries
     */
    static LeafQueryGenerator buildForType(String type) {
        LeafQueryGenerator noQueries = (fieldMapping, path, value) -> List.of();

        FieldType fieldType = FieldType.tryParse(type);
        if (fieldType == null) {
            return noQueries;
        }

        return switch (fieldType) {
            case KEYWORD -> new KeywordQueryGenerator();
            case TEXT -> new TextQueryGenerator();
            case WILDCARD -> new WildcardQueryGenerator();
            default -> noQueries;
        };
    }

    /**
     * Reads a boolean mapping parameter that defaults to {@code true}.
     *
     * @param fieldMapping mapping parameters of the field, or {@code null}
     * @param parameter    name of the parameter, e.g. {@code "index"}
     * @return {@code true} when there is no mapping or the parameter is absent, otherwise the configured value
     */
    private static boolean isEnabled(Map<String, Object> fieldMapping, String parameter) {
        if (fieldMapping == null) {
            return true;
        }
        return (Boolean) fieldMapping.getOrDefault(parameter, true);
    }

    /**
     * Emits a single term query, unless the field has both {@code index} and {@code doc_values} disabled.
     */
    class KeywordQueryGenerator implements LeafQueryGenerator {
        @Override
        public List<QueryBuilder> generate(Map<String, Object> fieldMapping, String path, Object value) {
            if (isEnabled(fieldMapping, "index") == false && isEnabled(fieldMapping, "doc_values") == false) {
                return List.of();
            }
            return List.of(QueryBuilders.termQuery(path, value));
        }
    }

    /**
     * Emits a term query and a prefix-style wildcard query ({@code value + "*"}).
     * The value is passed to the wildcard query as is; wildcard metacharacters in it are not escaped.
     */
    class WildcardQueryGenerator implements LeafQueryGenerator {
        @Override
        public List<QueryBuilder> generate(Map<String, Object> fieldMapping, String path, Object value) {
            // Queries with emojis can currently fail due to https://github.com/elastic/elasticsearch/issues/132144
            if (containsHighSurrogates((String) value)) {
                return List.of();
            }
            return List.of(QueryBuilders.termQuery(path, value), QueryBuilders.wildcardQuery(path, value + "*"));
        }
    }

    /**
     * Emits a match query and, when the value contains at least one alphanumeric token, a match_phrase
     * query over a random contiguous run of those tokens. Emits nothing when {@code index} is disabled.
     */
    class TextQueryGenerator implements LeafQueryGenerator {
        @Override
        public List<QueryBuilder> generate(Map<String, Object> fieldMapping, String path, Object value) {
            if (isEnabled(fieldMapping, "index") == false) {
                return List.of();
            }

            var results = new ArrayList<QueryBuilder>();
            results.add(QueryBuilders.matchQuery(path, value));
            var phraseQuery = buildPhraseQuery(path, (String) value);
            if (phraseQuery != null) {
                results.add(phraseQuery);
            }
            return results;
        }

        /**
         * Builds a match_phrase query from a random contiguous run of the value's alphanumeric tokens.
         *
         * @return the phrase query, or {@code null} when the value has no alphanumeric token
         */
        private static QueryBuilder buildPhraseQuery(String path, String value) {
            // split() yields empty tokens for leading/consecutive separators and for the empty string.
            // Drop them so the phrase always contains at least one real token instead of being blank.
            List<String> tokens = Arrays.stream(value.split("[^a-zA-Z0-9]")).filter(token -> token.isEmpty() == false).toList();
            if (tokens.isEmpty()) {
                return null;
            }

            // [low, hi) is a non-empty range of token indices
            int low = ESTestCase.randomIntBetween(0, tokens.size() - 1);
            int hi = ESTestCase.randomIntBetween(low + 1, tokens.size());
            var phrase = String.join(" ", tokens.subList(low, hi));
            return QueryBuilders.matchPhraseQuery(path, phrase);
        }
    }

    /**
     * @param s the string to inspect, must not be {@code null}
     * @return {@code true} if any UTF-16 code unit of {@code s} is a high surrogate
     */
    static boolean containsHighSurrogates(String s) {
        for (int i = 0; i < s.length(); i++) {
            if (Character.isHighSurrogate(s.charAt(i))) {
                return true;
            }
        }
        return false;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.datageneration.queries;

import org.apache.lucene.search.join.ScoreMode;
import org.elasticsearch.datageneration.Mapping;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

/**
 * Generates queries for a field value based on the field type and the index {@link Mapping},
 * wrapping leaf queries in nested queries for every enclosing field of type {@code nested}.
 */
public class QueryGenerator {

    private final Mapping mapping;

    public QueryGenerator(Mapping mapping) {
        this.mapping = mapping;
    }

    /**
     * Generates the queries for one field value.
     *
     * @param type  the field type name, as understood by {@link LeafQueryGenerator#buildForType(String)}
     * @param path  full dotted path of the field
     * @param value the value to query for
     * @return the generated queries; empty when the field is skipped or its type produces no queries
     */
    public List<QueryBuilder> generateQueries(String type, String path, Object value) {
        // This query generator cannot handle fields with periods in the name.
        if (path.equals("host.name")) {
            return List.of();
        }
        // Can handle dynamically mapped fields, but not runtime fields
        if (isRuntimeField(path)) {
            return List.of();
        }
        var leafQueryGenerator = LeafQueryGenerator.buildForType(type);
        var fieldMapping = mapping.lookup().get(path);
        var leafQueries = leafQueryGenerator.generate(fieldMapping, path, value);
        if (leafQueries.isEmpty()) {
            return List.of();
        }
        // The nested prefixes depend only on the path, so resolve them once for all leaf queries.
        List<String> nestedPaths = getNestedPathPrefixes(path.split("\\."));
        return leafQueries.stream().map(q -> wrapInNestedQuery(nestedPaths, q)).toList();
    }

    /**
     * Wraps the leaf query in one nested query per nested prefix, innermost prefix first,
     * so that the outermost nested field ends up as the outermost query.
     */
    private QueryBuilder wrapInNestedQuery(List<String> nestedPaths, QueryBuilder leafQuery) {
        QueryBuilder query = leafQuery;
        for (String nestedPath : nestedPaths.reversed()) {
            query = QueryBuilders.nestedQuery(nestedPath, query, ScoreMode.Max);
        }
        return query;
    }

    /**
     * Walks the raw mapping along the parents of the given field and collects the dotted paths of all
     * parents whose type is {@code nested}, ordered from outermost to innermost.
     *
     * @param path the field path split into its segments; the last segment is the field itself
     */
    @SuppressWarnings("unchecked")
    private List<String> getNestedPathPrefixes(String[] path) {
        var root = (Map<String, Object>) mapping.raw().get("_doc");
        var properties = (Map<String, Object>) root.get("properties");

        var result = new ArrayList<String>();
        for (int i = 0; i < path.length - 1; i++) {
            // The parent was mapped without any sub-fields, so everything below it is dynamic.
            if (properties == null) {
                break;
            }
            var parentMapping = (Map<String, Object>) properties.get(path[i]);

            // dynamic field
            if (parentMapping == null) {
                break;
            }

            if ("nested".equals(parentMapping.get("type"))) {
                result.add(String.join(".", Arrays.copyOfRange(path, 0, i + 1)));
            }
            properties = (Map<String, Object>) parentMapping.get("properties");
        }
        return result;
    }

    /**
     * Determines whether the field sits in a context with {@code dynamic: runtime}. The top-level
     * setting is the starting point and each parent found in the mapping lookup may override it.
     * The walk stops at the first parent that has no entry in the lookup.
     */
    @SuppressWarnings("unchecked")
    private boolean isRuntimeField(String path) {
        String[] parts = path.split("\\.");
        var topLevelMapping = (Map<String, Object>) mapping.raw().get("_doc");
        boolean inRuntimeContext = "runtime".equals(topLevelMapping.get("dynamic"));
        for (int i = 0; i < parts.length - 1; i++) {
            var pathToHere = String.join(".", Arrays.copyOfRange(parts, 0, i + 1));
            Map<String, Object> fieldMapping = mapping.lookup().get(pathToHere);
            if (fieldMapping == null) {
                break;
            }
            if (fieldMapping.containsKey("dynamic")) {
                // lower down dynamic definitions override higher up behavior
                inRuntimeContext = "runtime".equals(fieldMapping.get("dynamic"));
            }
        }
        return inRuntimeContext;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.function.Consumer;
import java.util.function.Supplier;
import java.util.stream.Collectors;
Expand Down Expand Up @@ -121,6 +122,26 @@ Mapping mapping() {
return this.mapping;
}

/**
 * Collects the type of every leaf field of the template.
 *
 * @return a map from full dotted field path to the leaf's type, sorted by path
 */
public Map<String, String> getTemplateFieldTypes() {
    final Map<String, String> typesByPath = new TreeMap<>();
    gatherFieldTypes(typesByPath, "", template.template());
    return typesByPath;
}

/**
 * Recursively records the type of every leaf below the given template level.
 *
 * @param paths      receives one entry per leaf: full dotted path to leaf type
 * @param pathToHere dotted path of the level being visited, empty for the root
 * @param template   the entries of the level being visited
 */
private static void gatherFieldTypes(Map<String, String> paths, String pathToHere, Map<String, Template.Entry> template) {
    // Children of the root are addressed by their bare name, everything else gets a "parent." prefix.
    final String prefix = pathToHere.isEmpty() ? "" : pathToHere + ".";
    template.forEach((name, node) -> {
        String fullPath = prefix + name;
        if (node instanceof Template.Object nested) {
            gatherFieldTypes(paths, fullPath, nested.children());
            return;
        }
        paths.put(fullPath, ((Template.Leaf) node).type());
    });
}

/**
 * Writes the raw generated mapping into the given builder.
 *
 * @param builder the builder to write the mapping to
 * @throws IOException if writing to the builder fails
 */
void writeLogsDbMapping(XContentBuilder builder) throws IOException {
    var rawMapping = mapping.raw();
    builder.map(rawMapping);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,16 @@
import org.elasticsearch.client.Response;
import org.elasticsearch.client.ResponseException;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.time.DateFormatter;
import org.elasticsearch.common.time.FormatNames;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.datageneration.matchers.MatchResult;
import org.elasticsearch.datageneration.matchers.Matcher;
import org.elasticsearch.datageneration.matchers.source.SourceTransforms;
import org.elasticsearch.datageneration.queries.QueryGenerator;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval;
Expand All @@ -37,6 +41,7 @@
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.TreeMap;

import static org.hamcrest.Matchers.equalTo;
Expand Down Expand Up @@ -132,12 +137,52 @@ public void testMatchAllQuery() throws IOException {
final MatchResult matchResult = Matcher.matchSource()
.mappings(dataGenerationHelper.mapping().lookup(), getContenderMappings(), getBaselineMappings())
.settings(getContenderSettings(), getBaselineSettings())
.expected(getQueryHits(queryBaseline(searchSourceBuilder)))
.expected(getQueryHits(queryBaseline(searchSourceBuilder), true))
.ignoringSort(true)
.isEqualTo(getQueryHits(queryContender(searchSourceBuilder)));
.isEqualTo(getQueryHits(queryContender(searchSourceBuilder), true));
assertTrue(matchResult.getMessage(), matchResult.isMatch());
}

/**
 * Indexes random documents, then for every template field picks a random non-null value from a
 * random document containing that field, generates queries for it and checks that baseline and
 * contender return matching hits. Queries are allowed to return no hits.
 */
public void testRandomQueries() throws IOException {
    final int numberOfDocuments = ESTestCase.randomIntBetween(10, 50);
    final List<XContentBuilder> documents = generateDocuments(numberOfDocuments);
    var mappingLookup = dataGenerationHelper.mapping().lookup();
    final List<Map<String, List<Object>>> docsNormalized = documents.stream()
        .map(d -> XContentHelper.convertToMap(XContentType.JSON.xContent(), Strings.toString(d), true))
        .map(document -> SourceTransforms.normalize(document, mappingLookup))
        .toList();

    indexDocuments(documents);

    QueryGenerator queryGenerator = new QueryGenerator(dataGenerationHelper.mapping());
    Map<String, String> fieldsTypes = dataGenerationHelper.getTemplateFieldTypes();
    for (var fieldAndType : fieldsTypes.entrySet()) {
        final String path = fieldAndType.getKey();
        final String type = fieldAndType.getValue();

        // Only documents that actually contain the field can provide a value to query for.
        var candidates = docsNormalized.stream().filter(d -> d.containsKey(path)).toList();
        if (candidates.isEmpty()) {
            continue;
        }

        var chosenDoc = randomFrom(candidates);
        List<Object> nonNullValues = chosenDoc.get(path).stream().filter(Objects::nonNull).toList();
        if (nonNullValues.isEmpty()) {
            continue;
        }

        Object value = randomFrom(nonNullValues);
        for (QueryBuilder query : queryGenerator.generateQueries(type, path, value)) {
            logger.info("Querying for field [{}] with value [{}]", path, value);

            final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(query).size(numberOfDocuments);
            final MatchResult matchResult = Matcher.matchSource()
                .mappings(dataGenerationHelper.mapping().lookup(), getContenderMappings(), getBaselineMappings())
                .settings(getContenderSettings(), getBaselineSettings())
                .expected(getQueryHits(queryBaseline(searchSourceBuilder), false))
                .ignoringSort(true)
                .isEqualTo(getQueryHits(queryContender(searchSourceBuilder), false));
            assertTrue(matchResult.getMessage(), matchResult.isMatch());
        }
    }
}

public void testTermsQuery() throws IOException {
int numberOfDocuments = ESTestCase.randomIntBetween(20, 80);
final List<XContentBuilder> documents = generateDocuments(numberOfDocuments);
Expand All @@ -150,9 +195,9 @@ public void testTermsQuery() throws IOException {
final MatchResult matchResult = Matcher.matchSource()
.mappings(dataGenerationHelper.mapping().lookup(), getContenderMappings(), getBaselineMappings())
.settings(getContenderSettings(), getBaselineSettings())
.expected(getQueryHits(queryBaseline(searchSourceBuilder)))
.expected(getQueryHits(queryBaseline(searchSourceBuilder), true))
.ignoringSort(true)
.isEqualTo(getQueryHits(queryContender(searchSourceBuilder)));
.isEqualTo(getQueryHits(queryContender(searchSourceBuilder), true));
assertTrue(matchResult.getMessage(), matchResult.isMatch());
}

Expand Down Expand Up @@ -291,12 +336,15 @@ protected XContentBuilder generateDocument(final Instant timestamp) throws IOExc
}

@SuppressWarnings("unchecked")
private static List<Map<String, Object>> getQueryHits(final Response response) throws IOException {
private static List<Map<String, Object>> getQueryHits(final Response response, final boolean requireResults) throws IOException {
final Map<String, Object> map = XContentHelper.convertToMap(XContentType.JSON.xContent(), response.getEntity().getContent(), true);
final Map<String, Object> hitsMap = (Map<String, Object>) map.get("hits");

final List<Map<String, Object>> hitsList = (List<Map<String, Object>>) hitsMap.get("hits");
assertThat(hitsList.size(), greaterThan(0));

if (requireResults) {
assertThat(hitsList.size(), greaterThan(0));
}

return hitsList.stream()
.sorted(Comparator.comparing((Map<String, Object> hit) -> ((String) hit.get("_id"))))
Expand Down