Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import java.util.function.Function;
import java.util.stream.Collectors;

class SourceTransforms {
public class SourceTransforms {
/**
* This preprocessing step makes it easier to match the document using a unified structure.
* It performs the following modifications:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.datageneration.queries;

import org.elasticsearch.datageneration.FieldType;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.test.ESTestCase;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

/**
 * Generates leaf-level queries for a single field value, driven by the field's mapping.
 * Implementations return an empty list when they cannot build a query for the given value.
 */
public interface LeafQueryGenerator {

    /**
     * Generate queries for {@code value} stored at {@code path}.
     *
     * @param fieldMapping mapping parameters of the field (for example {@code ignore_above})
     * @param path         full path of the field to query
     * @param value        a value of the field; the generators in this file expect a {@link String}
     * @return the generated queries, possibly empty
     */
    List<QueryBuilder> generate(Map<String, Object> fieldMapping, String path, Object value);

    /**
     * Build a query for a specific type. If the field is nested, this query will need to be wrapped in nested queries.
     * @param type the type to build a query for
     * @return a generator that can build queries for this type
     */
    static LeafQueryGenerator buildForType(String type) {
        LeafQueryGenerator noQueries = (Map<String, Object> fieldMapping, String path, Object value) -> List.of();

        FieldType fieldType = FieldType.tryParse(type);
        if (fieldType == null) {
            return noQueries;
        }

        return switch (fieldType) {
            case KEYWORD -> new KeywordQueryGenerator();
            case TEXT -> new TextQueryGenerator();
            case WILDCARD -> new WildcardQueryGenerator();
            default -> noQueries;
        };
    }

    /**
     * Shared guard for the keyword-like generators.
     *
     * @param fieldMapping mapping parameters of the field
     * @param value        the string value to be queried
     * @return {@code true} when {@code value} is non-empty and not longer than the mapping's
     *         {@code ignore_above} (which defaults to {@link Integer#MAX_VALUE} when absent)
     */
    private static boolean isQueryable(Map<String, Object> fieldMapping, String value) {
        // Accept any Number so that a mapping parsed into e.g. a Long does not fail the cast.
        Number ignoreAbove = (Number) fieldMapping.getOrDefault("ignore_above", Integer.MAX_VALUE);
        return value.isEmpty() == false && value.length() <= ignoreAbove.longValue();
    }

    /** Produces a term query for keyword values. */
    class KeywordQueryGenerator implements LeafQueryGenerator {
        @Override
        public List<QueryBuilder> generate(Map<String, Object> fieldMapping, String path, Object value) {
            if (LeafQueryGenerator.isQueryable(fieldMapping, (String) value) == false) {
                return List.of();
            }
            return List.of(QueryBuilders.termQuery(path, value));
        }
    }

    /** Produces a term query and a prefix-style wildcard query ({@code value + "*"}) for wildcard values. */
    class WildcardQueryGenerator implements LeafQueryGenerator {
        @Override
        public List<QueryBuilder> generate(Map<String, Object> fieldMapping, String path, Object value) {
            if (LeafQueryGenerator.isQueryable(fieldMapping, (String) value) == false) {
                return List.of();
            }
            return List.of(QueryBuilders.termQuery(path, value), QueryBuilders.wildcardQuery(path, value + "*"));
        }
    }

    /** Produces a match query and, when the value contains alphanumeric tokens, a match_phrase query. */
    class TextQueryGenerator implements LeafQueryGenerator {
        @Override
        public List<QueryBuilder> generate(Map<String, Object> fieldMapping, String path, Object value) {
            if (((String) value).isEmpty()) {
                return List.of();
            }
            var results = new ArrayList<QueryBuilder>();
            results.add(QueryBuilders.matchQuery(path, value));
            var phraseQuery = buildPhraseQuery(path, value);
            if (phraseQuery != null) {
                results.add(phraseQuery);
            }
            return results;
        }

        /**
         * Build a match_phrase query from a random contiguous run of the value's alphanumeric tokens.
         *
         * @return the phrase query, or {@code null} when the value has no alphanumeric token
         */
        private static QueryBuilder buildPhraseQuery(String path, Object value) {
            String needle = (String) value;
            // String.split keeps empty strings for leading and consecutive separators; drop them so the
            // phrase is never empty or made up of blanks only.
            List<String> tokens = Arrays.stream(needle.split("[^a-zA-Z0-9]")).filter(token -> token.isEmpty() == false).toList();
            if (tokens.isEmpty()) {
                return null;
            }

            // Pick a non-empty window [low, hi) over the tokens.
            int low = ESTestCase.randomIntBetween(0, tokens.size() - 1);
            int hi = ESTestCase.randomIntBetween(low + 1, tokens.size());
            var phrase = String.join(" ", tokens.subList(low, hi));
            return QueryBuilders.matchPhraseQuery(path, phrase);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.datageneration.queries;

import org.apache.lucene.search.join.ScoreMode;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;

import java.util.Arrays;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;


/**
 * Generates queries for a field value: builds leaf queries via {@link LeafQueryGenerator} and wraps
 * them in nested queries for every parent in the field's path whose mapping type is {@code nested}.
 */
public class QueryGenerator {

    private final Map<String, Map<String, Object>> mappingLookup;
    private final Map<String, Object> mappingRaw;

    /**
     * @param mappingLookup mapping parameters keyed by full field path
     * @param mappingRaw    raw mapping tree; it is read as {@code _doc -> properties -> ...}
     */
    public QueryGenerator(Map<String, Map<String, Object>> mappingLookup, Map<String, Object> mappingRaw) {
        this.mappingLookup = mappingLookup;
        this.mappingRaw = mappingRaw;
    }

    /**
     * Generate queries for {@code value} at {@code path}.
     *
     * @param path    full dotted path of the field
     * @param mapping mapping parameters of the field, may be {@code null}
     * @param value   a value of the field
     * @return the queries, or an empty list when the field has no mapping, a parent in its path is
     *         disabled, the field is not indexed, or no leaf query can be generated for it
     */
    public List<QueryBuilder> generateQueries(String path, Map<String, Object> mapping, Object value) {
        // This query generator cannot handle fields with periods in the name.
        if (path.equals("host.name")) {
            return List.of();
        }
        if (mapping == null || isEnabled(path) == false) {
            return List.of();
        }
        // "index" is read as leniently as "enabled": Boolean or String values are both accepted.
        if (readBoolean(mapping, "index", true) == false) {
            return List.of();
        }
        var type = (String) mapping.get("type");
        var leafQueries = LeafQueryGenerator.buildForType(type).generate(mapping, path, value);
        if (leafQueries.isEmpty()) {
            // Nothing to wrap; skip the raw-mapping walk entirely, as before.
            return List.of();
        }
        // The nested prefixes depend only on the path, so resolve them once for all leaf queries.
        List<String> nestedPaths = getNestedPathPrefixes(path.split("\\."));
        return leafQueries.stream().map(q -> wrapInNestedQuery(nestedPaths, q)).toList();
    }

    /**
     * Wrap {@code leafQuery} in one nested query per entry of {@code nestedPaths}, innermost path first,
     * so that the outermost nested query targets the shortest path.
     */
    private static QueryBuilder wrapInNestedQuery(List<String> nestedPaths, QueryBuilder leafQuery) {
        QueryBuilder query = leafQuery;
        for (String nestedPath : nestedPaths.reversed()) {
            query = QueryBuilders.nestedQuery(nestedPath, query, ScoreMode.Max);
        }
        return query;
    }

    /**
     * Walk the raw mapping along {@code path} and collect the full path of every parent whose
     * type is {@code nested}, ordered from outermost to innermost.
     */
    @SuppressWarnings("unchecked")
    private List<String> getNestedPathPrefixes(String[] path) {
        Map<String, Object> mapping = mappingRaw;
        mapping = (Map<String, Object>) mapping.get("_doc");
        mapping = (Map<String, Object>) mapping.get("properties");

        var result = new ArrayList<String>();
        for (int i = 0; i < path.length - 1; i++) {
            var field = path[i];
            mapping = (Map<String, Object>) mapping.get(field);
            boolean nested = "nested".equals(mapping.get("type"));
            if (nested) {
                result.add(String.join(".", Arrays.copyOfRange(path, 0, i + 1)));
            }
            mapping = (Map<String, Object>) mapping.get("properties");
        }

        // The last path element must be a leaf, i.e. it must not have sub-properties.
        mapping = (Map<String, Object>) mapping.get(path[path.length - 1]);
        assert mapping.containsKey("properties") == false;
        return result;
    }

    /**
     * Traverse down mapping tree and check that all objects are enabled in path
     */
    private boolean isEnabled(String path) {
        String[] parts = path.split("\\.");
        for (int i = 0; i < parts.length - 1; i++) {
            var pathToHere = String.join(".", Arrays.copyOfRange(parts, 0, i + 1));
            // NOTE(review): assumes every parent path has an entry in mappingLookup - confirm.
            Map<String, Object> mapping = mappingLookup.get(pathToHere);
            if (readBoolean(mapping, "enabled", true) == false) {
                return false;
            }
        }
        return true;
    }

    /**
     * Read a boolean mapping parameter that may be stored as a {@link Boolean} or a {@link String}.
     *
     * @return the parsed value, or {@code defaultValue} when the key is absent or has another type
     */
    private static boolean readBoolean(Map<String, Object> mapping, String key, boolean defaultValue) {
        Object raw = mapping.get(key);
        if (raw instanceof Boolean b) {
            return b;
        }
        if (raw instanceof String s) {
            return Boolean.parseBoolean(s);
        }
        return defaultValue;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,23 @@

package org.elasticsearch.xpack.logsdb.qa;

import org.apache.lucene.search.Query;
import org.apache.lucene.search.join.ScoreMode;
import org.elasticsearch.client.Request;
import org.elasticsearch.client.Response;
import org.elasticsearch.client.ResponseException;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.time.DateFormatter;
import org.elasticsearch.common.time.FormatNames;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.datageneration.FieldType;
import org.elasticsearch.datageneration.matchers.MatchResult;
import org.elasticsearch.datageneration.matchers.Matcher;
import org.elasticsearch.datageneration.matchers.source.SourceTransforms;
import org.elasticsearch.datageneration.queries.QueryGenerator;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval;
Expand All @@ -34,9 +41,11 @@
import java.time.ZonedDateTime;
import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.TreeMap;

import static org.hamcrest.Matchers.equalTo;
Expand Down Expand Up @@ -138,6 +147,46 @@ public void testMatchAllQuery() throws IOException {
assertTrue(matchResult.getMessage(), matchResult.isMatch());
}

/**
 * For every mapped field, picks a random non-null value from a random document containing that field,
 * generates queries for it and checks that the contender returns the same hits as the baseline.
 */
public void testRandomQueries() throws IOException {
    int numberOfDocuments = ESTestCase.randomIntBetween(20, 80);
    final List<XContentBuilder> documents = generateDocuments(numberOfDocuments);
    var mappingLookup = dataGenerationHelper.mapping().lookup();
    final List<Map<String, List<Object>>> docsNormalized = documents.stream()
        .map(d -> XContentHelper.convertToMap(XContentType.JSON.xContent(), Strings.toString(d), true))
        .map(asMap -> SourceTransforms.normalize(asMap, mappingLookup))
        .toList();

    indexDocuments(documents);

    QueryGenerator queryGenerator = new QueryGenerator(mappingLookup, dataGenerationHelper.mapping().raw());
    for (var entry : mappingLookup.entrySet()) {
        var path = entry.getKey();
        var mapping = entry.getValue();

        // Only documents that actually contain the field can supply a value to query for.
        var candidates = docsNormalized.stream().filter(d -> d.containsKey(path)).toList();
        if (candidates.isEmpty()) {
            continue;
        }

        var chosenDoc = randomFrom(candidates);
        List<Object> nonNullValues = chosenDoc.get(path).stream().filter(Objects::nonNull).toList();
        if (nonNullValues.isEmpty()) {
            continue;
        }

        Object value = randomFrom(nonNullValues);
        for (var query : queryGenerator.generateQueries(path, mapping, value)) {
            logger.info("Querying for field [{}] with value [{}]", path, value);

            final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(query).size(numberOfDocuments);
            var expectedHits = getQueryHits(queryBaseline(searchSourceBuilder));
            final MatchResult matchResult = Matcher.matchSource()
                .mappings(dataGenerationHelper.mapping().lookup(), getContenderMappings(), getBaselineMappings())
                .settings(getContenderSettings(), getBaselineSettings())
                .expected(expectedHits)
                .ignoringSort(true)
                .isEqualTo(getQueryHits(queryContender(searchSourceBuilder)));
            assertTrue(matchResult.getMessage(), matchResult.isMatch());
        }
    }
}

public void testTermsQuery() throws IOException {
int numberOfDocuments = ESTestCase.randomIntBetween(20, 80);
final List<XContentBuilder> documents = generateDocuments(numberOfDocuments);
Expand Down