diff --git a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldMapperTests.java b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldMapperTests.java
index 8f56159355bbe..f61b8f7f8078a 100644
--- a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldMapperTests.java
+++ b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldMapperTests.java
@@ -24,7 +24,6 @@
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.core.Tuple;
-import org.elasticsearch.index.mapper.DateFieldMapper;
 import org.elasticsearch.index.mapper.DocumentMapper;
 import org.elasticsearch.index.mapper.KeywordFieldMapper;
 import org.elasticsearch.index.mapper.LuceneDocument;
@@ -46,7 +45,6 @@
 import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
-import java.util.UUID;
 
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
@@ -253,25 +251,7 @@ public SyntheticSourceExample example(int maxValues) {
         }
 
         private Tuple<String, String> generateValue() {
-            StringBuilder builder = new StringBuilder();
-            if (randomBoolean()) {
-                builder.append(randomAlphaOfLength(5));
-            } else {
-                String timestamp = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.formatMillis(System.currentTimeMillis());
-                builder.append(timestamp);
-            }
-            for (int i = 0; i < randomIntBetween(0, 9); i++) {
-                builder.append(" ");
-                int rand = randomIntBetween(0, 4);
-                switch (rand) {
-                    case 0 -> builder.append(randomAlphaOfLength(5));
-                    case 1 -> builder.append(randomAlphanumericOfLength(5));
-                    case 2 -> builder.append(UUID.randomUUID());
-                    case 3 -> builder.append(randomIp(true));
-                    case 4 -> builder.append(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.formatMillis(randomMillisUpToYear9999()));
-                }
-            }
-            String value = builder.toString();
+            var value = PatternedTextVsMatchOnlyTextTests.randomMessage();
             return Tuple.tuple(value, value);
         }
 
diff --git a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextVsMatchOnlyTextTests.java b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextVsMatchOnlyTextTests.java
new file mode 100644
index 0000000000000..89185b1be8f11
--- /dev/null
+++ b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextVsMatchOnlyTextTests.java
@@ -0,0 +1,314 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.logsdb.patternedtext;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.elasticsearch.action.DocWriteRequest;
+import org.elasticsearch.action.admin.indices.create.CreateIndexRequest;
+import org.elasticsearch.action.admin.indices.refresh.RefreshRequest;
+import org.elasticsearch.action.bulk.BulkRequest;
+import org.elasticsearch.action.bulk.BulkResponse;
+import org.elasticsearch.action.index.IndexRequest;
+import org.elasticsearch.action.support.IndicesOptions;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.time.DateFormatter;
+import org.elasticsearch.index.mapper.extras.MapperExtrasPlugin;
+import org.elasticsearch.index.query.QueryBuilder;
+import org.elasticsearch.index.query.QueryBuilders;
+import org.elasticsearch.license.LicenseSettings;
+import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.search.SearchHit;
+import org.elasticsearch.test.ESIntegTestCase;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.xcontent.json.JsonXContent;
+import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin;
+import org.elasticsearch.xpack.logsdb.LogsDBPlugin;
+import org.junit.Before;
+
+import java.io.IOException;
+import java.time.Instant;
+import java.time.ZonedDateTime;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+import java.util.UUID;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.BiFunction;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailuresAndResponse;
+
+/**
+ * Indexes the same random, log-like messages into a {@code match_only_text} field and a {@code patterned_text} field
+ * and asserts that phrase, match and term queries return the same documents from both.
+ */
+public class PatternedTextVsMatchOnlyTextTests extends ESIntegTestCase {
+    private static final Logger logger = LogManager.getLogger(PatternedTextVsMatchOnlyTextTests.class);
+
+    /** Adds a self-generated trial license type to the inherited node settings. */
+    @Override
+    protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) {
+        return Settings.builder()
+            .put(super.nodeSettings(nodeOrdinal, otherSettings))
+            .put(LicenseSettings.SELF_GENERATED_LICENSE_TYPE.getKey(), "trial")
+            .build();
+    }
+
+    /** Plugins loaded on every test node. */
+    @Override
+    protected Collection<Class<? extends Plugin>> nodePlugins() {
+        return Arrays.asList(MapperExtrasPlugin.class, LogsDBPlugin.class, LocalStateCompositeXPackPlugin.class);
+    }
+
+    private static final String INDEX = "test_index";
+    private static final String MATCH_ONLY_TEXT_FIELD = "field_match_only_text";
+    private static final String PATTERNED_TEXT_FIELD = "field_patterned_text";
+    private static final String MAPPING = """
+        {
+          "properties": {
+            "@timestamp": { "type": "date" },
+            "field_match_only_text": { "type": "match_only_text" },
+            "field_patterned_text": { "type": "patterned_text" }
+          }
+        }
+        """;
+
+    /** Delimiter used to cut a message into candidate query tokens: runs of whitespace and ASCII punctuation. */
+    private static final Pattern TOKEN_DELIMITER = Pattern.compile("[\\s\\p{Punct}]+");
+
+    /** Skips the test unless the patterned_text feature flag is enabled. */
+    @Before
+    public void setup() {
+        assumeTrue("Only when patterned_text feature flag is enabled", PatternedTextFieldMapper.PATTERNED_TEXT_MAPPER.isEnabled());
+    }
+
+    /**
+     * Creates the index, indexes between 10 and 200 random messages into both fields, then runs phrase, match and
+     * term queries derived from those messages against each field and compares the results.
+     */
+    public void testQueries() throws IOException {
+        var createRequest = new CreateIndexRequest(INDEX).mapping(MAPPING);
+        assertAcked(admin().indices().create(createRequest));
+
+        int numDocs = randomIntBetween(10, 200);
+        List<String> logMessages = generateMessages(numDocs);
+        indexDocs(logMessages);
+
+        var queryTerms = logMessages.stream().flatMap(m -> randomQueryValues(m).stream()).toList();
+        {
+            var ptQueries = buildQueries(PATTERNED_TEXT_FIELD, queryTerms, QueryBuilders::matchPhraseQuery);
+            var motQueries = buildQueries(MATCH_ONLY_TEXT_FIELD, queryTerms, QueryBuilders::matchPhraseQuery);
+            assertQueryResults(ptQueries, motQueries, numDocs, "phrase");
+        }
+        {
+            var ptQueries = buildQueries(PATTERNED_TEXT_FIELD, queryTerms, QueryBuilders::matchQuery);
+            var motQueries = buildQueries(MATCH_ONLY_TEXT_FIELD, queryTerms, QueryBuilders::matchQuery);
+            assertQueryResults(ptQueries, motQueries, numDocs, "match");
+        }
+        {
+            var ptQueries = buildQueries(PATTERNED_TEXT_FIELD, queryTerms, QueryBuilders::termQuery);
+            var motQueries = buildQueries(MATCH_ONLY_TEXT_FIELD, queryTerms, QueryBuilders::termQuery);
+            assertQueryResults(ptQueries, motQueries, numDocs, "term");
+        }
+    }
+
+    /**
+     * Runs each pair of queries and asserts that both return the same total hit count and the same document ids.
+     *
+     * @param patternedTextQueries queries against the patterned_text field
+     * @param matchOnlyTextQueries the same queries against the match_only_text field, in the same order
+     * @param numDocs number of indexed documents, used as the search size so that every hit is returned
+     * @param queryType label for failure messages and the summary log line
+     */
+    private void assertQueryResults(
+        List<QueryBuilder> patternedTextQueries,
+        List<QueryBuilder> matchOnlyTextQueries,
+        int numDocs,
+        String queryType
+    ) {
+        assertEquals("query lists must pair up one-to-one", patternedTextQueries.size(), matchOnlyTextQueries.size());
+
+        // Incremented inside the response callbacks, so it has to be an effectively final holder.
+        var numQueriesWithResults = new AtomicInteger(0);
+        for (int i = 0; i < patternedTextQueries.size(); ++i) {
+            QueryBuilder ptQuery = patternedTextQueries.get(i);
+            QueryBuilder motQuery = matchOnlyTextQueries.get(i);
+            var ptRequest = client().prepareSearch(INDEX).setQuery(ptQuery).setSize(numDocs);
+            var motRequest = client().prepareSearch(INDEX).setQuery(motQuery).setSize(numDocs);
+
+            assertNoFailuresAndResponse(ptRequest, ptResponse -> {
+                assertNoFailuresAndResponse(motRequest, motResponse -> {
+                    // Name the query in every failure so a mismatch can be reproduced by hand.
+                    String context = queryType + " query " + motQuery;
+                    long motTotalHits = motResponse.getHits().getTotalHits().value();
+                    assertEquals("total hits differ for " + context, motTotalHits, ptResponse.getHits().getTotalHits().value());
+
+                    var motDocIds = Arrays.stream(motResponse.getHits().getHits()).map(SearchHit::getId).collect(Collectors.toSet());
+                    var ptDocIds = Arrays.stream(ptResponse.getHits().getHits()).map(SearchHit::getId).collect(Collectors.toSet());
+                    assertEquals("matching doc ids differ for " + context, motDocIds, ptDocIds);
+
+                    if (motTotalHits > 0) {
+                        numQueriesWithResults.incrementAndGet();
+                    }
+                });
+            });
+        }
+        logger.info("Ran {} {} queries, of which {} had matches", patternedTextQueries.size(), queryType, numQueriesWithResults.get());
+    }
+
+    /** Builds one query per term against {@code field} using the given two-argument query factory. */
+    private List<QueryBuilder> buildQueries(String field, List<String> terms, BiFunction<String, Object, QueryBuilder> queryBuilder) {
+        return terms.stream().map(t -> queryBuilder.apply(field, t)).toList();
+    }
+
+    /**
+     * Derives query strings from one message: the whole message, a random substring of it and, when the message
+     * contains at least one token, one random token and one random run of consecutive tokens joined by single spaces.
+     */
+    private static List<String> randomQueryValues(String value) {
+        var values = new ArrayList<String>();
+
+        values.add(value);
+        values.add(randomSubstring(value));
+
+        List<String> tokens = TOKEN_DELIMITER.splitAsStream(value).filter(t -> t.isEmpty() == false).toList();
+        if (tokens.isEmpty() == false) {
+            values.add(randomFrom(tokens));
+            values.add(randomSubPhrase(tokens));
+        }
+        return values;
+    }
+
+    /** Returns a random non-empty substring of {@code value}, which must itself be non-empty. */
+    private static String randomSubstring(String value) {
+        int low = ESTestCase.randomIntBetween(0, value.length() - 1);
+        int hi = ESTestCase.randomIntBetween(low + 1, value.length());
+        return value.substring(low, hi);
+    }
+
+    /** Returns a random non-empty run of consecutive {@code tokens} joined by single spaces; the list must be non-empty. */
+    private static String randomSubPhrase(List<String> tokens) {
+        int low = ESTestCase.randomIntBetween(0, tokens.size() - 1);
+        int hi = ESTestCase.randomIntBetween(low + 1, tokens.size());
+        return String.join(" ", tokens.subList(low, hi));
+    }
+
+    /** Generates {@code numDocs} messages with {@link #randomMessage()}. */
+    private List<String> generateMessages(int numDocs) {
+        List<String> logMessages = new ArrayList<>(numDocs);
+        for (int i = 0; i < numDocs; i++) {
+            logMessages.add(randomMessage());
+        }
+        return logMessages;
+    }
+
+    /**
+     * Bulk-indexes one document per message, storing the message in both text fields, and refreshes the index.
+     * Timestamps start one second after the current time and advance by one second per document.
+     */
+    private void indexDocs(List<String> logMessages) throws IOException {
+        BulkRequest bulkRequest = new BulkRequest();
+        long timestamp = System.currentTimeMillis();
+        for (var msg : logMessages) {
+            timestamp += TimeUnit.SECONDS.toMillis(1);
+            var indexRequest = new IndexRequest(INDEX).opType(DocWriteRequest.OpType.CREATE)
+                .source(
+                    JsonXContent.contentBuilder()
+                        .startObject()
+                        .field("@timestamp", timestamp)
+                        .field(PATTERNED_TEXT_FIELD, msg)
+                        .field(MATCH_ONLY_TEXT_FIELD, msg)
+                        .endObject()
+                );
+            bulkRequest.add(indexRequest);
+        }
+        BulkResponse bulkResponse = client().bulk(bulkRequest).actionGet();
+        assertFalse(bulkResponse.buildFailureMessage(), bulkResponse.hasFailures());
+        safeGet(indicesAdmin().refresh(new RefreshRequest(INDEX).indicesOptions(IndicesOptions.lenientExpandOpenHidden())));
+    }
+
+    /**
+     * Generates a random log-like message. Rarely this is just 1 to 100 code points of random Unicode. Otherwise it
+     * is an optional bracketed timestamp followed by 1 to 30 separator-prefixed parts, each part being either a
+     * sentence of random words or a single generated token that is optionally wrapped in square brackets.
+     */
+    public static String randomMessage() {
+        if (rarely()) {
+            return randomRealisticUnicodeOfCodepointLength(randomIntBetween(1, 100));
+        }
+
+        StringBuilder message = new StringBuilder();
+        int numTokens = randomIntBetween(1, 30);
+
+        if (randomBoolean()) {
+            message.append("[").append(randomTimestamp()).append("]");
+        }
+        for (int i = 0; i < numTokens; i++) {
+            message.append(randomSeparator());
+
+            if (randomBoolean()) {
+                message.append(randomSentence());
+            } else {
+                var token = randomFrom(
+                    random(),
+                    () -> randomRealisticUnicodeOfCodepointLength(randomIntBetween(1, 20)),
+                    // Built from the test's random helpers: UUID.randomUUID() uses its own secure random source,
+                    // so a message containing it could not be regenerated from the test seed.
+                    () -> new UUID(randomLong(), randomLong()).toString(),
+                    () -> randomIp(randomBoolean()),
+                    PatternedTextVsMatchOnlyTextTests::randomTimestamp,
+                    ESTestCase::randomInt,
+                    ESTestCase::randomDouble
+                );
+
+                if (randomBoolean()) {
+                    message.append("[").append(token).append("]");
+                } else {
+                    message.append(token);
+                }
+            }
+        }
+        return message.toString();
+    }
+
+    /** Builds 1 to 10 space-separated words, each from {@code randomAlphaOfLength} with a length of 1 to 10. */
+    private static StringBuilder randomSentence() {
+        int words = randomIntBetween(1, 10);
+        StringBuilder text = new StringBuilder();
+        for (int i = 0; i < words; i++) {
+            if (i > 0) {
+                text.append(" ");
+            }
+            text.append(randomAlphaOfLength(randomIntBetween(1, 10)));
+        }
+        return text;
+    }
+
+    /** Returns either 1 to 10 spaces or a single character out of tab, newline and {@code ;:.',}. */
+    private static String randomSeparator() {
+        if (randomBoolean()) {
+            // Return spaces frequently since current token splitting is on spaces.
+            return " ".repeat(randomIntBetween(1, 10));
+        } else {
+            return randomFrom("\t\n;:.',".split(""));
+        }
+    }
+
+    /** Formats a random instant up to year 9999 in a random time zone, using a random date pattern and locale. */
+    private static String randomTimestamp() {
+        long millis = randomMillisUpToYear9999();
+        ZonedDateTime zonedDateTime = ZonedDateTime.ofInstant(Instant.ofEpochMilli(millis), randomZone());
+        DateFormatter formatter = DateFormatter.forPattern(randomDateFormatterPattern()).withLocale(randomLocale(random()));
+        return formatter.format(zonedDateTime);
+    }
+}