From 6e36e5e7a5efb525f9f4434d377c9a5a7e370313 Mon Sep 17 00:00:00 2001 From: Parker Timmins Date: Mon, 11 Aug 2025 13:55:55 -0500 Subject: [PATCH 1/6] Partial version temp --- .../PatternedTextRandomTests.java | 179 ++++++++++++++++++ 1 file changed, 179 insertions(+) create mode 100644 x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextRandomTests.java diff --git a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextRandomTests.java b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextRandomTests.java new file mode 100644 index 0000000000000..81a24cdd52020 --- /dev/null +++ b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextRandomTests.java @@ -0,0 +1,179 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.logsdb.patternedtext; + +import org.elasticsearch.ResourceNotFoundException; +import org.elasticsearch.action.ActionType; +import org.elasticsearch.action.DocWriteRequest; +import org.elasticsearch.action.admin.indices.create.CreateIndexRequest; +import org.elasticsearch.action.admin.indices.create.CreateIndexResponse; +import org.elasticsearch.action.admin.indices.refresh.RefreshRequest; +import org.elasticsearch.action.bulk.BulkRequest; +import org.elasticsearch.action.bulk.BulkResponse; +import org.elasticsearch.action.index.IndexRequest; +import org.elasticsearch.action.support.IndicesOptions; +import org.elasticsearch.action.support.master.AcknowledgedResponse; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.time.DateFormatter; +import org.elasticsearch.core.Tuple; +import org.elasticsearch.index.IndexMode; +import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.mapper.DateFieldMapper; +import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentFactory; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xcontent.json.JsonXContent; +import org.elasticsearch.xpack.core.security.action.apikey.CreateApiKeyAction; + +import java.io.IOException; +import java.time.Instant; +import java.time.ZoneOffset; +import java.time.ZonedDateTime; +import java.util.Arrays; +import java.util.UUID; +import java.util.concurrent.TimeUnit; + +public class PatternedTextRandomTests extends ESIntegTestCase { + + + + public void test() throws IOException { + var settings = Settings.builder().put(IndexSettings.MODE.getKey(), IndexMode.LOGSDB.getName()); + String index = "test_index"; + var mappings = XContentFactory.jsonBuilder() + .startObject() + .startObject("properties") + .startObject("@timestamp") + .field("type", "date") + .endObject() + 
.startObject("field_patterned_text") + .field("type", "patterned_text") + .endObject() + .startObject("field_match_only_text") + .field("type", "match_only_text") + .endObject() + .endObject() + .endObject(); + + var createRequest = new CreateIndexRequest(index) + .settings(settings) + .mapping(mappings); + + var createResponse = safeGet(admin().indices().create(createRequest)); + assertTrue(createResponse.isAcknowledged()); + + BulkRequest bulkRequest = new BulkRequest(); + int numDocs = randomIntBetween(1, 300); + long timestamp = System.currentTimeMillis(); + for (int i = 0; i < numDocs; i++) { + timestamp += TimeUnit.SECONDS.toMillis(1); + String logMessage = randomMessage(); + var indexRequest = new IndexRequest(index).opType(DocWriteRequest.OpType.CREATE) + .source( + JsonXContent.contentBuilder() + .startObject() + .field("@timestamp", timestamp) + .field("field_patterned_text", logMessage) + .field("field_match_only_text", logMessage); + .endObject() + ); + bulkRequest.add(indexRequest); + } + BulkResponse bulkResponse = client().bulk(bulkRequest).actionGet(); + + client().index( + + ) + ); + safeGet( + indicesAdmin().refresh( + new RefreshRequest(".ds-" + dataStreamName + "*").indicesOptions(IndicesOptions.lenientExpandOpenHidden()) + ) + ); + + int numDocs = randomIntBetween(100, 1000); + + + new BulkRequest() + . 
+ + for (int i = 0; i < numDocs; i++) { + + } + + } + + + + public static String randomMessage() { + if (rarely()) { + return randomRealisticUnicodeOfCodepointLength(randomIntBetween(1, 100)); + } + + StringBuilder message = new StringBuilder(); + int numTokens = randomIntBetween(1, 30); + + if (randomBoolean()) { + message.append("[").append(randomTimestamp()).append("]"); + } + for (int i = 0; i < numTokens; i++) { + message.append(randomSeparator()); + + if (randomBoolean()) { + message.append(randomSentence()); + } else { + var token = randomFrom( + random(), + () -> randomRealisticUnicodeOfCodepointLength(randomIntBetween(1, 20)), + () -> UUID.randomUUID().toString(), + () -> randomIp(randomBoolean()), + PatternedTextRandomTests::randomTimestamp, + ESTestCase::randomInt, + ESTestCase::randomDouble + ); + + if (randomBoolean()) { + message.append("[").append(token).append("]"); + } else { + message.append(token); + } + } + } + return message.toString(); + } + + private static StringBuilder randomSentence() { + int words = randomIntBetween(1, 10); + StringBuilder text = new StringBuilder(); + for (int i = 0; i < words; i++) { + if (i > 0) { + text.append(" "); + } + text.append(randomAlphaOfLength(randomIntBetween(1, 10))); + } + return text; + } + + private static String randomSeparator() { + if (randomBoolean()) { + // Return spaces frequently since current token splitting is on spaces. 
+ return " ".repeat(randomIntBetween(1, 10)); + } else { + return randomFrom("\t\n;:.',".split("")); + } + } + + private static String randomTimestamp() { + long millis = randomMillisUpToYear9999(); + ZonedDateTime zonedDateTime = ZonedDateTime.ofInstant(Instant.ofEpochMilli(millis), randomZone()); + DateFormatter formatter = DateFormatter.forPattern(randomDateFormatterPattern()).withLocale(randomLocale(random())); + return formatter.format(zonedDateTime); + } +} From 826bbad45f34758662a488f232449004963b40d5 Mon Sep 17 00:00:00 2001 From: Parker Timmins Date: Mon, 11 Aug 2025 17:20:15 -0500 Subject: [PATCH 2/6] A full test that should in theory work --- .../PatternedTextRandomTests.java | 182 ++++++++++++++---- 1 file changed, 147 insertions(+), 35 deletions(-) diff --git a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextRandomTests.java b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextRandomTests.java index 81a24cdd52020..649368f9202a8 100644 --- a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextRandomTests.java +++ b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextRandomTests.java @@ -16,101 +16,213 @@ import org.elasticsearch.action.bulk.BulkRequest; import org.elasticsearch.action.bulk.BulkResponse; import org.elasticsearch.action.index.IndexRequest; +import org.elasticsearch.action.search.SearchRequest; +import org.elasticsearch.action.search.SearchRequestBuilder; +import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.support.IndicesOptions; import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.time.DateFormatter; +import org.elasticsearch.common.util.CollectionUtils; import org.elasticsearch.core.Tuple; +import 
org.elasticsearch.datageneration.queries.LeafQueryGenerator; +import org.elasticsearch.datastreams.DataStreamsPlugin; import org.elasticsearch.index.IndexMode; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.mapper.DateFieldMapper; +import org.elasticsearch.index.mapper.extras.MapperExtrasPlugin; +import org.elasticsearch.index.query.MatchQueryBuilder; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.license.LicenseSettings; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.search.SearchHit; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.InternalSettingsPlugin; +import org.elasticsearch.test.hamcrest.ElasticsearchAssertions; +import org.elasticsearch.test.transport.MockTransportService; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentFactory; import org.elasticsearch.xcontent.XContentType; import org.elasticsearch.xcontent.json.JsonXContent; +import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin; +import org.elasticsearch.xpack.core.XPackPlugin; import org.elasticsearch.xpack.core.security.action.apikey.CreateApiKeyAction; +import org.elasticsearch.xpack.logsdb.LogsDBPlugin; +import org.junit.Before; import java.io.IOException; import java.time.Instant; import java.time.ZoneOffset; import java.time.ZonedDateTime; -import java.util.Arrays; -import java.util.UUID; +import java.util.*; import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; + +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures; public class PatternedTextRandomTests extends ESIntegTestCase { + @Override + protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { + return Settings.builder() + .put(super.nodeSettings(nodeOrdinal, otherSettings)) +// 
.put("xpack.license.self_generated.type", "trial") +// .put("cluster.logsdb.enabled", "true") + .put(LicenseSettings.SELF_GENERATED_LICENSE_TYPE.getKey(), "trial") + .build(); + } + + @Override + protected Collection> nodePlugins() { +// return Arrays.asList(LocalStateCompositeXPackPlugin.class); + return Arrays.asList(MapperExtrasPlugin.class, LogsDBPlugin.class, LocalStateCompositeXPackPlugin.class); +// return CollectionUtils.appendToCopy(super.nodePlugins(), LogsDBPlugin.class, LocalStateCompositeXPackPlugin.class); +// return List.of(InternalSettingsPlugin.class, XPackPlugin.class, LogsDBPlugin.class, DataStreamsPlugin.class); + } + + private static final String INDEX = "test_index"; + private static final String MATCH_ONLY_TEXT_FIELD = "field_match_only_text"; + private static final String PATTERNED_TEXT_FIELD = "field_patterned_text"; + + @Before + public void setup() { + assumeTrue("Only when patterned_text feature flag is enabled", PatternedTextFieldMapper.PATTERNED_TEXT_MAPPER.isEnabled()); + } + public void test() throws IOException { - var settings = Settings.builder().put(IndexSettings.MODE.getKey(), IndexMode.LOGSDB.getName()); - String index = "test_index"; + var settings = Settings.builder(); +// var settings = Settings.builder().put(IndexSettings.MODE.getKey(), IndexMode.LOGSDB.getName()); var mappings = XContentFactory.jsonBuilder() .startObject() .startObject("properties") .startObject("@timestamp") .field("type", "date") .endObject() - .startObject("field_patterned_text") + .startObject(PATTERNED_TEXT_FIELD) .field("type", "patterned_text") .endObject() - .startObject("field_match_only_text") + .startObject(MATCH_ONLY_TEXT_FIELD) .field("type", "match_only_text") .endObject() .endObject() .endObject(); - var createRequest = new CreateIndexRequest(index) + var createRequest = new CreateIndexRequest(INDEX) .settings(settings) .mapping(mappings); var createResponse = safeGet(admin().indices().create(createRequest)); 
assertTrue(createResponse.isAcknowledged()); - BulkRequest bulkRequest = new BulkRequest(); int numDocs = randomIntBetween(1, 300); - long timestamp = System.currentTimeMillis(); - for (int i = 0; i < numDocs; i++) { - timestamp += TimeUnit.SECONDS.toMillis(1); - String logMessage = randomMessage(); - var indexRequest = new IndexRequest(index).opType(DocWriteRequest.OpType.CREATE) - .source( - JsonXContent.contentBuilder() - .startObject() - .field("@timestamp", timestamp) - .field("field_patterned_text", logMessage) - .field("field_match_only_text", logMessage); - .endObject() + List logMessages = generateMessages(numDocs); + indexDocs(logMessages); + + for (var message : logMessages) { + List queryTerms = randomQueryParts(message); + + var patternedTextQueries = generateQueries(PATTERNED_TEXT_FIELD, queryTerms); + var matchOnlyQueries = generateQueries(MATCH_ONLY_TEXT_FIELD, queryTerms); + + for (int i = 0; i < patternedTextQueries.size(); ++i) { + var ptQuery = patternedTextQueries.get(i); + var motQuery = matchOnlyQueries.get(i); + + var ptResponse = client().prepareSearch(INDEX).setQuery(ptQuery).setSize(numDocs).get(); + var motResponse = client().prepareSearch(INDEX).setQuery(motQuery).setSize(numDocs).get(); + + assertNoFailures(ptResponse); + assertNoFailures(motResponse); + +// assertTrue(motResponse.getHits().getTotalHits().value() > 0); + assertEquals( + motResponse.getHits().getTotalHits().value(), + ptResponse.getHits().getTotalHits().value() ); - bulkRequest.add(indexRequest); - } - BulkResponse bulkResponse = client().bulk(bulkRequest).actionGet(); - client().index( + var motDocIds = Arrays.stream(motResponse.getHits().getHits()).map(SearchHit::getId).collect(Collectors.toSet()); + var ptDocIds = Arrays.stream(ptResponse.getHits().getHits()).map(SearchHit::getId).collect(Collectors.toSet()); - ) - ); - safeGet( - indicesAdmin().refresh( - new RefreshRequest(".ds-" + dataStreamName + "*").indicesOptions(IndicesOptions.lenientExpandOpenHidden()) - ) - 
); + assertEquals(motDocIds, ptDocIds); + } + } + } - int numDocs = randomIntBetween(100, 1000); + public List generateQueries(String field, List queryTerms) { + var results = new ArrayList(); + for (var queryTerm : queryTerms) { + results.add(QueryBuilders.termQuery(field, queryTerm)); + results.add(QueryBuilders.matchQuery(field, queryTerm)); + results.add(QueryBuilders.matchPhraseQuery(field, queryTerm)); + } - new BulkRequest() - . + return results; + } - for (int i = 0; i < numDocs; i++) { + private static List randomQueryParts(String value) { + var values = new ArrayList(); + var tokenizerRegex = "[\\s\\p{Punct}]+"; + List tokens = Arrays.stream(value.split(tokenizerRegex)).filter(t -> t.isEmpty() == false).toList(); + + // full value + values.add(value); + // random sub-phrase + values.add(randomSubstring(value)); + if (tokens.isEmpty() == false) { + // random term + values.add(randomFrom(tokens)); + // random sub-phrase + values.add(getSubPhrase(tokens)); } + return values; + } + + private static String randomSubstring(String value) { + int low = ESTestCase.randomIntBetween(0, value.length() - 1); + int hi = ESTestCase.randomIntBetween(low + 1, value.length()); + return value.substring(low, hi); + } + private static String getSubPhrase(List tokens) { + int low = ESTestCase.randomIntBetween(0, tokens.size() - 1); + int hi = ESTestCase.randomIntBetween(low + 1, tokens.size()); + return String.join(" ", tokens.subList(low, hi)); } + private List generateMessages(int numDocs) { + List logMessages = new ArrayList<>(); + for (int i = 0; i < numDocs; i++) { + logMessages.add(randomMessage()); + } + return logMessages; + } + private void indexDocs(List logMessages) throws IOException { + BulkRequest bulkRequest = new BulkRequest(); + long timestamp = System.currentTimeMillis(); + for (var msg : logMessages) { + timestamp += TimeUnit.SECONDS.toMillis(1); + var indexRequest = new IndexRequest(INDEX).opType(DocWriteRequest.OpType.CREATE) + .source( + 
JsonXContent.contentBuilder() + .startObject() + .field("@timestamp", timestamp) + .field("field_patterned_text", msg) + .field("field_match_only_text", msg) + .endObject() + ); + bulkRequest.add(indexRequest); + } + BulkResponse bulkResponse = client().bulk(bulkRequest).actionGet(); + assertFalse(bulkResponse.hasFailures()); + safeGet(indicesAdmin().refresh(new RefreshRequest(INDEX).indicesOptions(IndicesOptions.lenientExpandOpenHidden()))); + } public static String randomMessage() { if (rarely()) { From e82fc56d4858553e64465f39bcaa3c8034e3b62b Mon Sep 17 00:00:00 2001 From: Parker Timmins Date: Mon, 11 Aug 2025 17:47:10 -0500 Subject: [PATCH 3/6] working version --- .../PatternedTextRandomTests.java | 71 ++++++------------- 1 file changed, 22 insertions(+), 49 deletions(-) diff --git a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextRandomTests.java b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextRandomTests.java index 649368f9202a8..37fb5c1551c22 100644 --- a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextRandomTests.java +++ b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextRandomTests.java @@ -7,31 +7,16 @@ package org.elasticsearch.xpack.logsdb.patternedtext; -import org.elasticsearch.ResourceNotFoundException; -import org.elasticsearch.action.ActionType; import org.elasticsearch.action.DocWriteRequest; import org.elasticsearch.action.admin.indices.create.CreateIndexRequest; -import org.elasticsearch.action.admin.indices.create.CreateIndexResponse; import org.elasticsearch.action.admin.indices.refresh.RefreshRequest; import org.elasticsearch.action.bulk.BulkRequest; import org.elasticsearch.action.bulk.BulkResponse; import org.elasticsearch.action.index.IndexRequest; -import org.elasticsearch.action.search.SearchRequest; -import 
org.elasticsearch.action.search.SearchRequestBuilder; -import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.support.IndicesOptions; -import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.time.DateFormatter; -import org.elasticsearch.common.util.CollectionUtils; -import org.elasticsearch.core.Tuple; -import org.elasticsearch.datageneration.queries.LeafQueryGenerator; -import org.elasticsearch.datastreams.DataStreamsPlugin; -import org.elasticsearch.index.IndexMode; -import org.elasticsearch.index.IndexSettings; -import org.elasticsearch.index.mapper.DateFieldMapper; import org.elasticsearch.index.mapper.extras.MapperExtrasPlugin; -import org.elasticsearch.index.query.MatchQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.license.LicenseSettings; @@ -39,16 +24,9 @@ import org.elasticsearch.search.SearchHit; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.ESTestCase; -import org.elasticsearch.test.InternalSettingsPlugin; -import org.elasticsearch.test.hamcrest.ElasticsearchAssertions; -import org.elasticsearch.test.transport.MockTransportService; -import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentFactory; -import org.elasticsearch.xcontent.XContentType; import org.elasticsearch.xcontent.json.JsonXContent; import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin; -import org.elasticsearch.xpack.core.XPackPlugin; -import org.elasticsearch.xpack.core.security.action.apikey.CreateApiKeyAction; import org.elasticsearch.xpack.logsdb.LogsDBPlugin; import org.junit.Before; @@ -60,7 +38,8 @@ import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; -import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures; +import static 
org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailuresAndResponse; + public class PatternedTextRandomTests extends ESIntegTestCase { @@ -68,20 +47,13 @@ public class PatternedTextRandomTests extends ESIntegTestCase { protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { return Settings.builder() .put(super.nodeSettings(nodeOrdinal, otherSettings)) -// .put("xpack.license.self_generated.type", "trial") -// .put("cluster.logsdb.enabled", "true") .put(LicenseSettings.SELF_GENERATED_LICENSE_TYPE.getKey(), "trial") .build(); } - - @Override protected Collection> nodePlugins() { -// return Arrays.asList(LocalStateCompositeXPackPlugin.class); return Arrays.asList(MapperExtrasPlugin.class, LogsDBPlugin.class, LocalStateCompositeXPackPlugin.class); -// return CollectionUtils.appendToCopy(super.nodePlugins(), LogsDBPlugin.class, LocalStateCompositeXPackPlugin.class); -// return List.of(InternalSettingsPlugin.class, XPackPlugin.class, LogsDBPlugin.class, DataStreamsPlugin.class); } private static final String INDEX = "test_index"; @@ -93,9 +65,8 @@ public void setup() { assumeTrue("Only when patterned_text feature flag is enabled", PatternedTextFieldMapper.PATTERNED_TEXT_MAPPER.isEnabled()); } - public void test() throws IOException { + public void testQueries() throws IOException { var settings = Settings.builder(); -// var settings = Settings.builder().put(IndexSettings.MODE.getKey(), IndexMode.LOGSDB.getName()); var mappings = XContentFactory.jsonBuilder() .startObject() .startObject("properties") @@ -118,10 +89,12 @@ public void test() throws IOException { var createResponse = safeGet(admin().indices().create(createRequest)); assertTrue(createResponse.isAcknowledged()); - int numDocs = randomIntBetween(1, 300); + int numDocs = randomIntBetween(10, 1000); List logMessages = generateMessages(numDocs); indexDocs(logMessages); + int[] numQueriesWithResults = {0}; + int[] totalQueries = {0}; for (var message : logMessages) { List 
queryTerms = randomQueryParts(message); @@ -132,24 +105,24 @@ public void test() throws IOException { var ptQuery = patternedTextQueries.get(i); var motQuery = matchOnlyQueries.get(i); - var ptResponse = client().prepareSearch(INDEX).setQuery(ptQuery).setSize(numDocs).get(); - var motResponse = client().prepareSearch(INDEX).setQuery(motQuery).setSize(numDocs).get(); - - assertNoFailures(ptResponse); - assertNoFailures(motResponse); - -// assertTrue(motResponse.getHits().getTotalHits().value() > 0); - assertEquals( - motResponse.getHits().getTotalHits().value(), - ptResponse.getHits().getTotalHits().value() - ); - - var motDocIds = Arrays.stream(motResponse.getHits().getHits()).map(SearchHit::getId).collect(Collectors.toSet()); - var ptDocIds = Arrays.stream(ptResponse.getHits().getHits()).map(SearchHit::getId).collect(Collectors.toSet()); - - assertEquals(motDocIds, ptDocIds); + var ptRequest = client().prepareSearch(INDEX).setQuery(ptQuery).setSize(numDocs); + var motRequest = client().prepareSearch(INDEX).setQuery(motQuery).setSize(numDocs); + totalQueries[0]++; + assertNoFailuresAndResponse(ptRequest, ptResponse -> { + assertNoFailuresAndResponse(motRequest, motResponse -> { + assertEquals(motResponse.getHits().getTotalHits().value(), ptResponse.getHits().getTotalHits().value()); + + if (motResponse.getHits().getTotalHits().value() > 0) { + numQueriesWithResults[0]++; + } + var motDocIds = Arrays.stream(motResponse.getHits().getHits()).map(SearchHit::getId).collect(Collectors.toSet()); + var ptDocIds = Arrays.stream(ptResponse.getHits().getHits()).map(SearchHit::getId).collect(Collectors.toSet()); + assertEquals(motDocIds, ptDocIds); + }); + }); } } + System.out.println("num queries with results: " + numQueriesWithResults[0] + ", total: " + totalQueries[0]); } public List generateQueries(String field, List queryTerms) { From 911deed4fb42d1c8fa7644b6fcd86ec22f1daf5f Mon Sep 17 00:00:00 2001 From: Parker Timmins Date: Tue, 12 Aug 2025 10:06:34 -0500 Subject: 
[PATCH 4/6] cleanup --- .../PatternedTextRandomTests.java | 88 +++++++++---------- 1 file changed, 40 insertions(+), 48 deletions(-) diff --git a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextRandomTests.java b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextRandomTests.java index 37fb5c1551c22..66c8a85bad422 100644 --- a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextRandomTests.java +++ b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextRandomTests.java @@ -7,6 +7,8 @@ package org.elasticsearch.xpack.logsdb.patternedtext; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.elasticsearch.action.DocWriteRequest; import org.elasticsearch.action.admin.indices.create.CreateIndexRequest; import org.elasticsearch.action.admin.indices.refresh.RefreshRequest; @@ -32,16 +34,22 @@ import java.io.IOException; import java.time.Instant; -import java.time.ZoneOffset; import java.time.ZonedDateTime; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.UUID; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailuresAndResponse; public class PatternedTextRandomTests extends ESIntegTestCase { + private static final Logger logger = LogManager.getLogger(PatternedTextRandomTests.class); @Override protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { @@ -59,6 +67,15 @@ protected Collection> nodePlugins() { private static final String INDEX = "test_index"; private static final String MATCH_ONLY_TEXT_FIELD = 
"field_match_only_text"; private static final String PATTERNED_TEXT_FIELD = "field_patterned_text"; + private static final String MAPPING = """ + { + "properties": { + "@timestamp": { "type": "date" }, + "field_match_only_text": { "type": "match_only_text" }, + "field_patterned_text": { "type": "patterned_text" } + } + } + """; @Before public void setup() { @@ -66,92 +83,67 @@ public void setup() { } public void testQueries() throws IOException { - var settings = Settings.builder(); - var mappings = XContentFactory.jsonBuilder() - .startObject() - .startObject("properties") - .startObject("@timestamp") - .field("type", "date") - .endObject() - .startObject(PATTERNED_TEXT_FIELD) - .field("type", "patterned_text") - .endObject() - .startObject(MATCH_ONLY_TEXT_FIELD) - .field("type", "match_only_text") - .endObject() - .endObject() - .endObject(); - var createRequest = new CreateIndexRequest(INDEX) - .settings(settings) - .mapping(mappings); + .mapping(MAPPING); - var createResponse = safeGet(admin().indices().create(createRequest)); - assertTrue(createResponse.isAcknowledged()); + assertAcked(admin().indices().create(createRequest)); - int numDocs = randomIntBetween(10, 1000); + int numDocs = randomIntBetween(10, 200); List logMessages = generateMessages(numDocs); indexDocs(logMessages); - int[] numQueriesWithResults = {0}; - int[] totalQueries = {0}; + var numQueriesWithResults = new AtomicInteger(0); + var numQueriesTotal = new AtomicInteger(0); for (var message : logMessages) { - List queryTerms = randomQueryParts(message); - + List queryTerms = randomQueryValues(message); var patternedTextQueries = generateQueries(PATTERNED_TEXT_FIELD, queryTerms); var matchOnlyQueries = generateQueries(MATCH_ONLY_TEXT_FIELD, queryTerms); for (int i = 0; i < patternedTextQueries.size(); ++i) { - var ptQuery = patternedTextQueries.get(i); - var motQuery = matchOnlyQueries.get(i); + var ptRequest = client().prepareSearch(INDEX).setQuery(patternedTextQueries.get(i)); + var 
motRequest = client().prepareSearch(INDEX).setQuery(matchOnlyQueries.get(i)); - var ptRequest = client().prepareSearch(INDEX).setQuery(ptQuery).setSize(numDocs); - var motRequest = client().prepareSearch(INDEX).setQuery(motQuery).setSize(numDocs); - totalQueries[0]++; + numQueriesTotal.incrementAndGet(); assertNoFailuresAndResponse(ptRequest, ptResponse -> { assertNoFailuresAndResponse(motRequest, motResponse -> { + assertEquals(motResponse.getHits().getTotalHits().value(), ptResponse.getHits().getTotalHits().value()); - if (motResponse.getHits().getTotalHits().value() > 0) { - numQueriesWithResults[0]++; - } var motDocIds = Arrays.stream(motResponse.getHits().getHits()).map(SearchHit::getId).collect(Collectors.toSet()); var ptDocIds = Arrays.stream(ptResponse.getHits().getHits()).map(SearchHit::getId).collect(Collectors.toSet()); assertEquals(motDocIds, ptDocIds); + + if (motResponse.getHits().getTotalHits().value() > 0) { + numQueriesWithResults.incrementAndGet(); + } }); }); } } - System.out.println("num queries with results: " + numQueriesWithResults[0] + ", total: " + totalQueries[0]); + logger.info("Ran {} queries, of which {} had matches", numQueriesTotal.get(), numQueriesWithResults.get()); } - public List generateQueries(String field, List queryTerms) { + private List generateQueries(String field, List queryTerms) { var results = new ArrayList(); - for (var queryTerm : queryTerms) { results.add(QueryBuilders.termQuery(field, queryTerm)); results.add(QueryBuilders.matchQuery(field, queryTerm)); results.add(QueryBuilders.matchPhraseQuery(field, queryTerm)); } - return results; } - private static List randomQueryParts(String value) { + private static List randomQueryValues(String value) { var values = new ArrayList(); - var tokenizerRegex = "[\\s\\p{Punct}]+"; - List tokens = Arrays.stream(value.split(tokenizerRegex)).filter(t -> t.isEmpty() == false).toList(); - // full value values.add(value); - // random sub-phrase values.add(randomSubstring(value)); + var 
tokenizerRegex = "[\\s\\p{Punct}]+"; + List tokens = Arrays.stream(value.split(tokenizerRegex)).filter(t -> t.isEmpty() == false).toList(); if (tokens.isEmpty() == false) { - // random term values.add(randomFrom(tokens)); - // random sub-phrase - values.add(getSubPhrase(tokens)); + values.add(randomSubPhrase(tokens)); } return values; } @@ -162,7 +154,7 @@ private static String randomSubstring(String value) { return value.substring(low, hi); } - private static String getSubPhrase(List tokens) { + private static String randomSubPhrase(List tokens) { int low = ESTestCase.randomIntBetween(0, tokens.size() - 1); int hi = ESTestCase.randomIntBetween(low + 1, tokens.size()); return String.join(" ", tokens.subList(low, hi)); From 8f491ccd8491f0d8e9756dd3c5663bc02ca17545 Mon Sep 17 00:00:00 2001 From: Parker Timmins Date: Tue, 12 Aug 2025 10:57:38 -0500 Subject: [PATCH 5/6] Use random messages in synth source tests --- .../PatternedTextFieldMapperTests.java | 22 +----- ...=> PatternedTextVsMatchOnlyTextTests.java} | 73 ++++++++++--------- 2 files changed, 41 insertions(+), 54 deletions(-) rename x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/{PatternedTextRandomTests.java => PatternedTextVsMatchOnlyTextTests.java} (74%) diff --git a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldMapperTests.java b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldMapperTests.java index 8f56159355bbe..f61b8f7f8078a 100644 --- a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldMapperTests.java +++ b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldMapperTests.java @@ -24,7 +24,6 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Tuple; -import org.elasticsearch.index.mapper.DateFieldMapper; 
import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.LuceneDocument; @@ -46,7 +45,6 @@ import java.util.Collection; import java.util.Collections; import java.util.List; -import java.util.UUID; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; @@ -253,25 +251,7 @@ public SyntheticSourceExample example(int maxValues) { } private Tuple generateValue() { - StringBuilder builder = new StringBuilder(); - if (randomBoolean()) { - builder.append(randomAlphaOfLength(5)); - } else { - String timestamp = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.formatMillis(System.currentTimeMillis()); - builder.append(timestamp); - } - for (int i = 0; i < randomIntBetween(0, 9); i++) { - builder.append(" "); - int rand = randomIntBetween(0, 4); - switch (rand) { - case 0 -> builder.append(randomAlphaOfLength(5)); - case 1 -> builder.append(randomAlphanumericOfLength(5)); - case 2 -> builder.append(UUID.randomUUID()); - case 3 -> builder.append(randomIp(true)); - case 4 -> builder.append(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.formatMillis(randomMillisUpToYear9999())); - } - } - String value = builder.toString(); + var value = PatternedTextVsMatchOnlyTextTests.randomMessage(); return Tuple.tuple(value, value); } diff --git a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextRandomTests.java b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextVsMatchOnlyTextTests.java similarity index 74% rename from x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextRandomTests.java rename to x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextVsMatchOnlyTextTests.java index 66c8a85bad422..7f0c77d127521 100644 --- 
a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextRandomTests.java +++ b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextVsMatchOnlyTextTests.java @@ -26,7 +26,6 @@ import org.elasticsearch.search.SearchHit; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.ESTestCase; -import org.elasticsearch.xcontent.XContentFactory; import org.elasticsearch.xcontent.json.JsonXContent; import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin; import org.elasticsearch.xpack.logsdb.LogsDBPlugin; @@ -42,14 +41,15 @@ import java.util.UUID; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.BiFunction; import java.util.stream.Collectors; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailuresAndResponse; -public class PatternedTextRandomTests extends ESIntegTestCase { - private static final Logger logger = LogManager.getLogger(PatternedTextRandomTests.class); +public class PatternedTextVsMatchOnlyTextTests extends ESIntegTestCase { + private static final Logger logger = LogManager.getLogger(PatternedTextVsMatchOnlyTextTests.class); @Override protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { @@ -92,45 +92,52 @@ public void testQueries() throws IOException { List logMessages = generateMessages(numDocs); indexDocs(logMessages); + var queryTerms = logMessages.stream().flatMap(m -> randomQueryValues(m).stream()).toList(); + { + var ptQueries = buildQueries(PATTERNED_TEXT_FIELD, queryTerms, QueryBuilders::matchPhraseQuery); + var motQueries = buildQueries(MATCH_ONLY_TEXT_FIELD, queryTerms, QueryBuilders::matchPhraseQuery); + assertQueryResults(ptQueries, motQueries, numDocs, "phrase"); + } + { + var ptQueries = buildQueries(PATTERNED_TEXT_FIELD, queryTerms, 
QueryBuilders::matchQuery); + var motQueries = buildQueries(MATCH_ONLY_TEXT_FIELD, queryTerms, QueryBuilders::matchQuery); + assertQueryResults(ptQueries, motQueries, numDocs, "match"); + } + { + var ptQueries = buildQueries(PATTERNED_TEXT_FIELD, queryTerms, QueryBuilders::termQuery); + var motQueries = buildQueries(MATCH_ONLY_TEXT_FIELD, queryTerms, QueryBuilders::termQuery); + assertQueryResults(ptQueries, motQueries, numDocs, "term"); + } + } + + private void assertQueryResults(List patternedTextQueries, List matchOnlyTextQueries, int numDocs, String queryType) { var numQueriesWithResults = new AtomicInteger(0); var numQueriesTotal = new AtomicInteger(0); - for (var message : logMessages) { - List queryTerms = randomQueryValues(message); - var patternedTextQueries = generateQueries(PATTERNED_TEXT_FIELD, queryTerms); - var matchOnlyQueries = generateQueries(MATCH_ONLY_TEXT_FIELD, queryTerms); - - for (int i = 0; i < patternedTextQueries.size(); ++i) { - var ptRequest = client().prepareSearch(INDEX).setQuery(patternedTextQueries.get(i)); - var motRequest = client().prepareSearch(INDEX).setQuery(matchOnlyQueries.get(i)); + for (int i = 0; i < patternedTextQueries.size(); ++i) { + var ptRequest = client().prepareSearch(INDEX).setQuery(patternedTextQueries.get(i)).setSize(numDocs); + var motRequest = client().prepareSearch(INDEX).setQuery(matchOnlyTextQueries.get(i)).setSize(numDocs); - numQueriesTotal.incrementAndGet(); - assertNoFailuresAndResponse(ptRequest, ptResponse -> { - assertNoFailuresAndResponse(motRequest, motResponse -> { + numQueriesTotal.incrementAndGet(); + assertNoFailuresAndResponse(ptRequest, ptResponse -> { + assertNoFailuresAndResponse(motRequest, motResponse -> { - assertEquals(motResponse.getHits().getTotalHits().value(), ptResponse.getHits().getTotalHits().value()); + assertEquals(motResponse.getHits().getTotalHits().value(), ptResponse.getHits().getTotalHits().value()); - var motDocIds = 
Arrays.stream(motResponse.getHits().getHits()).map(SearchHit::getId).collect(Collectors.toSet()); - var ptDocIds = Arrays.stream(ptResponse.getHits().getHits()).map(SearchHit::getId).collect(Collectors.toSet()); - assertEquals(motDocIds, ptDocIds); + var motDocIds = Arrays.stream(motResponse.getHits().getHits()).map(SearchHit::getId).collect(Collectors.toSet()); + var ptDocIds = Arrays.stream(ptResponse.getHits().getHits()).map(SearchHit::getId).collect(Collectors.toSet()); + assertEquals(motDocIds, ptDocIds); - if (motResponse.getHits().getTotalHits().value() > 0) { - numQueriesWithResults.incrementAndGet(); - } - }); + if (motResponse.getHits().getTotalHits().value() > 0) { + numQueriesWithResults.incrementAndGet(); + } }); - } + }); } - logger.info("Ran {} queries, of which {} had matches", numQueriesTotal.get(), numQueriesWithResults.get()); + logger.info("Ran {} {} queries, of which {} had matches", numQueriesTotal.get(), queryType, numQueriesWithResults.get()); } - private List generateQueries(String field, List queryTerms) { - var results = new ArrayList(); - for (var queryTerm : queryTerms) { - results.add(QueryBuilders.termQuery(field, queryTerm)); - results.add(QueryBuilders.matchQuery(field, queryTerm)); - results.add(QueryBuilders.matchPhraseQuery(field, queryTerm)); - } - return results; + private List buildQueries(String field, List terms, BiFunction queryBuilder) { + return terms.stream().map(t -> queryBuilder.apply(field, t)).toList(); } private static List randomQueryValues(String value) { @@ -211,7 +218,7 @@ public static String randomMessage() { () -> randomRealisticUnicodeOfCodepointLength(randomIntBetween(1, 20)), () -> UUID.randomUUID().toString(), () -> randomIp(randomBoolean()), - PatternedTextRandomTests::randomTimestamp, + PatternedTextVsMatchOnlyTextTests::randomTimestamp, ESTestCase::randomInt, ESTestCase::randomDouble ); From bca12129f90cee9fbb6892b88b01fdc97980638c Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 12 Aug 
2025 16:10:28 +0000 Subject: [PATCH 6/6] [CI] Auto commit changes from spotless --- .../PatternedTextVsMatchOnlyTextTests.java | 33 ++++++++++--------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextVsMatchOnlyTextTests.java b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextVsMatchOnlyTextTests.java index 7f0c77d127521..89185b1be8f11 100644 --- a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextVsMatchOnlyTextTests.java +++ b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextVsMatchOnlyTextTests.java @@ -47,16 +47,15 @@ import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailuresAndResponse; - public class PatternedTextVsMatchOnlyTextTests extends ESIntegTestCase { private static final Logger logger = LogManager.getLogger(PatternedTextVsMatchOnlyTextTests.class); @Override protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { return Settings.builder() - .put(super.nodeSettings(nodeOrdinal, otherSettings)) - .put(LicenseSettings.SELF_GENERATED_LICENSE_TYPE.getKey(), "trial") - .build(); + .put(super.nodeSettings(nodeOrdinal, otherSettings)) + .put(LicenseSettings.SELF_GENERATED_LICENSE_TYPE.getKey(), "trial") + .build(); } @Override @@ -68,14 +67,14 @@ protected Collection> nodePlugins() { private static final String MATCH_ONLY_TEXT_FIELD = "field_match_only_text"; private static final String PATTERNED_TEXT_FIELD = "field_patterned_text"; private static final String MAPPING = """ - { - "properties": { - "@timestamp": { "type": "date" }, - "field_match_only_text": { "type": "match_only_text" }, - "field_patterned_text": { "type": "patterned_text" } - } - } - """; + { + "properties": { + "@timestamp": { 
"type": "date" }, + "field_match_only_text": { "type": "match_only_text" }, + "field_patterned_text": { "type": "patterned_text" } + } + } + """; @Before public void setup() { @@ -83,8 +82,7 @@ public void setup() { } public void testQueries() throws IOException { - var createRequest = new CreateIndexRequest(INDEX) - .mapping(MAPPING); + var createRequest = new CreateIndexRequest(INDEX).mapping(MAPPING); assertAcked(admin().indices().create(createRequest)); @@ -110,7 +108,12 @@ public void testQueries() throws IOException { } } - private void assertQueryResults(List patternedTextQueries, List matchOnlyTextQueries, int numDocs, String queryType) { + private void assertQueryResults( + List patternedTextQueries, + List matchOnlyTextQueries, + int numDocs, + String queryType + ) { var numQueriesWithResults = new AtomicInteger(0); var numQueriesTotal = new AtomicInteger(0); for (int i = 0; i < patternedTextQueries.size(); ++i) {