Skip to content

Commit 3c5aaee

Browse files
parkertimmins authored and javanna committed
Test that random queries against patterned_text and match_only_text produce equal results (#132749)
Add a test that verifies that patterned_text and match_only_text fields behave the same. The test creates random documents containing a patterned_text field and a match_only_text field with the same value. It then generates random phrase, match, and term queries from the field values and runs them against the index. Finally, it verifies that the patterned_text field queries return the same set of ids as the match_only_text field queries.
1 parent 4ace824 commit 3c5aaee

File tree

2 files changed

+267
-21
lines changed

2 files changed

+267
-21
lines changed

x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldMapperTests.java

Lines changed: 1 addition & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
import org.elasticsearch.common.Strings;
2525
import org.elasticsearch.common.settings.Settings;
2626
import org.elasticsearch.core.Tuple;
27-
import org.elasticsearch.index.mapper.DateFieldMapper;
2827
import org.elasticsearch.index.mapper.DocumentMapper;
2928
import org.elasticsearch.index.mapper.KeywordFieldMapper;
3029
import org.elasticsearch.index.mapper.LuceneDocument;
@@ -46,7 +45,6 @@
4645
import java.util.Collection;
4746
import java.util.Collections;
4847
import java.util.List;
49-
import java.util.UUID;
5048

5149
import static org.hamcrest.Matchers.containsString;
5250
import static org.hamcrest.Matchers.equalTo;
@@ -253,25 +251,7 @@ public SyntheticSourceExample example(int maxValues) {
253251
}
254252

255253
private Tuple<String, String> generateValue() {
256-
StringBuilder builder = new StringBuilder();
257-
if (randomBoolean()) {
258-
builder.append(randomAlphaOfLength(5));
259-
} else {
260-
String timestamp = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.formatMillis(System.currentTimeMillis());
261-
builder.append(timestamp);
262-
}
263-
for (int i = 0; i < randomIntBetween(0, 9); i++) {
264-
builder.append(" ");
265-
int rand = randomIntBetween(0, 4);
266-
switch (rand) {
267-
case 0 -> builder.append(randomAlphaOfLength(5));
268-
case 1 -> builder.append(randomAlphanumericOfLength(5));
269-
case 2 -> builder.append(UUID.randomUUID());
270-
case 3 -> builder.append(randomIp(true));
271-
case 4 -> builder.append(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.formatMillis(randomMillisUpToYear9999()));
272-
}
273-
}
274-
String value = builder.toString();
254+
var value = PatternedTextVsMatchOnlyTextTests.randomMessage();
275255
return Tuple.tuple(value, value);
276256
}
277257

Lines changed: 266 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,266 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.logsdb.patternedtext;
9+
10+
import org.apache.logging.log4j.LogManager;
11+
import org.apache.logging.log4j.Logger;
12+
import org.elasticsearch.action.DocWriteRequest;
13+
import org.elasticsearch.action.admin.indices.create.CreateIndexRequest;
14+
import org.elasticsearch.action.admin.indices.refresh.RefreshRequest;
15+
import org.elasticsearch.action.bulk.BulkRequest;
16+
import org.elasticsearch.action.bulk.BulkResponse;
17+
import org.elasticsearch.action.index.IndexRequest;
18+
import org.elasticsearch.action.support.IndicesOptions;
19+
import org.elasticsearch.common.settings.Settings;
20+
import org.elasticsearch.common.time.DateFormatter;
21+
import org.elasticsearch.index.mapper.extras.MapperExtrasPlugin;
22+
import org.elasticsearch.index.query.QueryBuilder;
23+
import org.elasticsearch.index.query.QueryBuilders;
24+
import org.elasticsearch.license.LicenseSettings;
25+
import org.elasticsearch.plugins.Plugin;
26+
import org.elasticsearch.search.SearchHit;
27+
import org.elasticsearch.test.ESIntegTestCase;
28+
import org.elasticsearch.test.ESTestCase;
29+
import org.elasticsearch.xcontent.json.JsonXContent;
30+
import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin;
31+
import org.elasticsearch.xpack.logsdb.LogsDBPlugin;
32+
import org.junit.Before;
33+
34+
import java.io.IOException;
35+
import java.time.Instant;
36+
import java.time.ZonedDateTime;
37+
import java.util.ArrayList;
38+
import java.util.Arrays;
39+
import java.util.Collection;
40+
import java.util.List;
41+
import java.util.UUID;
42+
import java.util.concurrent.TimeUnit;
43+
import java.util.concurrent.atomic.AtomicInteger;
44+
import java.util.function.BiFunction;
45+
import java.util.stream.Collectors;
46+
47+
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
48+
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailuresAndResponse;
49+
50+
public class PatternedTextVsMatchOnlyTextTests extends ESIntegTestCase {
51+
private static final Logger logger = LogManager.getLogger(PatternedTextVsMatchOnlyTextTests.class);
52+
53+
@Override
54+
protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) {
55+
return Settings.builder()
56+
.put(super.nodeSettings(nodeOrdinal, otherSettings))
57+
.put(LicenseSettings.SELF_GENERATED_LICENSE_TYPE.getKey(), "trial")
58+
.build();
59+
}
60+
61+
@Override
62+
protected Collection<Class<? extends Plugin>> nodePlugins() {
63+
return Arrays.asList(MapperExtrasPlugin.class, LogsDBPlugin.class, LocalStateCompositeXPackPlugin.class);
64+
}
65+
66+
private static final String INDEX = "test_index";
67+
private static final String MATCH_ONLY_TEXT_FIELD = "field_match_only_text";
68+
private static final String PATTERNED_TEXT_FIELD = "field_patterned_text";
69+
private static final String MAPPING = """
70+
{
71+
"properties": {
72+
"@timestamp": { "type": "date" },
73+
"field_match_only_text": { "type": "match_only_text" },
74+
"field_patterned_text": { "type": "patterned_text" }
75+
}
76+
}
77+
""";
78+
79+
@Before
80+
public void setup() {
81+
assumeTrue("Only when patterned_text feature flag is enabled", PatternedTextFieldMapper.PATTERNED_TEXT_MAPPER.isEnabled());
82+
}
83+
84+
public void testQueries() throws IOException {
85+
var createRequest = new CreateIndexRequest(INDEX).mapping(MAPPING);
86+
87+
assertAcked(admin().indices().create(createRequest));
88+
89+
int numDocs = randomIntBetween(10, 200);
90+
List<String> logMessages = generateMessages(numDocs);
91+
indexDocs(logMessages);
92+
93+
var queryTerms = logMessages.stream().flatMap(m -> randomQueryValues(m).stream()).toList();
94+
{
95+
var ptQueries = buildQueries(PATTERNED_TEXT_FIELD, queryTerms, QueryBuilders::matchPhraseQuery);
96+
var motQueries = buildQueries(MATCH_ONLY_TEXT_FIELD, queryTerms, QueryBuilders::matchPhraseQuery);
97+
assertQueryResults(ptQueries, motQueries, numDocs, "phrase");
98+
}
99+
{
100+
var ptQueries = buildQueries(PATTERNED_TEXT_FIELD, queryTerms, QueryBuilders::matchQuery);
101+
var motQueries = buildQueries(MATCH_ONLY_TEXT_FIELD, queryTerms, QueryBuilders::matchQuery);
102+
assertQueryResults(ptQueries, motQueries, numDocs, "match");
103+
}
104+
{
105+
var ptQueries = buildQueries(PATTERNED_TEXT_FIELD, queryTerms, QueryBuilders::termQuery);
106+
var motQueries = buildQueries(MATCH_ONLY_TEXT_FIELD, queryTerms, QueryBuilders::termQuery);
107+
assertQueryResults(ptQueries, motQueries, numDocs, "term");
108+
}
109+
}
110+
111+
private void assertQueryResults(
112+
List<QueryBuilder> patternedTextQueries,
113+
List<QueryBuilder> matchOnlyTextQueries,
114+
int numDocs,
115+
String queryType
116+
) {
117+
var numQueriesWithResults = new AtomicInteger(0);
118+
var numQueriesTotal = new AtomicInteger(0);
119+
for (int i = 0; i < patternedTextQueries.size(); ++i) {
120+
var ptRequest = client().prepareSearch(INDEX).setQuery(patternedTextQueries.get(i)).setSize(numDocs);
121+
var motRequest = client().prepareSearch(INDEX).setQuery(matchOnlyTextQueries.get(i)).setSize(numDocs);
122+
123+
numQueriesTotal.incrementAndGet();
124+
assertNoFailuresAndResponse(ptRequest, ptResponse -> {
125+
assertNoFailuresAndResponse(motRequest, motResponse -> {
126+
127+
assertEquals(motResponse.getHits().getTotalHits().value(), ptResponse.getHits().getTotalHits().value());
128+
129+
var motDocIds = Arrays.stream(motResponse.getHits().getHits()).map(SearchHit::getId).collect(Collectors.toSet());
130+
var ptDocIds = Arrays.stream(ptResponse.getHits().getHits()).map(SearchHit::getId).collect(Collectors.toSet());
131+
assertEquals(motDocIds, ptDocIds);
132+
133+
if (motResponse.getHits().getTotalHits().value() > 0) {
134+
numQueriesWithResults.incrementAndGet();
135+
}
136+
});
137+
});
138+
}
139+
logger.info("Ran {} {} queries, of which {} had matches", numQueriesTotal.get(), queryType, numQueriesWithResults.get());
140+
}
141+
142+
private List<QueryBuilder> buildQueries(String field, List<String> terms, BiFunction<String, Object, QueryBuilder> queryBuilder) {
143+
return terms.stream().map(t -> queryBuilder.apply(field, t)).toList();
144+
}
145+
146+
private static List<String> randomQueryValues(String value) {
147+
var values = new ArrayList<String>();
148+
149+
values.add(value);
150+
values.add(randomSubstring(value));
151+
152+
var tokenizerRegex = "[\\s\\p{Punct}]+";
153+
List<String> tokens = Arrays.stream(value.split(tokenizerRegex)).filter(t -> t.isEmpty() == false).toList();
154+
if (tokens.isEmpty() == false) {
155+
values.add(randomFrom(tokens));
156+
values.add(randomSubPhrase(tokens));
157+
}
158+
return values;
159+
}
160+
161+
private static String randomSubstring(String value) {
162+
int low = ESTestCase.randomIntBetween(0, value.length() - 1);
163+
int hi = ESTestCase.randomIntBetween(low + 1, value.length());
164+
return value.substring(low, hi);
165+
}
166+
167+
private static String randomSubPhrase(List<String> tokens) {
168+
int low = ESTestCase.randomIntBetween(0, tokens.size() - 1);
169+
int hi = ESTestCase.randomIntBetween(low + 1, tokens.size());
170+
return String.join(" ", tokens.subList(low, hi));
171+
}
172+
173+
private List<String> generateMessages(int numDocs) {
174+
List<String> logMessages = new ArrayList<>();
175+
for (int i = 0; i < numDocs; i++) {
176+
logMessages.add(randomMessage());
177+
}
178+
return logMessages;
179+
}
180+
181+
private void indexDocs(List<String> logMessages) throws IOException {
182+
BulkRequest bulkRequest = new BulkRequest();
183+
long timestamp = System.currentTimeMillis();
184+
for (var msg : logMessages) {
185+
timestamp += TimeUnit.SECONDS.toMillis(1);
186+
var indexRequest = new IndexRequest(INDEX).opType(DocWriteRequest.OpType.CREATE)
187+
.source(
188+
JsonXContent.contentBuilder()
189+
.startObject()
190+
.field("@timestamp", timestamp)
191+
.field("field_patterned_text", msg)
192+
.field("field_match_only_text", msg)
193+
.endObject()
194+
);
195+
bulkRequest.add(indexRequest);
196+
}
197+
BulkResponse bulkResponse = client().bulk(bulkRequest).actionGet();
198+
assertFalse(bulkResponse.hasFailures());
199+
safeGet(indicesAdmin().refresh(new RefreshRequest(INDEX).indicesOptions(IndicesOptions.lenientExpandOpenHidden())));
200+
}
201+
202+
public static String randomMessage() {
203+
if (rarely()) {
204+
return randomRealisticUnicodeOfCodepointLength(randomIntBetween(1, 100));
205+
}
206+
207+
StringBuilder message = new StringBuilder();
208+
int numTokens = randomIntBetween(1, 30);
209+
210+
if (randomBoolean()) {
211+
message.append("[").append(randomTimestamp()).append("]");
212+
}
213+
for (int i = 0; i < numTokens; i++) {
214+
message.append(randomSeparator());
215+
216+
if (randomBoolean()) {
217+
message.append(randomSentence());
218+
} else {
219+
var token = randomFrom(
220+
random(),
221+
() -> randomRealisticUnicodeOfCodepointLength(randomIntBetween(1, 20)),
222+
() -> UUID.randomUUID().toString(),
223+
() -> randomIp(randomBoolean()),
224+
PatternedTextVsMatchOnlyTextTests::randomTimestamp,
225+
ESTestCase::randomInt,
226+
ESTestCase::randomDouble
227+
);
228+
229+
if (randomBoolean()) {
230+
message.append("[").append(token).append("]");
231+
} else {
232+
message.append(token);
233+
}
234+
}
235+
}
236+
return message.toString();
237+
}
238+
239+
private static StringBuilder randomSentence() {
240+
int words = randomIntBetween(1, 10);
241+
StringBuilder text = new StringBuilder();
242+
for (int i = 0; i < words; i++) {
243+
if (i > 0) {
244+
text.append(" ");
245+
}
246+
text.append(randomAlphaOfLength(randomIntBetween(1, 10)));
247+
}
248+
return text;
249+
}
250+
251+
private static String randomSeparator() {
252+
if (randomBoolean()) {
253+
// Return spaces frequently since current token splitting is on spaces.
254+
return " ".repeat(randomIntBetween(1, 10));
255+
} else {
256+
return randomFrom("\t\n;:.',".split(""));
257+
}
258+
}
259+
260+
private static String randomTimestamp() {
261+
long millis = randomMillisUpToYear9999();
262+
ZonedDateTime zonedDateTime = ZonedDateTime.ofInstant(Instant.ofEpochMilli(millis), randomZone());
263+
DateFormatter formatter = DateFormatter.forPattern(randomDateFormatterPattern()).withLocale(randomLocale(random()));
264+
return formatter.format(zonedDateTime);
265+
}
266+
}

0 commit comments

Comments
 (0)