Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.index.mapper.DateFieldMapper;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.LuceneDocument;
Expand All @@ -46,7 +45,6 @@
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.UUID;

import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
Expand Down Expand Up @@ -253,25 +251,7 @@ public SyntheticSourceExample example(int maxValues) {
}

/**
 * Generates one random log-like message and returns it in both slots of the tuple.
 *
 * The message comes from {@link PatternedTextVsMatchOnlyTextTests#randomMessage()} so that this
 * test and the patterned_text vs match_only_text comparison test share a single generator.
 * NOTE(review): the two slots are presumably (input value, expected synthetic-source value);
 * confirm against the caller.
 */
private Tuple<String, String> generateValue() {
    var value = PatternedTextVsMatchOnlyTextTests.randomMessage();
    return Tuple.tuple(value, value);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,266 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.logsdb.patternedtext;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.action.DocWriteRequest;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequest;
import org.elasticsearch.action.admin.indices.refresh.RefreshRequest;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.support.IndicesOptions;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.time.DateFormatter;
import org.elasticsearch.index.mapper.extras.MapperExtrasPlugin;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.license.LicenseSettings;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xcontent.json.JsonXContent;
import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin;
import org.elasticsearch.xpack.logsdb.LogsDBPlugin;
import org.junit.Before;

import java.io.IOException;
import java.time.Instant;
import java.time.ZonedDateTime;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.BiFunction;
import java.util.stream.Collectors;

import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailuresAndResponse;

public class PatternedTextVsMatchOnlyTextTests extends ESIntegTestCase {
private static final Logger logger = LogManager.getLogger(PatternedTextVsMatchOnlyTextTests.class);

/**
 * Returns the inherited node settings with the self-generated license type set to "trial".
 * NOTE(review): the trial license is presumably needed for the licensed mapper under test; confirm.
 */
@Override
protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) {
    Settings.Builder settings = Settings.builder();
    settings.put(super.nodeSettings(nodeOrdinal, otherSettings));
    settings.put(LicenseSettings.SELF_GENERATED_LICENSE_TYPE.getKey(), "trial");
    return settings.build();
}

/**
 * Lists the plugins installed on every test node: mapper-extras, logsdb and the local x-pack composite plugin.
 */
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
    List<Class<? extends Plugin>> plugins = Arrays.asList(
        MapperExtrasPlugin.class,
        LogsDBPlugin.class,
        LocalStateCompositeXPackPlugin.class
    );
    return plugins;
}

/** Name of the index that the test creates, fills and searches. */
private static final String INDEX = "test_index";
/** Field mapped as {@code match_only_text}; must match the name used in {@link #MAPPING}. */
private static final String MATCH_ONLY_TEXT_FIELD = "field_match_only_text";
/** Field mapped as {@code patterned_text}; must match the name used in {@link #MAPPING}. */
private static final String PATTERNED_TEXT_FIELD = "field_patterned_text";
/** Index mapping: a date {@code @timestamp} plus one field of each text type being compared. */
private static final String MAPPING = """
{
"properties": {
"@timestamp": { "type": "date" },
"field_match_only_text": { "type": "match_only_text" },
"field_patterned_text": { "type": "patterned_text" }
}
}
""";

/**
 * Skips every test in this class unless the patterned_text mapper feature flag is enabled.
 */
@Before
public void setup() {
    boolean patternedTextEnabled = PatternedTextFieldMapper.PATTERNED_TEXT_MAPPER.isEnabled();
    assumeTrue("Only when patterned_text feature flag is enabled", patternedTextEnabled);
}

/**
 * Indexes the same random messages into a patterned_text and a match_only_text field, then checks
 * that phrase, match and term queries return the same documents for both fields.
 */
public void testQueries() throws IOException {
    var createRequest = new CreateIndexRequest(INDEX).mapping(MAPPING);
    assertAcked(admin().indices().create(createRequest));

    int docCount = randomIntBetween(10, 200);
    List<String> messages = generateMessages(docCount);
    indexDocs(messages);

    // Derive several query strings from each indexed message, in message order.
    List<String> queryValues = new ArrayList<>();
    for (String message : messages) {
        queryValues.addAll(randomQueryValues(message));
    }

    compareFieldsForQueryType("phrase", QueryBuilders::matchPhraseQuery, queryValues, docCount);
    compareFieldsForQueryType("match", QueryBuilders::matchQuery, queryValues, docCount);
    compareFieldsForQueryType("term", QueryBuilders::termQuery, queryValues, docCount);
}

/** Builds one query per value for each of the two fields and asserts that their results agree. */
private void compareFieldsForQueryType(
    String queryType,
    BiFunction<String, Object, QueryBuilder> queryFactory,
    List<String> queryValues,
    int numDocs
) {
    var ptQueries = buildQueries(PATTERNED_TEXT_FIELD, queryValues, queryFactory);
    var motQueries = buildQueries(MATCH_ONLY_TEXT_FIELD, queryValues, queryFactory);
    assertQueryResults(ptQueries, motQueries, numDocs, queryType);
}

/**
 * Runs each pair of queries (same position in both lists) against {@code INDEX} and asserts that the
 * patterned_text query and the match_only_text query report the same total hit count and the same
 * set of document ids.
 *
 * @param patternedTextQueries queries targeting the patterned_text field
 * @param matchOnlyTextQueries queries targeting the match_only_text field, paired by index with the first list
 * @param numDocs              used as the search size so that every matching document can be returned
 * @param queryType            label used in failure messages and in the summary log line
 */
private void assertQueryResults(
    List<QueryBuilder> patternedTextQueries,
    List<QueryBuilder> matchOnlyTextQueries,
    int numDocs,
    String queryType
) {
    // The lists are compared position by position, so a length mismatch would silently skip or overrun queries.
    assertEquals(
        "patterned_text and match_only_text query lists must have the same size",
        matchOnlyTextQueries.size(),
        patternedTextQueries.size()
    );

    var numQueriesWithResults = new AtomicInteger(0);
    for (int i = 0; i < patternedTextQueries.size(); ++i) {
        var ptQuery = patternedTextQueries.get(i);
        var motQuery = matchOnlyTextQueries.get(i);
        var ptRequest = client().prepareSearch(INDEX).setQuery(ptQuery).setSize(numDocs);
        var motRequest = client().prepareSearch(INDEX).setQuery(motQuery).setSize(numDocs);

        // Inputs are random, so a failure must name the diverging query to be debuggable.
        String context = queryType + " query #" + i + " patterned_text=" + ptQuery + " match_only_text=" + motQuery;

        assertNoFailuresAndResponse(ptRequest, ptResponse -> {
            assertNoFailuresAndResponse(motRequest, motResponse -> {
                long motTotal = motResponse.getHits().getTotalHits().value();
                long ptTotal = ptResponse.getHits().getTotalHits().value();
                assertEquals("total hits differ for " + context, motTotal, ptTotal);

                var motDocIds = Arrays.stream(motResponse.getHits().getHits()).map(SearchHit::getId).collect(Collectors.toSet());
                var ptDocIds = Arrays.stream(ptResponse.getHits().getHits()).map(SearchHit::getId).collect(Collectors.toSet());
                assertEquals("matching doc ids differ for " + context, motDocIds, ptDocIds);

                if (motTotal > 0) {
                    numQueriesWithResults.incrementAndGet();
                }
            });
        });
    }
    // This line is only reached when every pair passed, so the total equals the list size.
    logger.info("Ran {} {} queries, of which {} had matches", patternedTextQueries.size(), queryType, numQueriesWithResults.get());
}

/**
 * Creates one query per term by applying {@code queryBuilder} to {@code field} and that term,
 * keeping the order of {@code terms}.
 */
private List<QueryBuilder> buildQueries(String field, List<String> terms, BiFunction<String, Object, QueryBuilder> queryBuilder) {
    return terms.stream()
        .map(term -> queryBuilder.apply(field, term))
        .toList();
}

/**
 * Derives query strings from an indexed message: the message itself, a random substring and,
 * when the message contains at least one token, a random token and a random run of
 * consecutive tokens joined by single spaces.
 */
private static List<String> randomQueryValues(String value) {
    List<String> candidates = new ArrayList<>();
    candidates.add(value);
    candidates.add(randomSubstring(value));

    // Split on runs of whitespace and punctuation, discarding the empty leading piece if any.
    List<String> words = new ArrayList<>();
    for (String piece : value.split("[\\s\\p{Punct}]+")) {
        if (piece.isEmpty() == false) {
            words.add(piece);
        }
    }
    if (words.isEmpty()) {
        return candidates;
    }

    candidates.add(randomFrom(words));
    candidates.add(randomSubPhrase(words));
    return candidates;
}

/**
 * Returns a random non-empty substring of {@code value}, chosen by char index.
 * The random bounds require {@code value} to be non-empty.
 */
private static String randomSubstring(String value) {
    final int length = value.length();
    final int start = ESTestCase.randomIntBetween(0, length - 1);
    final int end = ESTestCase.randomIntBetween(start + 1, length);
    return value.substring(start, end);
}

/**
 * Picks a random non-empty run of consecutive tokens and joins them with single spaces.
 * The random bounds require {@code tokens} to be non-empty.
 */
private static String randomSubPhrase(List<String> tokens) {
    final int count = tokens.size();
    final int first = ESTestCase.randomIntBetween(0, count - 1);
    final int endExclusive = ESTestCase.randomIntBetween(first + 1, count);
    List<String> phraseTokens = tokens.subList(first, endExclusive);
    return String.join(" ", phraseTokens);
}

/**
 * Generates {@code numDocs} random messages, one call to {@code randomMessage()} each.
 */
private List<String> generateMessages(int numDocs) {
    List<String> messages = new ArrayList<>();
    int remaining = numDocs;
    while (remaining > 0) {
        messages.add(randomMessage());
        remaining--;
    }
    return messages;
}

/**
 * Indexes every message into {@code INDEX} with one bulk request, writing the same text to both
 * the patterned_text and the match_only_text field, then refreshes the index so the documents
 * are searchable.
 *
 * @param logMessages messages to index, one document per message
 * @throws IOException if building a document source fails
 */
private void indexDocs(List<String> logMessages) throws IOException {
    BulkRequest bulkRequest = new BulkRequest();
    // Timestamps start at the wall clock and advance one second per document.
    long timestamp = System.currentTimeMillis();
    for (var msg : logMessages) {
        timestamp += TimeUnit.SECONDS.toMillis(1);
        var indexRequest = new IndexRequest(INDEX).opType(DocWriteRequest.OpType.CREATE)
            .source(
                JsonXContent.contentBuilder()
                    .startObject()
                    .field("@timestamp", timestamp)
                    // Use the shared constants so the indexed fields cannot drift from the queried ones.
                    .field(PATTERNED_TEXT_FIELD, msg)
                    .field(MATCH_ONLY_TEXT_FIELD, msg)
                    .endObject()
            );
        bulkRequest.add(indexRequest);
    }
    BulkResponse bulkResponse = client().bulk(bulkRequest).actionGet();
    // Surface the per-item failure reasons instead of a bare assertion error.
    assertFalse(bulkResponse.buildFailureMessage(), bulkResponse.hasFailures());
    safeGet(indicesAdmin().refresh(new RefreshRequest(INDEX).indicesOptions(IndicesOptions.lenientExpandOpenHidden())));
}

/**
 * Builds a random log-like message.
 *
 * Rarely the result is a plain random Unicode string of 1 to 100 code points. Otherwise it is an
 * optional bracketed timestamp followed by 1 to 30 tokens, each preceded by a random separator.
 * A token is either a sentence of random alphabetic words or one of: a random Unicode string,
 * a UUID, an IP address, a timestamp, an int or a double, optionally wrapped in square brackets.
 *
 * All randomness comes from the test framework's seeded source, so a message can be reproduced
 * from the test seed.
 *
 * @return the generated message, never empty
 */
public static String randomMessage() {
    if (rarely()) {
        return randomRealisticUnicodeOfCodepointLength(randomIntBetween(1, 100));
    }

    StringBuilder message = new StringBuilder();
    int numTokens = randomIntBetween(1, 30);

    if (randomBoolean()) {
        message.append("[").append(randomTimestamp()).append("]");
    }
    for (int i = 0; i < numTokens; i++) {
        message.append(randomSeparator());

        if (randomBoolean()) {
            message.append(randomSentence());
        } else {
            var token = randomFrom(
                random(),
                () -> randomRealisticUnicodeOfCodepointLength(randomIntBetween(1, 20)),
                // UUID.randomUUID() ignores the test seed; build the UUID from seeded longs instead.
                // The textual 8-4-4-4-12 hex shape is the same, but version/variant bits are random.
                () -> new UUID(randomLong(), randomLong()).toString(),
                () -> randomIp(randomBoolean()),
                PatternedTextVsMatchOnlyTextTests::randomTimestamp,
                ESTestCase::randomInt,
                ESTestCase::randomDouble
            );

            if (randomBoolean()) {
                message.append("[").append(token).append("]");
            } else {
                message.append(token);
            }
        }
    }
    return message.toString();
}

/**
 * Builds 1 to 10 random alphabetic words of 1 to 10 characters each, separated by single spaces.
 */
private static StringBuilder randomSentence() {
    final int wordCount = randomIntBetween(1, 10);
    // There is always at least one word, so start with it and prefix each later word with a space.
    StringBuilder sentence = new StringBuilder();
    sentence.append(randomAlphaOfLength(randomIntBetween(1, 10)));
    for (int w = 1; w < wordCount; w++) {
        sentence.append(" ").append(randomAlphaOfLength(randomIntBetween(1, 10)));
    }
    return sentence;
}

/**
 * Returns either a run of 1 to 10 spaces or a single tab, newline or punctuation character,
 * each alternative chosen with equal probability.
 */
private static String randomSeparator() {
    final boolean useSpaces = randomBoolean();
    if (useSpaces == false) {
        String[] singleCharSeparators = "\t\n;:.',".split("");
        return randomFrom(singleCharSeparators);
    }
    // Return spaces frequently since current token splitting is on spaces.
    return " ".repeat(randomIntBetween(1, 10));
}

/**
 * Formats a random instant (up to year 9999) in a random time zone, using a random date
 * pattern and a random locale.
 */
private static String randomTimestamp() {
    // Keep the random draws in this order: millis, zone, pattern, locale.
    Instant instant = Instant.ofEpochMilli(randomMillisUpToYear9999());
    ZonedDateTime dateTime = ZonedDateTime.ofInstant(instant, randomZone());
    DateFormatter patternFormatter = DateFormatter.forPattern(randomDateFormatterPattern());
    DateFormatter localizedFormatter = patternFormatter.withLocale(randomLocale(random()));
    return localizedFormatter.format(dateTime);
}
}