Skip to content

Commit 8708977

Browse files
martijnvgmridula-s109
authored andcommitted
Change patterned_text analyzer from standard to delimiter. (elastic#134434)
Delimiter analyzer splits text based on a pre-defined list of delimiters.
1 parent 1d20da5 commit 8708977

File tree

6 files changed

+266
-29
lines changed

6 files changed

+266
-29
lines changed
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.logsdb.patternedtext;
9+
10+
import org.apache.lucene.analysis.Analyzer;
11+
import org.apache.lucene.analysis.LowerCaseFilter;
12+
import org.apache.lucene.analysis.TokenStream;
13+
import org.apache.lucene.analysis.Tokenizer;
14+
import org.apache.lucene.analysis.pattern.PatternTokenizer;
15+
import org.elasticsearch.common.regex.Regex;
16+
import org.elasticsearch.index.analysis.AnalyzerScope;
17+
import org.elasticsearch.index.analysis.NamedAnalyzer;
18+
19+
import java.util.regex.Pattern;
20+
21+
/**
22+
* An analyzer that tokenizes text by a pre-defined list of delimiters that work well for log messages.
23+
* The pre-defined list of delimiters is: whitespace characters, =, ?, :, [, ], {, }, ", \, '
24+
*/
25+
public final class DelimiterAnalyzer extends Analyzer {
26+
27+
static final NamedAnalyzer INSTANCE = new NamedAnalyzer("delimiter", AnalyzerScope.GLOBAL, new DelimiterAnalyzer());
28+
29+
private final Pattern pattern;
30+
31+
private DelimiterAnalyzer() {
32+
this.pattern = Regex.compile("[\\s\\=\\?\\:\\[\\]\\{\\}\\\"\\\\\\']", null);
33+
}
34+
35+
@Override
36+
protected TokenStreamComponents createComponents(String s) {
37+
final Tokenizer tokenizer = new PatternTokenizer(pattern, -1);
38+
TokenStream stream = new LowerCaseFilter(tokenizer);
39+
return new TokenStreamComponents(tokenizer, stream);
40+
}
41+
42+
@Override
43+
protected TokenStream normalize(String fieldName, TokenStream in) {
44+
TokenStream stream = in;
45+
stream = new LowerCaseFilter(stream);
46+
return stream;
47+
}
48+
}

x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldMapper.java

Lines changed: 35 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
package org.elasticsearch.xpack.logsdb.patternedtext;
99

10+
import org.apache.lucene.analysis.standard.StandardAnalyzer;
1011
import org.apache.lucene.document.Field;
1112
import org.apache.lucene.document.FieldType;
1213
import org.apache.lucene.document.SortedSetDocValuesField;
@@ -18,7 +19,7 @@
1819
import org.elasticsearch.common.util.FeatureFlag;
1920
import org.elasticsearch.index.IndexSettings;
2021
import org.elasticsearch.index.IndexVersion;
21-
import org.elasticsearch.index.analysis.IndexAnalyzers;
22+
import org.elasticsearch.index.analysis.AnalyzerScope;
2223
import org.elasticsearch.index.analysis.NamedAnalyzer;
2324
import org.elasticsearch.index.mapper.CompositeSyntheticFieldLoader;
2425
import org.elasticsearch.index.mapper.DocumentParserContext;
@@ -46,6 +47,7 @@
4647
public class PatternedTextFieldMapper extends FieldMapper {
4748

4849
public static final FeatureFlag PATTERNED_TEXT_MAPPER = new FeatureFlag("patterned_text");
50+
private static final NamedAnalyzer STANDARD_ANALYZER = new NamedAnalyzer("standard", AnalyzerScope.GLOBAL, new StandardAnalyzer());
4951

5052
public static class Defaults {
5153
public static final FieldType FIELD_TYPE_DOCS;
@@ -77,39 +79,32 @@ public static class Builder extends FieldMapper.Builder {
7779
private final IndexVersion indexCreatedVersion;
7880
private final IndexSettings indexSettings;
7981
private final Parameter<Map<String, String>> meta = Parameter.metaParam();
80-
private final TextParams.Analyzers analyzers;
8182
private final Parameter<String> indexOptions = patternedTextIndexOptions(m -> ((PatternedTextFieldMapper) m).indexOptions);
83+
private final Parameter<NamedAnalyzer> analyzer;
8284

8385
public Builder(String name, MappingParserContext context) {
84-
this(name, context.indexVersionCreated(), context.getIndexSettings(), context.getIndexAnalyzers());
86+
this(name, context.indexVersionCreated(), context.getIndexSettings());
8587
}
8688

87-
public Builder(String name, IndexVersion indexCreatedVersion, IndexSettings indexSettings, IndexAnalyzers indexAnalyzers) {
89+
public Builder(String name, IndexVersion indexCreatedVersion, IndexSettings indexSettings) {
8890
super(name);
8991
this.indexCreatedVersion = indexCreatedVersion;
9092
this.indexSettings = indexSettings;
91-
this.analyzers = new TextParams.Analyzers(
92-
indexAnalyzers,
93-
m -> ((PatternedTextFieldMapper) m).indexAnalyzer,
94-
m -> ((PatternedTextFieldMapper) m).positionIncrementGap,
95-
indexCreatedVersion
96-
);
93+
this.analyzer = analyzerParam(name, m -> ((PatternedTextFieldMapper) m).analyzer);
9794
}
9895

9996
@Override
10097
protected Parameter<?>[] getParameters() {
101-
return new Parameter<?>[] { meta, indexOptions };
98+
return new Parameter<?>[] { meta, indexOptions, analyzer };
10299
}
103100

104101
private PatternedTextFieldType buildFieldType(FieldType fieldType, MapperBuilderContext context) {
105-
NamedAnalyzer searchAnalyzer = analyzers.getSearchAnalyzer();
106-
NamedAnalyzer searchQuoteAnalyzer = analyzers.getSearchQuoteAnalyzer();
107-
NamedAnalyzer indexAnalyzer = analyzers.getIndexAnalyzer();
108-
TextSearchInfo tsi = new TextSearchInfo(fieldType, null, searchAnalyzer, searchQuoteAnalyzer);
102+
NamedAnalyzer analyzer = this.analyzer.get();
103+
TextSearchInfo tsi = new TextSearchInfo(fieldType, null, analyzer, analyzer);
109104
return new PatternedTextFieldType(
110105
context.buildFullName(leafName()),
111106
tsi,
112-
indexAnalyzer,
107+
analyzer,
113108
context.isSourceSynthetic(),
114109
meta.getValue()
115110
);
@@ -134,6 +129,22 @@ private static Parameter<String> patternedTextIndexOptions(Function<FieldMapper,
134129
});
135130
}
136131

132+
private static Parameter<NamedAnalyzer> analyzerParam(String name, Function<FieldMapper, NamedAnalyzer> initializer) {
133+
return new Parameter<>("analyzer", false, () -> DelimiterAnalyzer.INSTANCE, (n, c, o) -> {
134+
String analyzerName = o.toString();
135+
switch (analyzerName) {
136+
case "standard":
137+
return STANDARD_ANALYZER;
138+
case "delimiter":
139+
return DelimiterAnalyzer.INSTANCE;
140+
default:
141+
throw new IllegalArgumentException(
142+
"unsupported analyzer [" + analyzerName + "] for field [" + name + "], supported analyzers are [standard, log]"
143+
);
144+
}
145+
}, initializer, (b, n, v) -> b.field(n, v.name()), NamedAnalyzer::name);
146+
}
147+
137148
@Override
138149
public PatternedTextFieldMapper build(MapperBuilderContext context) {
139150
FieldType fieldType = buildLuceneFieldType(indexOptions);
@@ -152,11 +163,9 @@ public PatternedTextFieldMapper build(MapperBuilderContext context) {
152163
public static final TypeParser PARSER = new TypeParser(Builder::new);
153164

154165
private final IndexVersion indexCreatedVersion;
155-
private final IndexAnalyzers indexAnalyzers;
156-
private final NamedAnalyzer indexAnalyzer;
166+
private final NamedAnalyzer analyzer;
157167
private final IndexSettings indexSettings;
158168
private final String indexOptions;
159-
private final int positionIncrementGap;
160169
private final FieldType fieldType;
161170
private final KeywordFieldMapper templateIdMapper;
162171

@@ -173,22 +182,20 @@ private PatternedTextFieldMapper(
173182
assert mappedFieldType.hasDocValues() == false;
174183
this.fieldType = fieldType;
175184
this.indexCreatedVersion = builder.indexCreatedVersion;
176-
this.indexAnalyzers = builder.analyzers.indexAnalyzers;
177-
this.indexAnalyzer = builder.analyzers.getIndexAnalyzer();
185+
this.analyzer = builder.analyzer.get();
178186
this.indexSettings = builder.indexSettings;
179187
this.indexOptions = builder.indexOptions.getValue();
180-
this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue();
181188
this.templateIdMapper = templateIdMapper;
182189
}
183190

184191
@Override
185192
public Map<String, NamedAnalyzer> indexAnalyzers() {
186-
return Map.of(mappedFieldType.name(), indexAnalyzer);
193+
return Map.of(mappedFieldType.name(), analyzer);
187194
}
188195

189196
@Override
190197
public FieldMapper.Builder getMergeBuilder() {
191-
return new Builder(leafName(), indexCreatedVersion, indexSettings, indexAnalyzers).init(this);
198+
return new Builder(leafName(), indexCreatedVersion, indexSettings).init(this);
192199
}
193200

194201
@Override
@@ -269,4 +276,8 @@ protected SyntheticSourceSupport syntheticSourceSupport() {
269276
)
270277
);
271278
}
279+
280+
NamedAnalyzer getAnalyzer() {
281+
return analyzer;
282+
}
272283
}

x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldType.java

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
import org.apache.lucene.util.BytesRef;
2626
import org.apache.lucene.util.IOFunction;
2727
import org.elasticsearch.common.CheckedIntFunction;
28-
import org.elasticsearch.common.lucene.Lucene;
2928
import org.elasticsearch.common.unit.Fuzziness;
3029
import org.elasticsearch.index.fielddata.FieldDataContext;
3130
import org.elasticsearch.index.fielddata.IndexFieldData;
@@ -81,10 +80,10 @@ public class PatternedTextFieldType extends StringFieldType {
8180
new TextSearchInfo(
8281
hasPositions ? PatternedTextFieldMapper.Defaults.FIELD_TYPE_POSITIONS : PatternedTextFieldMapper.Defaults.FIELD_TYPE_DOCS,
8382
null,
84-
Lucene.STANDARD_ANALYZER,
85-
Lucene.STANDARD_ANALYZER
83+
DelimiterAnalyzer.INSTANCE,
84+
DelimiterAnalyzer.INSTANCE
8685
),
87-
Lucene.STANDARD_ANALYZER,
86+
DelimiterAnalyzer.INSTANCE,
8887
syntheticSource,
8988
Collections.emptyMap()
9089
);

x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldMapperTests.java

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,9 @@
2222
import org.apache.lucene.tests.analysis.Token;
2323
import org.apache.lucene.tests.index.RandomIndexWriter;
2424
import org.elasticsearch.common.Strings;
25+
import org.elasticsearch.common.bytes.BytesReference;
2526
import org.elasticsearch.common.settings.Settings;
27+
import org.elasticsearch.common.xcontent.XContentHelper;
2628
import org.elasticsearch.core.Tuple;
2729
import org.elasticsearch.index.mapper.DocumentMapper;
2830
import org.elasticsearch.index.mapper.KeywordFieldMapper;
@@ -35,8 +37,11 @@
3537
import org.elasticsearch.index.query.MatchPhraseQueryBuilder;
3638
import org.elasticsearch.index.query.SearchExecutionContext;
3739
import org.elasticsearch.plugins.Plugin;
40+
import org.elasticsearch.xcontent.ToXContent;
3841
import org.elasticsearch.xcontent.XContentBuilder;
3942
import org.elasticsearch.xcontent.XContentFactory;
43+
import org.elasticsearch.xcontent.XContentType;
44+
import org.elasticsearch.xcontent.json.JsonXContent;
4045
import org.elasticsearch.xpack.logsdb.LogsDBPlugin;
4146
import org.junit.AssumptionViolatedException;
4247
import org.junit.Before;
@@ -45,6 +50,7 @@
4550
import java.util.Collection;
4651
import java.util.Collections;
4752
import java.util.List;
53+
import java.util.Map;
4854

4955
import static org.hamcrest.Matchers.containsString;
5056
import static org.hamcrest.Matchers.equalTo;
@@ -276,6 +282,49 @@ public void testDocValuesSynthetic() throws IOException {
276282
assertScriptDocValues(mapper, "foo", equalTo(List.of("foo")));
277283
}
278284

285+
public void testAnalyzerAttributeDefault() throws IOException {
286+
MapperService mapper = createMapperService(fieldMapping(b -> b.field("type", "patterned_text")));
287+
var fieldMapper = (PatternedTextFieldMapper) mapper.mappingLookup().getMapper("field");
288+
XContentBuilder builder = JsonXContent.contentBuilder().startObject();
289+
fieldMapper.toXContent(builder, ToXContent.EMPTY_PARAMS);
290+
builder.endObject();
291+
var result = (Map<?, ?>) XContentHelper.convertToMap(BytesReference.bytes(builder), false, XContentType.JSON).v2().get("field");
292+
assertThat(result.size(), equalTo(1));
293+
assertThat(result.get("type"), equalTo("patterned_text"));
294+
}
295+
296+
public void testAnalyzerAttributeStandard() throws IOException {
297+
MapperService mapper = createMapperService(fieldMapping(b -> b.field("type", "patterned_text").field("analyzer", "standard")));
298+
var fieldMapper = (PatternedTextFieldMapper) mapper.mappingLookup().getMapper("field");
299+
XContentBuilder builder = JsonXContent.contentBuilder().startObject();
300+
fieldMapper.toXContent(builder, ToXContent.EMPTY_PARAMS);
301+
builder.endObject();
302+
var result = (Map<?, ?>) XContentHelper.convertToMap(BytesReference.bytes(builder), false, XContentType.JSON).v2().get("field");
303+
assertThat(result.size(), equalTo(2));
304+
assertThat(result.get("type"), equalTo("patterned_text"));
305+
assertThat(result.get("analyzer"), equalTo("standard"));
306+
}
307+
308+
public void testAnalyzerAttributeLog() throws IOException {
309+
MapperService mapper = createMapperService(fieldMapping(b -> b.field("type", "patterned_text").field("analyzer", "delimiter")));
310+
var fieldMapper = (PatternedTextFieldMapper) mapper.mappingLookup().getMapper("field");
311+
XContentBuilder builder = JsonXContent.contentBuilder().startObject();
312+
fieldMapper.toXContent(builder, ToXContent.EMPTY_PARAMS);
313+
builder.endObject();
314+
var result = (Map<?, ?>) XContentHelper.convertToMap(BytesReference.bytes(builder), false, XContentType.JSON).v2().get("field");
315+
assertThat(result.size(), equalTo(1));
316+
assertThat(result.get("type"), equalTo("patterned_text"));
317+
assertThat(fieldMapper.getAnalyzer(), equalTo(DelimiterAnalyzer.INSTANCE));
318+
}
319+
320+
public void testAnalyzerAttributeIllegal() throws IOException {
321+
IllegalArgumentException e = (IllegalArgumentException) expectThrows(
322+
MapperParsingException.class,
323+
() -> createMapperService(fieldMapping(b -> b.field("type", "patterned_text").field("analyzer", "whitespace")))
324+
).getCause();
325+
assertThat(e.getMessage(), equalTo("unsupported analyzer [whitespace] for field [field], supported analyzers are [standard, log]"));
326+
}
327+
279328
@Override
280329
public void testSyntheticSourceKeepArrays() {
281330
// This mapper does not allow arrays

x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextIntegrationTests.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,8 @@ protected Collection<Class<? extends Plugin>> getPlugins() {
7676
"field_match_only_text": { "type": "match_only_text" },
7777
"field_patterned_text": {
7878
"type": "patterned_text",
79-
"index_options": "%"
79+
"index_options": "%",
80+
"analyzer": "standard"
8081
}
8182
}
8283
}

0 commit comments

Comments
 (0)