Skip to content

Commit 9aaba25

Browse files
Simple version of patterned_text with a single doc value for arguments (#129292)
Initial version of the patterned_text mapper. It behaves similarly to match_only_text. This version uses a single SortedSetDocValues for the template and another for the arguments. It splits the message by delimiters, then classifies a token as an argument if it contains a digit. All arguments are concatenated and inserted as a single doc value. A single inverted index is used, without positions. Phrase queries are still possible, using the SourceConfirmedTextQuery, but are not fast.
1 parent 2df9dd4 commit 9aaba25

File tree

18 files changed

+2045
-4
lines changed

18 files changed

+2045
-4
lines changed

modules/mapper-extras/src/main/java/module-info.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,6 @@
1414
requires org.apache.lucene.core;
1515
requires org.apache.lucene.memory;
1616
requires org.apache.lucene.queries;
17+
18+
exports org.elasticsearch.index.mapper.extras;
1719
}

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ public MatchOnlyTextFieldType(
173173
super(name, true, false, false, tsi, meta);
174174
this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer);
175175
this.textFieldType = new TextFieldType(name, isSyntheticSource);
176-
this.originalName = isSyntheticSource ? name() + "._original" : null;
176+
this.originalName = isSyntheticSource ? name + "._original" : null;
177177
}
178178

179179
public MatchOnlyTextFieldType(String name) {

server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,8 @@ private FieldContext contextBuilders(
124124
if (fieldNameContainsWildcards) {
125125
if (fieldType.typeName().equals(TextFieldMapper.CONTENT_TYPE) == false
126126
&& fieldType.typeName().equals(KeywordFieldMapper.CONTENT_TYPE) == false
127-
&& fieldType.typeName().equals("match_only_text") == false) {
127+
&& fieldType.typeName().equals("match_only_text") == false
128+
&& fieldType.typeName().equals("patterned_text") == false) {
128129
continue;
129130
}
130131
if (highlighter.canHighlight(fieldType) == false) {

x-pack/plugin/logsdb/build.gradle

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,13 @@ base {
2424

2525
restResources {
2626
restApi {
27-
include 'bulk', 'search', '_common', 'indices', 'index', 'cluster', 'data_stream', 'ingest', 'cat', 'capabilities', 'esql.query'
27+
include 'bulk', 'search', '_common', 'indices', 'index', 'cluster', 'data_stream', 'ingest', 'cat', 'capabilities', 'esql.query', 'field_caps'
2828
}
2929
}
3030

3131
dependencies {
3232
compileOnly project(path: xpackModule('core'))
33+
implementation project(':modules:mapper-extras')
3334
testImplementation project(':modules:data-streams')
3435
testImplementation(testArtifact(project(xpackModule('core'))))
3536
javaRestTestImplementation(testArtifact(project(xpackModule('spatial'))))

x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/LogsDBPlugin.java

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,21 +12,27 @@
1212
import org.elasticsearch.common.settings.Settings;
1313
import org.elasticsearch.index.IndexSettingProvider;
1414
import org.elasticsearch.index.IndexVersion;
15+
import org.elasticsearch.index.mapper.Mapper;
1516
import org.elasticsearch.license.LicenseService;
1617
import org.elasticsearch.license.XPackLicenseState;
1718
import org.elasticsearch.plugins.ActionPlugin;
19+
import org.elasticsearch.plugins.MapperPlugin;
1820
import org.elasticsearch.plugins.Plugin;
1921
import org.elasticsearch.xpack.core.XPackPlugin;
2022
import org.elasticsearch.xpack.core.action.XPackInfoFeatureAction;
2123
import org.elasticsearch.xpack.core.action.XPackUsageFeatureAction;
24+
import org.elasticsearch.xpack.logsdb.patternedtext.PatternedTextFieldMapper;
25+
import org.elasticsearch.xpack.logsdb.patternedtext.PatternedTextFieldType;
2226

2327
import java.util.ArrayList;
2428
import java.util.Collection;
2529
import java.util.List;
30+
import java.util.Map;
2631

32+
import static java.util.Collections.singletonMap;
2733
import static org.elasticsearch.xpack.logsdb.LogsdbLicenseService.FALLBACK_SETTING;
2834

29-
public class LogsDBPlugin extends Plugin implements ActionPlugin {
35+
public class LogsDBPlugin extends Plugin implements ActionPlugin, MapperPlugin {
3036

3137
private final Settings settings;
3238
private final LogsdbLicenseService licenseService;
@@ -98,6 +104,15 @@ public List<ActionPlugin.ActionHandler> getActions() {
98104
return actions;
99105
}
100106

107+
@Override
108+
public Map<String, Mapper.TypeParser> getMappers() {
109+
if (PatternedTextFieldMapper.PATTERNED_TEXT_MAPPER.isEnabled()) {
110+
return singletonMap(PatternedTextFieldType.CONTENT_TYPE, PatternedTextFieldMapper.PARSER);
111+
} else {
112+
return Map.of();
113+
}
114+
}
115+
101116
protected XPackLicenseState getLicenseState() {
102117
return XPackPlugin.getSharedLicenseState();
103118
}
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.logsdb.patternedtext;
9+
10+
import org.apache.lucene.index.BinaryDocValues;
11+
import org.apache.lucene.index.DocValues;
12+
import org.apache.lucene.index.LeafReader;
13+
import org.apache.lucene.index.SortedSetDocValues;
14+
import org.apache.lucene.util.BytesRef;
15+
16+
import java.io.IOException;
17+
18+
public class PatternedTextDocValues extends BinaryDocValues {
19+
private final SortedSetDocValues templateDocValues;
20+
private final SortedSetDocValues argsDocValues;
21+
22+
PatternedTextDocValues(SortedSetDocValues templateDocValues, SortedSetDocValues argsDocValues) {
23+
this.templateDocValues = templateDocValues;
24+
this.argsDocValues = argsDocValues;
25+
}
26+
27+
static PatternedTextDocValues from(LeafReader leafReader, String templateFieldName, String argsFieldName) throws IOException {
28+
SortedSetDocValues templateDocValues = DocValues.getSortedSet(leafReader, templateFieldName);
29+
if (templateDocValues.getValueCount() == 0) {
30+
return null;
31+
}
32+
33+
SortedSetDocValues argsDocValues = DocValues.getSortedSet(leafReader, argsFieldName);
34+
return new PatternedTextDocValues(templateDocValues, argsDocValues);
35+
}
36+
37+
private String getNextStringValue() throws IOException {
38+
assert templateDocValues.docValueCount() == 1;
39+
String template = templateDocValues.lookupOrd(templateDocValues.nextOrd()).utf8ToString();
40+
int argsCount = PatternedTextValueProcessor.countArgs(template);
41+
if (argsCount > 0) {
42+
assert argsDocValues.docValueCount() == 1;
43+
var mergedArgs = argsDocValues.lookupOrd(argsDocValues.nextOrd());
44+
var args = PatternedTextValueProcessor.decodeRemainingArgs(mergedArgs.utf8ToString());
45+
return PatternedTextValueProcessor.merge(new PatternedTextValueProcessor.Parts(template, args));
46+
} else {
47+
return template;
48+
}
49+
}
50+
51+
@Override
52+
public BytesRef binaryValue() throws IOException {
53+
return new BytesRef(getNextStringValue());
54+
}
55+
56+
@Override
57+
public boolean advanceExact(int i) throws IOException {
58+
argsDocValues.advanceExact(i);
59+
// If template has a value, then message has a value. We don't have to check args here, since there may not be args for the doc
60+
return templateDocValues.advanceExact(i);
61+
}
62+
63+
@Override
64+
public int docID() {
65+
return templateDocValues.docID();
66+
}
67+
68+
@Override
69+
public int nextDoc() throws IOException {
70+
int templateNext = templateDocValues.nextDoc();
71+
var argsAdvance = argsDocValues.advance(templateNext);
72+
assert argsAdvance >= templateNext;
73+
return templateNext;
74+
}
75+
76+
@Override
77+
public int advance(int i) throws IOException {
78+
int templateAdvance = templateDocValues.advance(i);
79+
var argsAdvance = argsDocValues.advance(templateAdvance);
80+
assert argsAdvance >= templateAdvance;
81+
return templateAdvance;
82+
}
83+
84+
@Override
85+
public long cost() {
86+
return templateDocValues.cost() + argsDocValues.cost();
87+
}
88+
}
Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.logsdb.patternedtext;
9+
10+
import org.apache.lucene.document.Field;
11+
import org.apache.lucene.document.FieldType;
12+
import org.apache.lucene.document.SortedSetDocValuesField;
13+
import org.apache.lucene.index.IndexOptions;
14+
import org.apache.lucene.util.BytesRef;
15+
import org.elasticsearch.common.util.FeatureFlag;
16+
import org.elasticsearch.index.IndexVersion;
17+
import org.elasticsearch.index.analysis.IndexAnalyzers;
18+
import org.elasticsearch.index.analysis.NamedAnalyzer;
19+
import org.elasticsearch.index.mapper.CompositeSyntheticFieldLoader;
20+
import org.elasticsearch.index.mapper.DocumentParserContext;
21+
import org.elasticsearch.index.mapper.FieldMapper;
22+
import org.elasticsearch.index.mapper.MapperBuilderContext;
23+
import org.elasticsearch.index.mapper.TextParams;
24+
import org.elasticsearch.index.mapper.TextSearchInfo;
25+
26+
import java.io.IOException;
27+
import java.util.Map;
28+
29+
/**
30+
* A {@link FieldMapper} that assigns every document the same value.
31+
*/
32+
public class PatternedTextFieldMapper extends FieldMapper {
33+
34+
public static final FeatureFlag PATTERNED_TEXT_MAPPER = new FeatureFlag("patterned_text");
35+
36+
public static class Defaults {
37+
public static final FieldType FIELD_TYPE;
38+
39+
static {
40+
final FieldType ft = new FieldType();
41+
ft.setTokenized(true);
42+
ft.setStored(false);
43+
ft.setStoreTermVectors(false);
44+
ft.setOmitNorms(true);
45+
ft.setIndexOptions(IndexOptions.DOCS);
46+
FIELD_TYPE = freezeAndDeduplicateFieldType(ft);
47+
}
48+
}
49+
50+
public static class Builder extends FieldMapper.Builder {
51+
52+
private final IndexVersion indexCreatedVersion;
53+
54+
private final Parameter<Map<String, String>> meta = Parameter.metaParam();
55+
56+
private final TextParams.Analyzers analyzers;
57+
58+
public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers) {
59+
super(name);
60+
this.indexCreatedVersion = indexCreatedVersion;
61+
this.analyzers = new TextParams.Analyzers(
62+
indexAnalyzers,
63+
m -> ((PatternedTextFieldMapper) m).indexAnalyzer,
64+
m -> ((PatternedTextFieldMapper) m).positionIncrementGap,
65+
indexCreatedVersion
66+
);
67+
}
68+
69+
@Override
70+
protected Parameter<?>[] getParameters() {
71+
return new Parameter<?>[] { meta };
72+
}
73+
74+
private PatternedTextFieldType buildFieldType(MapperBuilderContext context) {
75+
NamedAnalyzer searchAnalyzer = analyzers.getSearchAnalyzer();
76+
NamedAnalyzer searchQuoteAnalyzer = analyzers.getSearchQuoteAnalyzer();
77+
NamedAnalyzer indexAnalyzer = analyzers.getIndexAnalyzer();
78+
TextSearchInfo tsi = new TextSearchInfo(Defaults.FIELD_TYPE, null, searchAnalyzer, searchQuoteAnalyzer);
79+
return new PatternedTextFieldType(
80+
context.buildFullName(leafName()),
81+
tsi,
82+
indexAnalyzer,
83+
context.isSourceSynthetic(),
84+
meta.getValue()
85+
);
86+
}
87+
88+
@Override
89+
public PatternedTextFieldMapper build(MapperBuilderContext context) {
90+
return new PatternedTextFieldMapper(leafName(), buildFieldType(context), builderParams(this, context), this);
91+
}
92+
}
93+
94+
public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers()));
95+
96+
private final IndexVersion indexCreatedVersion;
97+
private final IndexAnalyzers indexAnalyzers;
98+
private final NamedAnalyzer indexAnalyzer;
99+
private final int positionIncrementGap;
100+
private final FieldType fieldType;
101+
102+
private PatternedTextFieldMapper(
103+
String simpleName,
104+
PatternedTextFieldType mappedFieldPatternedTextFieldType,
105+
BuilderParams builderParams,
106+
Builder builder
107+
) {
108+
super(simpleName, mappedFieldPatternedTextFieldType, builderParams);
109+
assert mappedFieldPatternedTextFieldType.getTextSearchInfo().isTokenized();
110+
assert mappedFieldPatternedTextFieldType.hasDocValues() == false;
111+
this.fieldType = Defaults.FIELD_TYPE;
112+
this.indexCreatedVersion = builder.indexCreatedVersion;
113+
this.indexAnalyzers = builder.analyzers.indexAnalyzers;
114+
this.indexAnalyzer = builder.analyzers.getIndexAnalyzer();
115+
this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue();
116+
}
117+
118+
@Override
119+
public Map<String, NamedAnalyzer> indexAnalyzers() {
120+
return Map.of(mappedFieldType.name(), indexAnalyzer);
121+
}
122+
123+
@Override
124+
public FieldMapper.Builder getMergeBuilder() {
125+
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers).init(this);
126+
}
127+
128+
@Override
129+
protected void parseCreateField(DocumentParserContext context) throws IOException {
130+
final String value = context.parser().textOrNull();
131+
if (value == null) {
132+
return;
133+
}
134+
135+
var existingValue = context.doc().getField(fieldType().name());
136+
if (existingValue != null) {
137+
throw new IllegalArgumentException("Multiple values are not allowed for field [" + fieldType().name() + "].");
138+
}
139+
140+
// Parse template and args.
141+
PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(value);
142+
143+
// Add index on original value
144+
context.doc().add(new Field(fieldType().name(), value, fieldType));
145+
146+
// Add template doc_values
147+
context.doc().add(new SortedSetDocValuesField(fieldType().templateFieldName(), new BytesRef(parts.template())));
148+
149+
// Add args doc_values
150+
if (parts.args().isEmpty() == false) {
151+
String remainingArgs = PatternedTextValueProcessor.encodeRemainingArgs(parts);
152+
context.doc().add(new SortedSetDocValuesField(fieldType().argsFieldName(), new BytesRef(remainingArgs)));
153+
}
154+
}
155+
156+
@Override
157+
protected String contentType() {
158+
return PatternedTextFieldType.CONTENT_TYPE;
159+
}
160+
161+
@Override
162+
public PatternedTextFieldType fieldType() {
163+
return (PatternedTextFieldType) super.fieldType();
164+
}
165+
166+
@Override
167+
protected SyntheticSourceSupport syntheticSourceSupport() {
168+
return new SyntheticSourceSupport.Native(
169+
() -> new CompositeSyntheticFieldLoader(
170+
leafName(),
171+
fullPath(),
172+
new PatternedTextSyntheticFieldLoaderLayer(fieldType().name(), fieldType().templateFieldName(), fieldType().argsFieldName())
173+
)
174+
);
175+
}
176+
}

0 commit comments

Comments
 (0)