Skip to content

Commit ea95e9e

Browse files
Add positions index_option to patterned_text (#133679)
Add an `index_options` parameter to the `patterned_text` field type. Currently, only `docs` and `positions` are supported. With `docs`, positions are not stored, and queries fall back to a source-confirmed query, as with `match_only_text`. With `positions`, phrase queries are efficient because the source does not need to be checked. Defaults to `docs`. Fixes #128935
1 parent 87695d2 commit ea95e9e

File tree

6 files changed

+436
-43
lines changed

6 files changed

+436
-43
lines changed

x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldMapper.java

Lines changed: 54 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import org.elasticsearch.index.mapper.KeywordFieldMapper;
2424
import org.elasticsearch.index.mapper.Mapper;
2525
import org.elasticsearch.index.mapper.MapperBuilderContext;
26+
import org.elasticsearch.index.mapper.MapperParsingException;
2627
import org.elasticsearch.index.mapper.MappingParserContext;
2728
import org.elasticsearch.index.mapper.TextParams;
2829
import org.elasticsearch.index.mapper.TextSearchInfo;
@@ -32,16 +33,20 @@
3233
import java.util.Iterator;
3334
import java.util.List;
3435
import java.util.Map;
36+
import java.util.function.Function;
37+
import java.util.function.Supplier;
3538

3639
/**
37-
* A {@link FieldMapper} that assigns every document the same value.
40+
* A {@link FieldMapper} for full-text log fields that internally splits text into a low cardinality template component
41+
* and high cardinality argument component. Separating these pieces allows the template component to be highly compressed.
3842
*/
3943
public class PatternedTextFieldMapper extends FieldMapper {
4044

4145
public static final FeatureFlag PATTERNED_TEXT_MAPPER = new FeatureFlag("patterned_text");
4246

4347
public static class Defaults {
44-
public static final FieldType FIELD_TYPE;
48+
public static final FieldType FIELD_TYPE_DOCS;
49+
public static final FieldType FIELD_TYPE_POSITIONS;
4550

4651
static {
4752
final FieldType ft = new FieldType();
@@ -50,7 +55,17 @@ public static class Defaults {
5055
ft.setStoreTermVectors(false);
5156
ft.setOmitNorms(true);
5257
ft.setIndexOptions(IndexOptions.DOCS);
53-
FIELD_TYPE = freezeAndDeduplicateFieldType(ft);
58+
FIELD_TYPE_DOCS = freezeAndDeduplicateFieldType(ft);
59+
}
60+
61+
static {
62+
final FieldType ft = new FieldType();
63+
ft.setTokenized(true);
64+
ft.setStored(false);
65+
ft.setStoreTermVectors(false);
66+
ft.setOmitNorms(true);
67+
ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
68+
FIELD_TYPE_POSITIONS = freezeAndDeduplicateFieldType(ft);
5469
}
5570
}
5671

@@ -60,6 +75,7 @@ public static class Builder extends FieldMapper.Builder {
6075
private final IndexSettings indexSettings;
6176
private final Parameter<Map<String, String>> meta = Parameter.metaParam();
6277
private final TextParams.Analyzers analyzers;
78+
private final Parameter<String> indexOptions = patternedTextIndexOptions(m -> ((PatternedTextFieldMapper) m).indexOptions);
6379

6480
public Builder(String name, MappingParserContext context) {
6581
this(name, context.indexVersionCreated(), context.getIndexSettings(), context.getIndexAnalyzers());
@@ -79,14 +95,14 @@ public Builder(String name, IndexVersion indexCreatedVersion, IndexSettings inde
7995

8096
@Override
8197
protected Parameter<?>[] getParameters() {
82-
return new Parameter<?>[] { meta };
98+
return new Parameter<?>[] { meta, indexOptions };
8399
}
84100

85-
private PatternedTextFieldType buildFieldType(MapperBuilderContext context) {
101+
private PatternedTextFieldType buildFieldType(FieldType fieldType, MapperBuilderContext context) {
86102
NamedAnalyzer searchAnalyzer = analyzers.getSearchAnalyzer();
87103
NamedAnalyzer searchQuoteAnalyzer = analyzers.getSearchQuoteAnalyzer();
88104
NamedAnalyzer indexAnalyzer = analyzers.getIndexAnalyzer();
89-
TextSearchInfo tsi = new TextSearchInfo(Defaults.FIELD_TYPE, null, searchAnalyzer, searchQuoteAnalyzer);
105+
TextSearchInfo tsi = new TextSearchInfo(fieldType, null, searchAnalyzer, searchQuoteAnalyzer);
90106
return new PatternedTextFieldType(
91107
context.buildFullName(leafName()),
92108
tsi,
@@ -96,45 +112,68 @@ private PatternedTextFieldType buildFieldType(MapperBuilderContext context) {
96112
);
97113
}
98114

115+
private static FieldType buildLuceneFieldType(Supplier<String> indexOptionSupplier) {
116+
var indexOptions = TextParams.toIndexOptions(true, indexOptionSupplier.get());
117+
return indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS ? Defaults.FIELD_TYPE_POSITIONS : Defaults.FIELD_TYPE_DOCS;
118+
}
119+
120+
private static Parameter<String> patternedTextIndexOptions(Function<FieldMapper, String> initializer) {
121+
return Parameter.stringParam("index_options", false, initializer, "docs").addValidator(v -> {
122+
switch (v) {
123+
case "positions":
124+
case "docs":
125+
return;
126+
default:
127+
throw new MapperParsingException(
128+
"Unknown value [" + v + "] for field [index_options] - accepted values are [positions, docs]"
129+
);
130+
}
131+
});
132+
}
133+
99134
@Override
100135
public PatternedTextFieldMapper build(MapperBuilderContext context) {
101-
PatternedTextFieldType patternedTextFieldType = buildFieldType(context);
136+
FieldType fieldType = buildLuceneFieldType(indexOptions);
137+
PatternedTextFieldType patternedTextFieldType = buildFieldType(fieldType, context);
102138
BuilderParams builderParams = builderParams(this, context);
103139
var templateIdMapper = KeywordFieldMapper.Builder.buildWithDocValuesSkipper(
104140
patternedTextFieldType.templateIdFieldName(),
105141
indexSettings.getMode(),
106142
indexCreatedVersion,
107143
true
108144
).indexed(false).build(context);
109-
return new PatternedTextFieldMapper(leafName(), patternedTextFieldType, builderParams, this, templateIdMapper);
145+
return new PatternedTextFieldMapper(leafName(), fieldType, patternedTextFieldType, builderParams, this, templateIdMapper);
110146
}
111147
}
112148

113149
public static final TypeParser PARSER = new TypeParser(Builder::new);
114150

115151
private final IndexVersion indexCreatedVersion;
116152
private final IndexAnalyzers indexAnalyzers;
117-
private final IndexSettings indexSettings;
118153
private final NamedAnalyzer indexAnalyzer;
154+
private final IndexSettings indexSettings;
155+
private final String indexOptions;
119156
private final int positionIncrementGap;
120157
private final FieldType fieldType;
121158
private final KeywordFieldMapper templateIdMapper;
122159

123160
private PatternedTextFieldMapper(
124161
String simpleName,
125-
PatternedTextFieldType mappedFieldPatternedTextFieldType,
162+
FieldType fieldType,
163+
PatternedTextFieldType mappedFieldType,
126164
BuilderParams builderParams,
127165
Builder builder,
128166
KeywordFieldMapper templateIdMapper
129167
) {
130-
super(simpleName, mappedFieldPatternedTextFieldType, builderParams);
131-
assert mappedFieldPatternedTextFieldType.getTextSearchInfo().isTokenized();
132-
assert mappedFieldPatternedTextFieldType.hasDocValues() == false;
133-
this.fieldType = Defaults.FIELD_TYPE;
168+
super(simpleName, mappedFieldType, builderParams);
169+
assert mappedFieldType.getTextSearchInfo().isTokenized();
170+
assert mappedFieldType.hasDocValues() == false;
171+
this.fieldType = fieldType;
134172
this.indexCreatedVersion = builder.indexCreatedVersion;
135173
this.indexAnalyzers = builder.analyzers.indexAnalyzers;
136174
this.indexAnalyzer = builder.analyzers.getIndexAnalyzer();
137175
this.indexSettings = builder.indexSettings;
176+
this.indexOptions = builder.indexOptions.getValue();
138177
this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue();
139178
this.templateIdMapper = templateIdMapper;
140179
}
@@ -172,7 +211,7 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
172211
throw new IllegalArgumentException("Multiple values are not allowed for field [" + fieldType().name() + "].");
173212
}
174213

175-
// Parse template and args.
214+
// Parse template and args
176215
PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(value);
177216

178217
// Add index on original value

x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldType.java

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -63,21 +63,29 @@ public class PatternedTextFieldType extends StringFieldType {
6363

6464
private final Analyzer indexAnalyzer;
6565
private final TextFieldMapper.TextFieldType textFieldType;
66+
private final boolean hasPositions;
6667

6768
PatternedTextFieldType(String name, TextSearchInfo tsi, Analyzer indexAnalyzer, boolean isSyntheticSource, Map<String, String> meta) {
6869
// Though this type is based on doc_values, hasDocValues is set to false as the patterned_text type is not aggregatable.
6970
// This does not stop its child .template type from being aggregatable.
7071
super(name, true, false, false, tsi, meta);
7172
this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer);
7273
this.textFieldType = new TextFieldMapper.TextFieldType(name, isSyntheticSource);
74+
this.hasPositions = tsi.hasPositions();
7375
}
7476

75-
PatternedTextFieldType(String name) {
77+
// For testing only
78+
PatternedTextFieldType(String name, boolean hasPositions, boolean syntheticSource) {
7679
this(
7780
name,
78-
new TextSearchInfo(PatternedTextFieldMapper.Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER),
81+
new TextSearchInfo(
82+
hasPositions ? PatternedTextFieldMapper.Defaults.FIELD_TYPE_POSITIONS : PatternedTextFieldMapper.Defaults.FIELD_TYPE_DOCS,
83+
null,
84+
Lucene.STANDARD_ANALYZER,
85+
Lucene.STANDARD_ANALYZER
86+
),
7987
Lucene.STANDARD_ANALYZER,
80-
false,
88+
syntheticSource,
8189
Collections.emptyMap()
8290
);
8391
}
@@ -114,9 +122,13 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti
114122
};
115123
}
116124

117-
private Query sourceConfirmedQuery(Query query, SearchExecutionContext context) {
118-
// Disable scoring
119-
return new ConstantScoreQuery(new SourceConfirmedTextQuery(query, getValueFetcherProvider(context), indexAnalyzer));
125+
private Query maybeSourceConfirmQuery(Query query, SearchExecutionContext context) {
126+
// Disable scoring similarly to match_only_text
127+
if (hasPositions) {
128+
return new ConstantScoreQuery(query);
129+
} else {
130+
return new ConstantScoreQuery(new SourceConfirmedTextQuery(query, getValueFetcherProvider(context), indexAnalyzer));
131+
}
120132
}
121133

122134
private IntervalsSource toIntervalsSource(IntervalsSource source, Query approximation, SearchExecutionContext searchExecutionContext) {
@@ -221,21 +233,21 @@ public IntervalsSource rangeIntervals(
221233
public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncrements, SearchExecutionContext queryShardContext)
222234
throws IOException {
223235
final Query textQuery = textFieldType.phraseQuery(stream, slop, enablePosIncrements, queryShardContext);
224-
return sourceConfirmedQuery(textQuery, queryShardContext);
236+
return maybeSourceConfirmQuery(textQuery, queryShardContext);
225237
}
226238

227239
@Override
228240
public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, SearchExecutionContext queryShardContext)
229241
throws IOException {
230242
final Query textQuery = textFieldType.multiPhraseQuery(stream, slop, enablePositionIncrements, queryShardContext);
231-
return sourceConfirmedQuery(textQuery, queryShardContext);
243+
return maybeSourceConfirmQuery(textQuery, queryShardContext);
232244
}
233245

234246
@Override
235247
public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, SearchExecutionContext queryShardContext)
236248
throws IOException {
237249
final Query textQuery = textFieldType.phrasePrefixQuery(stream, slop, maxExpansions, queryShardContext);
238-
return sourceConfirmedQuery(textQuery, queryShardContext);
250+
return maybeSourceConfirmQuery(textQuery, queryShardContext);
239251
}
240252

241253
@Override

x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldTypeTests.java

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -42,21 +42,21 @@
4242
public class PatternedTextFieldTypeTests extends FieldTypeTestCase {
4343

4444
public void testTermQuery() {
45-
MappedFieldType ft = new PatternedTextFieldType("field");
45+
MappedFieldType ft = new PatternedTextFieldType("field", randomBoolean(), randomBoolean());
4646
assertEquals(new ConstantScoreQuery(new TermQuery(new Term("field", "foo"))), ft.termQuery("foo", null));
4747
assertEquals(AutomatonQueries.caseInsensitiveTermQuery(new Term("field", "fOo")), ft.termQueryCaseInsensitive("fOo", null));
4848
}
4949

5050
public void testTermsQuery() {
51-
MappedFieldType ft = new PatternedTextFieldType("field");
51+
MappedFieldType ft = new PatternedTextFieldType("field", randomBoolean(), randomBoolean());
5252
List<BytesRef> terms = new ArrayList<>();
5353
terms.add(new BytesRef("foo"));
5454
terms.add(new BytesRef("123"));
5555
assertEquals(new TermInSetQuery("field", terms), ft.termsQuery(Arrays.asList("foo", "123"), null));
5656
}
5757

5858
public void testRangeQuery() {
59-
MappedFieldType ft = new PatternedTextFieldType("field");
59+
MappedFieldType ft = new PatternedTextFieldType("field", randomBoolean(), randomBoolean());
6060
assertEquals(
6161
new TermRangeQuery("field", BytesRefs.toBytesRef("foo"), BytesRefs.toBytesRef("bar"), true, false),
6262
ft.rangeQuery("foo", "bar", true, false, null, null, null, MOCK_CONTEXT)
@@ -73,7 +73,7 @@ public void testRangeQuery() {
7373
}
7474

7575
public void testRegexpQuery() {
76-
MappedFieldType ft = new PatternedTextFieldType("field");
76+
MappedFieldType ft = new PatternedTextFieldType("field", randomBoolean(), randomBoolean());
7777
assertEquals(new RegexpQuery(new Term("field", "foo.*")), ft.regexpQuery("foo.*", 0, 0, 10, null, MOCK_CONTEXT));
7878

7979
ElasticsearchException ee = expectThrows(
@@ -84,7 +84,7 @@ public void testRegexpQuery() {
8484
}
8585

8686
public void testFuzzyQuery() {
87-
MappedFieldType ft = new PatternedTextFieldType("field");
87+
MappedFieldType ft = new PatternedTextFieldType("field", randomBoolean(), randomBoolean());
8888
assertEquals(
8989
new ConstantScoreQuery(new FuzzyQuery(new Term("field", "foo"), 2, 1, 50, true)),
9090
ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MOCK_CONTEXT)
@@ -110,15 +110,15 @@ private Query unwrapPositionalQuery(Query query) {
110110
}
111111

112112
public void testPhraseQuery() throws IOException {
113-
MappedFieldType ft = new PatternedTextFieldType("field");
113+
MappedFieldType ft = new PatternedTextFieldType("field", randomBoolean(), randomBoolean());
114114
TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("1", 4, 7));
115115
Query query = ft.phraseQuery(ts, 0, true, MOCK_CONTEXT);
116116
Query delegate = unwrapPositionalQuery(query);
117117
assertEquals(new PhraseQuery("field", "a", "1").toString(), delegate.toString());
118118
}
119119

120120
public void testMultiPhraseQuery() throws IOException {
121-
MappedFieldType ft = new PatternedTextFieldType("field");
121+
MappedFieldType ft = new PatternedTextFieldType("field", randomBoolean(), randomBoolean());
122122
TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("2", 0, 0, 3), new Token("c", 4, 7));
123123
Query query = ft.multiPhraseQuery(ts, 0, true, MOCK_CONTEXT);
124124
Query delegate = unwrapPositionalQuery(query);
@@ -129,7 +129,7 @@ public void testMultiPhraseQuery() throws IOException {
129129
}
130130

131131
public void testPhrasePrefixQuery() throws IOException {
132-
MappedFieldType ft = new PatternedTextFieldType("field");
132+
MappedFieldType ft = new PatternedTextFieldType("field", randomBoolean(), randomBoolean());
133133
TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("b", 0, 0, 3), new Token("c", 4, 7));
134134
Query query = ft.phrasePrefixQuery(ts, 0, 10, MOCK_CONTEXT);
135135
Query delegate = unwrapPositionalQuery(query);
@@ -140,14 +140,14 @@ public void testPhrasePrefixQuery() throws IOException {
140140
}
141141

142142
public void testTermIntervals() {
143-
MappedFieldType ft = new PatternedTextFieldType("field");
143+
MappedFieldType ft = new PatternedTextFieldType("field", randomBoolean(), randomBoolean());
144144
IntervalsSource termIntervals = ft.termIntervals(new BytesRef("foo"), MOCK_CONTEXT);
145145
assertThat(termIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
146146
assertEquals(Intervals.term(new BytesRef("foo")), ((SourceIntervalsSource) termIntervals).getIntervalsSource());
147147
}
148148

149149
public void testPrefixIntervals() {
150-
MappedFieldType ft = new PatternedTextFieldType("field");
150+
MappedFieldType ft = new PatternedTextFieldType("field", randomBoolean(), randomBoolean());
151151
IntervalsSource prefixIntervals = ft.prefixIntervals(new BytesRef("foo"), MOCK_CONTEXT);
152152
assertThat(prefixIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
153153
assertEquals(
@@ -157,7 +157,7 @@ public void testPrefixIntervals() {
157157
}
158158

159159
public void testWildcardIntervals() {
160-
MappedFieldType ft = new PatternedTextFieldType("field");
160+
MappedFieldType ft = new PatternedTextFieldType("field", randomBoolean(), randomBoolean());
161161
IntervalsSource wildcardIntervals = ft.wildcardIntervals(new BytesRef("foo"), MOCK_CONTEXT);
162162
assertThat(wildcardIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
163163
assertEquals(
@@ -167,7 +167,7 @@ public void testWildcardIntervals() {
167167
}
168168

169169
public void testRegexpIntervals() {
170-
MappedFieldType ft = new PatternedTextFieldType("field");
170+
MappedFieldType ft = new PatternedTextFieldType("field", randomBoolean(), randomBoolean());
171171
IntervalsSource regexpIntervals = ft.regexpIntervals(new BytesRef("foo"), MOCK_CONTEXT);
172172
assertThat(regexpIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
173173
assertEquals(
@@ -177,13 +177,13 @@ public void testRegexpIntervals() {
177177
}
178178

179179
public void testFuzzyIntervals() {
180-
MappedFieldType ft = new PatternedTextFieldType("field");
180+
MappedFieldType ft = new PatternedTextFieldType("field", randomBoolean(), randomBoolean());
181181
IntervalsSource fuzzyIntervals = ft.fuzzyIntervals("foo", 1, 2, true, MOCK_CONTEXT);
182182
assertThat(fuzzyIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
183183
}
184184

185185
public void testRangeIntervals() {
186-
MappedFieldType ft = new PatternedTextFieldType("field");
186+
MappedFieldType ft = new PatternedTextFieldType("field", randomBoolean(), randomBoolean());
187187
IntervalsSource rangeIntervals = ft.rangeIntervals(new BytesRef("foo"), new BytesRef("foo1"), true, true, MOCK_CONTEXT);
188188
assertThat(rangeIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
189189
assertEquals(

x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextVsMatchOnlyTextTests.java

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,23 +66,30 @@ protected Collection<Class<? extends Plugin>> nodePlugins() {
6666
private static final String INDEX = "test_index";
6767
private static final String MATCH_ONLY_TEXT_FIELD = "field_match_only_text";
6868
private static final String PATTERNED_TEXT_FIELD = "field_patterned_text";
69-
private static final String MAPPING = """
69+
private static final String MAPPING_TEMPLATE = """
7070
{
7171
"properties": {
7272
"@timestamp": { "type": "date" },
7373
"field_match_only_text": { "type": "match_only_text" },
74-
"field_patterned_text": { "type": "patterned_text" }
74+
"field_patterned_text": {
75+
"type": "patterned_text",
76+
"index_options": "%"
77+
}
7578
}
7679
}
7780
""";
7881

82+
private static final String MAPPING_DOCS_ONLY = MAPPING_TEMPLATE.replace("%", "docs");
83+
private static final String MAPPING_POSITIONS = MAPPING_TEMPLATE.replace("%", "positions");
84+
7985
@Before
8086
public void setup() {
8187
assumeTrue("Only when patterned_text feature flag is enabled", PatternedTextFieldMapper.PATTERNED_TEXT_MAPPER.isEnabled());
8288
}
8389

8490
public void testQueries() throws IOException {
85-
var createRequest = new CreateIndexRequest(INDEX).mapping(MAPPING);
91+
var mapping = randomBoolean() ? MAPPING_DOCS_ONLY : MAPPING_POSITIONS;
92+
var createRequest = new CreateIndexRequest(INDEX).mapping(mapping);
8693

8794
assertAcked(admin().indices().create(createRequest));
8895

0 commit comments

Comments
 (0)