Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.index.mapper.MapperBuilderContext;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.MappingParserContext;
import org.elasticsearch.index.mapper.TextParams;
import org.elasticsearch.index.mapper.TextSearchInfo;
Expand All @@ -32,16 +33,20 @@
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.function.Supplier;

/**
* A {@link FieldMapper} that assigns every document the same value.
* A {@link FieldMapper} for full-text log fields that internally splits text into a low cardinality template component
* and high cardinality argument component. Separating these pieces allows the template component to be highly compressed.
*/
public class PatternedTextFieldMapper extends FieldMapper {

public static final FeatureFlag PATTERNED_TEXT_MAPPER = new FeatureFlag("patterned_text");

public static class Defaults {
public static final FieldType FIELD_TYPE;
public static final FieldType FIELD_TYPE_DOCS;
public static final FieldType FIELD_TYPE_POSITIONS;

static {
final FieldType ft = new FieldType();
Expand All @@ -50,7 +55,17 @@ public static class Defaults {
ft.setStoreTermVectors(false);
ft.setOmitNorms(true);
ft.setIndexOptions(IndexOptions.DOCS);
FIELD_TYPE = freezeAndDeduplicateFieldType(ft);
FIELD_TYPE_DOCS = freezeAndDeduplicateFieldType(ft);
}

static {
final FieldType ft = new FieldType();
ft.setTokenized(true);
ft.setStored(false);
ft.setStoreTermVectors(false);
ft.setOmitNorms(true);
ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
FIELD_TYPE_POSITIONS = freezeAndDeduplicateFieldType(ft);
}
}

Expand All @@ -60,6 +75,7 @@ public static class Builder extends FieldMapper.Builder {
private final IndexSettings indexSettings;
private final Parameter<Map<String, String>> meta = Parameter.metaParam();
private final TextParams.Analyzers analyzers;
private final Parameter<String> indexOptions = patternedTextIndexOptions(m -> ((PatternedTextFieldMapper) m).indexOptions);

public Builder(String name, MappingParserContext context) {
this(name, context.indexVersionCreated(), context.getIndexSettings(), context.getIndexAnalyzers());
Expand All @@ -79,14 +95,14 @@ public Builder(String name, IndexVersion indexCreatedVersion, IndexSettings inde

@Override
protected Parameter<?>[] getParameters() {
return new Parameter<?>[] { meta };
return new Parameter<?>[] { meta, indexOptions };
}

private PatternedTextFieldType buildFieldType(MapperBuilderContext context) {
private PatternedTextFieldType buildFieldType(FieldType fieldType, MapperBuilderContext context) {
NamedAnalyzer searchAnalyzer = analyzers.getSearchAnalyzer();
NamedAnalyzer searchQuoteAnalyzer = analyzers.getSearchQuoteAnalyzer();
NamedAnalyzer indexAnalyzer = analyzers.getIndexAnalyzer();
TextSearchInfo tsi = new TextSearchInfo(Defaults.FIELD_TYPE, null, searchAnalyzer, searchQuoteAnalyzer);
TextSearchInfo tsi = new TextSearchInfo(fieldType, null, searchAnalyzer, searchQuoteAnalyzer);
return new PatternedTextFieldType(
context.buildFullName(leafName()),
tsi,
Expand All @@ -96,45 +112,68 @@ private PatternedTextFieldType buildFieldType(MapperBuilderContext context) {
);
}

/**
 * Resolves the configured {@code index_options} string into a Lucene {@link IndexOptions}
 * value and returns the matching pre-built, frozen {@link FieldType}.
 *
 * @param indexOptionSupplier supplies the raw {@code index_options} mapping value
 *                            ({@code "docs"} or {@code "positions"})
 * @return {@link Defaults#FIELD_TYPE_POSITIONS} when positions are indexed,
 *         otherwise {@link Defaults#FIELD_TYPE_DOCS}
 */
private static FieldType buildLuceneFieldType(Supplier<String> indexOptionSupplier) {
    IndexOptions resolved = TextParams.toIndexOptions(true, indexOptionSupplier.get());
    if (resolved == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
        return Defaults.FIELD_TYPE_POSITIONS;
    }
    return Defaults.FIELD_TYPE_DOCS;
}

/**
 * Builds the {@code index_options} mapping parameter for patterned_text fields.
 * Defaults to {@code "docs"}; the attached validator permits only {@code "docs"}
 * and {@code "positions"} and rejects every other value at mapping-parse time.
 *
 * @param initializer extracts the current {@code index_options} value from an
 *                    existing mapper instance (used for merging/serialization)
 * @return the configured, non-updateable {@code index_options} parameter
 */
private static Parameter<String> patternedTextIndexOptions(Function<FieldMapper, String> initializer) {
    return Parameter.stringParam("index_options", false, initializer, "docs").addValidator(value -> {
        boolean accepted = "positions".equals(value) || "docs".equals(value);
        if (accepted == false) {
            throw new MapperParsingException(
                "Unknown value [" + value + "] for field [index_options] - accepted values are [positions, docs]"
            );
        }
    });
}

@Override
public PatternedTextFieldMapper build(MapperBuilderContext context) {
PatternedTextFieldType patternedTextFieldType = buildFieldType(context);
FieldType fieldType = buildLuceneFieldType(indexOptions);
PatternedTextFieldType patternedTextFieldType = buildFieldType(fieldType, context);
BuilderParams builderParams = builderParams(this, context);
var templateIdMapper = KeywordFieldMapper.Builder.buildWithDocValuesSkipper(
patternedTextFieldType.templateIdFieldName(),
indexSettings.getMode(),
indexCreatedVersion,
true
).indexed(false).build(context);
return new PatternedTextFieldMapper(leafName(), patternedTextFieldType, builderParams, this, templateIdMapper);
return new PatternedTextFieldMapper(leafName(), fieldType, patternedTextFieldType, builderParams, this, templateIdMapper);
}
}

public static final TypeParser PARSER = new TypeParser(Builder::new);

private final IndexVersion indexCreatedVersion;
private final IndexAnalyzers indexAnalyzers;
private final IndexSettings indexSettings;
private final NamedAnalyzer indexAnalyzer;
private final IndexSettings indexSettings;
private final String indexOptions;
private final int positionIncrementGap;
private final FieldType fieldType;
private final KeywordFieldMapper templateIdMapper;

private PatternedTextFieldMapper(
String simpleName,
PatternedTextFieldType mappedFieldPatternedTextFieldType,
FieldType fieldType,
PatternedTextFieldType mappedFieldType,
BuilderParams builderParams,
Builder builder,
KeywordFieldMapper templateIdMapper
) {
super(simpleName, mappedFieldPatternedTextFieldType, builderParams);
assert mappedFieldPatternedTextFieldType.getTextSearchInfo().isTokenized();
assert mappedFieldPatternedTextFieldType.hasDocValues() == false;
this.fieldType = Defaults.FIELD_TYPE;
super(simpleName, mappedFieldType, builderParams);
assert mappedFieldType.getTextSearchInfo().isTokenized();
assert mappedFieldType.hasDocValues() == false;
this.fieldType = fieldType;
this.indexCreatedVersion = builder.indexCreatedVersion;
this.indexAnalyzers = builder.analyzers.indexAnalyzers;
this.indexAnalyzer = builder.analyzers.getIndexAnalyzer();
this.indexSettings = builder.indexSettings;
this.indexOptions = builder.indexOptions.getValue();
this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue();
this.templateIdMapper = templateIdMapper;
}
Expand Down Expand Up @@ -172,7 +211,7 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
throw new IllegalArgumentException("Multiple values are not allowed for field [" + fieldType().name() + "].");
}

// Parse template and args.
// Parse template and args
PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(value);

// Add index on original value
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,21 +62,29 @@ public class PatternedTextFieldType extends StringFieldType {

private final Analyzer indexAnalyzer;
private final TextFieldMapper.TextFieldType textFieldType;
private final boolean hasPositions;

PatternedTextFieldType(String name, TextSearchInfo tsi, Analyzer indexAnalyzer, boolean isSyntheticSource, Map<String, String> meta) {
// Though this type is based on doc_values, hasDocValues is set to false as the patterned_text type is not aggregatable.
// This does not stop its child .template type from being aggregatable.
super(name, true, false, false, tsi, meta);
this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer);
this.textFieldType = new TextFieldMapper.TextFieldType(name, isSyntheticSource);
this.hasPositions = tsi.hasPositions();
}

PatternedTextFieldType(String name) {
// For testing only
PatternedTextFieldType(String name, boolean hasPositions, boolean syntheticSource) {
this(
name,
new TextSearchInfo(PatternedTextFieldMapper.Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER),
new TextSearchInfo(
hasPositions ? PatternedTextFieldMapper.Defaults.FIELD_TYPE_POSITIONS : PatternedTextFieldMapper.Defaults.FIELD_TYPE_DOCS,
null,
Lucene.STANDARD_ANALYZER,
Lucene.STANDARD_ANALYZER
),
Lucene.STANDARD_ANALYZER,
false,
syntheticSource,
Collections.emptyMap()
);
}
Expand Down Expand Up @@ -113,9 +121,13 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti
};
}

private Query sourceConfirmedQuery(Query query, SearchExecutionContext context) {
// Disable scoring
return new ConstantScoreQuery(new SourceConfirmedTextQuery(query, getValueFetcherProvider(context), indexAnalyzer));
private Query maybeSourceConfirmQuery(Query query, SearchExecutionContext context) {
// Disable scoring similarly to match_only_text
if (hasPositions) {
return new ConstantScoreQuery(query);
} else {
return new ConstantScoreQuery(new SourceConfirmedTextQuery(query, getValueFetcherProvider(context), indexAnalyzer));
}
}

private IntervalsSource toIntervalsSource(IntervalsSource source, Query approximation, SearchExecutionContext searchExecutionContext) {
Expand Down Expand Up @@ -220,21 +232,21 @@ public IntervalsSource rangeIntervals(
public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncrements, SearchExecutionContext queryShardContext)
throws IOException {
final Query textQuery = textFieldType.phraseQuery(stream, slop, enablePosIncrements, queryShardContext);
return sourceConfirmedQuery(textQuery, queryShardContext);
return maybeSourceConfirmQuery(textQuery, queryShardContext);
}

@Override
public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, SearchExecutionContext queryShardContext)
throws IOException {
final Query textQuery = textFieldType.multiPhraseQuery(stream, slop, enablePositionIncrements, queryShardContext);
return sourceConfirmedQuery(textQuery, queryShardContext);
return maybeSourceConfirmQuery(textQuery, queryShardContext);
}

@Override
public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, SearchExecutionContext queryShardContext)
throws IOException {
final Query textQuery = textFieldType.phrasePrefixQuery(stream, slop, maxExpansions, queryShardContext);
return sourceConfirmedQuery(textQuery, queryShardContext);
return maybeSourceConfirmQuery(textQuery, queryShardContext);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,21 +42,21 @@
public class PatternedTextFieldTypeTests extends FieldTypeTestCase {

public void testTermQuery() {
MappedFieldType ft = new PatternedTextFieldType("field");
MappedFieldType ft = new PatternedTextFieldType("field", randomBoolean(), randomBoolean());
assertEquals(new ConstantScoreQuery(new TermQuery(new Term("field", "foo"))), ft.termQuery("foo", null));
assertEquals(AutomatonQueries.caseInsensitiveTermQuery(new Term("field", "fOo")), ft.termQueryCaseInsensitive("fOo", null));
}

public void testTermsQuery() {
MappedFieldType ft = new PatternedTextFieldType("field");
MappedFieldType ft = new PatternedTextFieldType("field", randomBoolean(), randomBoolean());
List<BytesRef> terms = new ArrayList<>();
terms.add(new BytesRef("foo"));
terms.add(new BytesRef("123"));
assertEquals(new TermInSetQuery("field", terms), ft.termsQuery(Arrays.asList("foo", "123"), null));
}

public void testRangeQuery() {
MappedFieldType ft = new PatternedTextFieldType("field");
MappedFieldType ft = new PatternedTextFieldType("field", randomBoolean(), randomBoolean());
assertEquals(
new TermRangeQuery("field", BytesRefs.toBytesRef("foo"), BytesRefs.toBytesRef("bar"), true, false),
ft.rangeQuery("foo", "bar", true, false, null, null, null, MOCK_CONTEXT)
Expand All @@ -73,7 +73,7 @@ public void testRangeQuery() {
}

public void testRegexpQuery() {
MappedFieldType ft = new PatternedTextFieldType("field");
MappedFieldType ft = new PatternedTextFieldType("field", randomBoolean(), randomBoolean());
assertEquals(new RegexpQuery(new Term("field", "foo.*")), ft.regexpQuery("foo.*", 0, 0, 10, null, MOCK_CONTEXT));

ElasticsearchException ee = expectThrows(
Expand All @@ -84,7 +84,7 @@ public void testRegexpQuery() {
}

public void testFuzzyQuery() {
MappedFieldType ft = new PatternedTextFieldType("field");
MappedFieldType ft = new PatternedTextFieldType("field", randomBoolean(), randomBoolean());
assertEquals(
new ConstantScoreQuery(new FuzzyQuery(new Term("field", "foo"), 2, 1, 50, true)),
ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MOCK_CONTEXT)
Expand All @@ -110,15 +110,15 @@ private Query unwrapPositionalQuery(Query query) {
}

public void testPhraseQuery() throws IOException {
MappedFieldType ft = new PatternedTextFieldType("field");
MappedFieldType ft = new PatternedTextFieldType("field", randomBoolean(), randomBoolean());
TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("1", 4, 7));
Query query = ft.phraseQuery(ts, 0, true, MOCK_CONTEXT);
Query delegate = unwrapPositionalQuery(query);
assertEquals(new PhraseQuery("field", "a", "1").toString(), delegate.toString());
}

public void testMultiPhraseQuery() throws IOException {
MappedFieldType ft = new PatternedTextFieldType("field");
MappedFieldType ft = new PatternedTextFieldType("field", randomBoolean(), randomBoolean());
TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("2", 0, 0, 3), new Token("c", 4, 7));
Query query = ft.multiPhraseQuery(ts, 0, true, MOCK_CONTEXT);
Query delegate = unwrapPositionalQuery(query);
Expand All @@ -129,7 +129,7 @@ public void testMultiPhraseQuery() throws IOException {
}

public void testPhrasePrefixQuery() throws IOException {
MappedFieldType ft = new PatternedTextFieldType("field");
MappedFieldType ft = new PatternedTextFieldType("field", randomBoolean(), randomBoolean());
TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("b", 0, 0, 3), new Token("c", 4, 7));
Query query = ft.phrasePrefixQuery(ts, 0, 10, MOCK_CONTEXT);
Query delegate = unwrapPositionalQuery(query);
Expand All @@ -140,14 +140,14 @@ public void testPhrasePrefixQuery() throws IOException {
}

public void testTermIntervals() {
MappedFieldType ft = new PatternedTextFieldType("field");
MappedFieldType ft = new PatternedTextFieldType("field", randomBoolean(), randomBoolean());
IntervalsSource termIntervals = ft.termIntervals(new BytesRef("foo"), MOCK_CONTEXT);
assertThat(termIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
assertEquals(Intervals.term(new BytesRef("foo")), ((SourceIntervalsSource) termIntervals).getIntervalsSource());
}

public void testPrefixIntervals() {
MappedFieldType ft = new PatternedTextFieldType("field");
MappedFieldType ft = new PatternedTextFieldType("field", randomBoolean(), randomBoolean());
IntervalsSource prefixIntervals = ft.prefixIntervals(new BytesRef("foo"), MOCK_CONTEXT);
assertThat(prefixIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
assertEquals(
Expand All @@ -157,7 +157,7 @@ public void testPrefixIntervals() {
}

public void testWildcardIntervals() {
MappedFieldType ft = new PatternedTextFieldType("field");
MappedFieldType ft = new PatternedTextFieldType("field", randomBoolean(), randomBoolean());
IntervalsSource wildcardIntervals = ft.wildcardIntervals(new BytesRef("foo"), MOCK_CONTEXT);
assertThat(wildcardIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
assertEquals(
Expand All @@ -167,7 +167,7 @@ public void testWildcardIntervals() {
}

public void testRegexpIntervals() {
MappedFieldType ft = new PatternedTextFieldType("field");
MappedFieldType ft = new PatternedTextFieldType("field", randomBoolean(), randomBoolean());
IntervalsSource regexpIntervals = ft.regexpIntervals(new BytesRef("foo"), MOCK_CONTEXT);
assertThat(regexpIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
assertEquals(
Expand All @@ -177,13 +177,13 @@ public void testRegexpIntervals() {
}

public void testFuzzyIntervals() {
MappedFieldType ft = new PatternedTextFieldType("field");
MappedFieldType ft = new PatternedTextFieldType("field", randomBoolean(), randomBoolean());
IntervalsSource fuzzyIntervals = ft.fuzzyIntervals("foo", 1, 2, true, MOCK_CONTEXT);
assertThat(fuzzyIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
}

public void testRangeIntervals() {
MappedFieldType ft = new PatternedTextFieldType("field");
MappedFieldType ft = new PatternedTextFieldType("field", randomBoolean(), randomBoolean());
IntervalsSource rangeIntervals = ft.rangeIntervals(new BytesRef("foo"), new BytesRef("foo1"), true, true, MOCK_CONTEXT);
assertThat(rangeIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
assertEquals(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,23 +66,30 @@ protected Collection<Class<? extends Plugin>> nodePlugins() {
private static final String INDEX = "test_index";
private static final String MATCH_ONLY_TEXT_FIELD = "field_match_only_text";
private static final String PATTERNED_TEXT_FIELD = "field_patterned_text";
private static final String MAPPING = """
private static final String MAPPING_TEMPLATE = """
{
"properties": {
"@timestamp": { "type": "date" },
"field_match_only_text": { "type": "match_only_text" },
"field_patterned_text": { "type": "patterned_text" }
"field_patterned_text": {
"type": "patterned_text",
"index_options": "%"
}
}
}
""";

private static final String MAPPING_DOCS_ONLY = MAPPING_TEMPLATE.replace("%", "docs");
private static final String MAPPING_POSITIONS = MAPPING_TEMPLATE.replace("%", "positions");

@Before
public void setup() {
assumeTrue("Only when patterned_text feature flag is enabled", PatternedTextFieldMapper.PATTERNED_TEXT_MAPPER.isEnabled());
}

public void testQueries() throws IOException {
var createRequest = new CreateIndexRequest(INDEX).mapping(MAPPING);
var mapping = randomBoolean() ? MAPPING_DOCS_ONLY : MAPPING_POSITIONS;
var createRequest = new CreateIndexRequest(INDEX).mapping(mapping);

assertAcked(admin().indices().create(createRequest));

Expand Down
Loading