diff --git a/server/src/main/java/org/opensearch/index/mapper/CompositeFieldValueFetcher.java b/server/src/main/java/org/opensearch/index/mapper/CompositeFieldValueFetcher.java
new file mode 100644
index 0000000000000..f8767369e28ad
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/mapper/CompositeFieldValueFetcher.java
@@ -0,0 +1,72 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.mapper;
+
+import org.apache.lucene.index.LeafReader;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Composite fetcher that consults several value sources in priority order and returns the already-converted
+ * values of the first source that has any value for the document.
+ *
+ * @opensearch.internal
+ */
+public class CompositeFieldValueFetcher extends FieldValueFetcher {
+
+    /** Delegates in priority order; the first one that yields a value wins. */
+    private final List<FieldValueFetcher> fieldValueFetchers;
+
+    /**
+     * @param simpleName name handed to the {@link FieldValueFetcher} constructor
+     * @param fieldValueFetchers delegates to consult, highest priority first
+     */
+    public CompositeFieldValueFetcher(String simpleName, List<FieldValueFetcher> fieldValueFetchers) {
+        super(simpleName);
+        // Snapshot the list so that later changes made by the caller cannot alter the lookup order
+        this.fieldValueFetchers = List.copyOf(fieldValueFetchers);
+    }
+
+    /**
+     * Asks each delegate in turn for the values of the document and stops at the first delegate whose result is
+     * neither {@code null} nor empty. Every value is passed through that delegate's own {@code convert}.
+     *
+     * @param reader leaf reader forwarded unchanged to the delegates
+     * @param docId document id forwarded unchanged to the delegates
+     * @return the converted values, or {@code null} when no delegate has a value for the document
+     * @throws IOException propagated from the delegates' {@code fetch}
+     */
+    @Override
+    public List<Object> fetch(LeafReader reader, int docId) throws IOException {
+        for (final FieldValueFetcher fieldValueFetcher : fieldValueFetchers) {
+            final List<?> values = fieldValueFetcher.fetch(reader, docId);
+            if (values == null || values.isEmpty()) {
+                // Nothing in this source for the document, fall through to the next priority
+                continue;
+            }
+            // Convert with the same fetcher that produced the values
+            final List<Object> convertedValues = new ArrayList<>(values.size());
+            for (final Object value : values) {
+                convertedValues.add(fieldValueFetcher.convert(value));
+            }
+            return convertedValues;
+        }
+        return null;
+    }
+
+    /**
+     * Returns the value unchanged, because {@link #fetch(LeafReader, int)} has already converted it.
+     */
+    @Override
+    Object convert(Object value) {
+        return value;
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/mapper/FieldValueFetcher.java
b/server/src/main/java/org/opensearch/index/mapper/FieldValueFetcher.java index aeacf235591ad..5eb70de1820f3 100644 --- a/server/src/main/java/org/opensearch/index/mapper/FieldValueFetcher.java +++ b/server/src/main/java/org/opensearch/index/mapper/FieldValueFetcher.java @@ -51,7 +51,7 @@ Object convert(Object value) { * @param builder - builder to store the field value(s) in */ void write(XContentBuilder builder, List values) throws IOException { - if (values.isEmpty()) { + if (values == null || values.isEmpty()) { return; } if (values.size() == 1) { diff --git a/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java index 3271f60a466ee..8f833a5a74d4f 100644 --- a/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java @@ -37,6 +37,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.document.StoredField; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.Term; import org.apache.lucene.search.BoostQuery; @@ -271,10 +272,8 @@ public Optional getSupportedDataCubeDimensionType() { @Override protected void canDeriveSourceInternal() { - if (this.ignoreAbove != Integer.MAX_VALUE || !Objects.equals(this.normalizerName, "default")) { - throw new UnsupportedOperationException( - "Unable to derive source for [" + name() + "] with " + "ignore_above and/or normalizer set" - ); + if (!(fieldType().normalizer() == null || Lucene.KEYWORD_ANALYZER.equals(fieldType().normalizer()))) { + throw new UnsupportedOperationException("Unable to derive source for [" + name() + "] with " + "normalizer set"); } checkStoredAndDocValuesForDerivedSource(); } @@ -295,11 +294,18 @@ protected void canDeriveSourceInternal() { */ @Override protected 
DerivedFieldGenerator derivedFieldGenerator() { - return new DerivedFieldGenerator( - mappedFieldType, - new SortedSetDocValuesFetcher(mappedFieldType, simpleName()), - new StoredFieldFetcher(mappedFieldType, simpleName()) + final FieldValueFetcher primaryFieldValueFetcher = KeywordFieldMapper.DerivedSourceHelper.getPrimaryFieldValueFetcher(this); + final FieldValueFetcher fallbackFieldValueFetcher = KeywordFieldMapper.DerivedSourceHelper.getFallbackFieldValueFetcher(this); + final FieldValueFetcher compositeFieldValueFetcher = new CompositeFieldValueFetcher( + simpleName(), + List.of(primaryFieldValueFetcher, fallbackFieldValueFetcher) ); + return new DerivedFieldGenerator(mappedFieldType, compositeFieldValueFetcher, null) { + @Override + public FieldValueType getDerivedFieldPreference() { + return FieldValueType.DOC_VALUES; + } + }; } /** @@ -872,11 +878,21 @@ protected void parseCreateField(ParseContext context) throws IOException { } } - if (value == null || value.length() > ignoreAbove) { + if (value == null) { return; } NamedAnalyzer normalizer = fieldType().normalizer(); + + if (value.length() > ignoreAbove) { + if ((normalizer == null || Lucene.KEYWORD_ANALYZER.equals(normalizer)) + && context.indexSettings().isDerivedSourceEnabled() + && context.isWithinMultiFields() == false) { + final BytesRef binaryValue = new BytesRef(value); + context.doc().add(new StoredField(fieldType().derivedSourceIgnoreFieldName(), binaryValue)); + } + return; + } if (normalizer != null) { value = normalizeValue(normalizer, name(), value); } @@ -936,4 +952,51 @@ protected String contentType() { public ParametrizedFieldMapper.Builder getMergeBuilder() { return new Builder(simpleName(), indexAnalyzers).init(this); } + + private static final class DerivedSourceHelper { + + private static FieldValueFetcher getPrimaryFieldValueFetcher(KeywordFieldMapper mapper) { + return mapper.fieldType().hasDocValues() + ? 
new SortedSetDocValuesFetcher(mapper.fieldType(), mapper.simpleName()) + : new StoredFieldFetcher(mapper.fieldType(), mapper.simpleName()); + } + + private static FieldValueFetcher getFallbackFieldValueFetcher(KeywordFieldMapper mapper) { + // Override to read from the special ignored value field + final MappedFieldType ignoredFieldType = new MappedFieldType( + mapper.fieldType().derivedSourceIgnoreFieldName(), + false, // not searchable + true, // stored + false, // no doc values + TextSearchInfo.NONE, + Collections.emptyMap() + ) { + @Override + public String typeName() { + return "keyword"; + } + + @Override + public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchLookup, String format) { + return null; + } + + @Override + public Query termQuery(Object value, QueryShardContext context) { + return null; + } + + @Override + public Object valueForDisplay(Object value) { + if (value == null) { + return null; + } + // keywords are internally stored as utf8 bytes + BytesRef binaryValue = (BytesRef) value; + return binaryValue.utf8ToString(); + } + }; + return new StoredFieldFetcher(ignoredFieldType, mapper.simpleName()); + } + } } diff --git a/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java index 757de65248d33..a8748f1afaad3 100644 --- a/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java @@ -139,21 +139,22 @@ public Builder(String name, Version indexCreatedVersion, IndexAnalyzers indexAna @Override public MatchOnlyTextFieldMapper build(BuilderContext context) { FieldType fieldType = TextParams.buildFieldType(index, store, indexOptions, norms, termVectors); - MatchOnlyTextFieldType tft = buildFieldType(fieldType, context); + MultiFields multiFields = multiFieldsBuilder.build(this, context); + MatchOnlyTextFieldType tft = 
buildFieldType(fieldType, multiFields, context); return new MatchOnlyTextFieldMapper( name, fieldType, tft, buildPrefixMapper(context, fieldType, tft), buildPhraseMapper(fieldType, tft), - multiFieldsBuilder.build(this, context), + multiFields, copyTo.build(), this ); } @Override - protected MatchOnlyTextFieldType buildFieldType(FieldType fieldType, BuilderContext context) { + protected MatchOnlyTextFieldType buildFieldType(FieldType fieldType, MultiFields multiFields, BuilderContext context) { NamedAnalyzer indexAnalyzer = analyzers.getIndexAnalyzer(); NamedAnalyzer searchAnalyzer = analyzers.getSearchAnalyzer(); NamedAnalyzer searchQuoteAnalyzer = analyzers.getSearchQuoteAnalyzer(); diff --git a/server/src/main/java/org/opensearch/index/mapper/StringFieldType.java b/server/src/main/java/org/opensearch/index/mapper/StringFieldType.java index ed2d3d6c25db5..2d8efd9e80b99 100644 --- a/server/src/main/java/org/opensearch/index/mapper/StringFieldType.java +++ b/server/src/main/java/org/opensearch/index/mapper/StringFieldType.java @@ -66,6 +66,7 @@ */ public abstract class StringFieldType extends TermBasedFieldType { + private static final String IGNORED_VALUE_FIELD_SUFFIX = ".ignored_value"; private static final Pattern WILDCARD_PATTERN = Pattern.compile("(\\\\.)|([?*]+)"); public StringFieldType( @@ -255,4 +256,8 @@ public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower includeUpper ); } + + public String derivedSourceIgnoreFieldName() { + return name() + IGNORED_VALUE_FIELD_SUFFIX; + } } diff --git a/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java index 50b30e65e315a..46f8ffac3039e 100644 --- a/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java @@ -44,6 +44,7 @@ import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import 
org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.StoredField; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.Term; import org.apache.lucene.queries.intervals.Intervals; @@ -395,7 +396,7 @@ protected List> getParameters() { ); } - protected TextFieldType buildFieldType(FieldType fieldType, BuilderContext context) { + protected TextFieldType buildFieldType(FieldType fieldType, MultiFields multiFields, BuilderContext context) { NamedAnalyzer indexAnalyzer = analyzers.getIndexAnalyzer(); NamedAnalyzer searchAnalyzer = analyzers.getSearchAnalyzer(); NamedAnalyzer searchQuoteAnalyzer = analyzers.getSearchQuoteAnalyzer(); @@ -417,6 +418,12 @@ protected TextFieldType buildFieldType(FieldType fieldType, BuilderContext conte if (fieldData.getValue()) { ft.setFielddata(true, freqFilter.getValue()); } + if (context.indexSettings().getAsBoolean(IndexSettings.INDEX_DERIVED_SOURCE_SETTING.getKey(), false)) { + ft.setHasDerivedSourceSupportedKeyword(TextFieldMapper.DerivedSourceHelper.hasDerivedSourceSupportedKeyword(multiFields)); + ft.setKeywordIgnoredLengthForDerivedSource( + TextFieldMapper.DerivedSourceHelper.getIgnoredLengthForDerivedSourceSupportedKeyword(multiFields) + ); + } return ft; } @@ -471,17 +478,15 @@ protected PhraseFieldMapper buildPhraseMapper(FieldType fieldType, TextFieldType @Override public TextFieldMapper build(BuilderContext context) { FieldType fieldType = TextParams.buildFieldType(index, store, indexOptions, norms, termVectors); - TextFieldType tft = buildFieldType(fieldType, context); - if (context.indexSettings().getAsBoolean(IndexSettings.INDEX_DERIVED_SOURCE_SETTING.getKey(), false)) { - fieldType.setStored(true); - } + MultiFields multiFields = multiFieldsBuilder.build(this, context); + TextFieldType tft = buildFieldType(fieldType, multiFields, context); return new TextFieldMapper( name, fieldType, tft, buildPrefixMapper(context, fieldType, tft), 
buildPhraseMapper(fieldType, tft), - multiFieldsBuilder.build(this, context), + multiFields, copyTo.build(), this ); @@ -767,6 +772,8 @@ public static class TextFieldType extends StringFieldType { private FielddataFrequencyFilter filter; private PrefixFieldType prefixFieldType; private boolean indexPhrases = false; + private boolean hasDerivedSourceSupportedKeyword = false; + private int keywordIgnoredLengthForDerivedSource = -1; public TextFieldType(String name, boolean indexed, boolean stored, TextSearchInfo tsi, Map meta) { super(name, indexed, stored, false, tsi, meta); @@ -832,6 +839,22 @@ public PrefixFieldType getPrefixFieldType() { return this.prefixFieldType; } + public void setHasDerivedSourceSupportedKeyword(boolean hasDerivedSourceSupportedKeyword) { + this.hasDerivedSourceSupportedKeyword = hasDerivedSourceSupportedKeyword; + } + + public boolean getHasDerivedSourceSupportedKeyword() { + return hasDerivedSourceSupportedKeyword; + } + + public void setKeywordIgnoredLengthForDerivedSource(int keywordIgnoredLengthForDerivedSource) { + this.keywordIgnoredLengthForDerivedSource = keywordIgnoredLengthForDerivedSource; + } + + public int getKeywordIgnoredLengthForDerivedSource() { + return keywordIgnoredLengthForDerivedSource; + } + @Override public String typeName() { return CONTENT_TYPE; @@ -1053,6 +1076,15 @@ protected void parseCreateField(ParseContext context) throws IOException { context.doc().add(new Field(phraseFieldMapper.fieldType().name(), value, phraseFieldMapper.fieldType)); } } + + // Explicitly add stored field, if there is no supporting sub keyword present or value length exceeds when + // compared to maximum value of ignore_above from derived source supporting keywords + if (context.indexSettings().isDerivedSourceEnabled() + && fieldType.stored() == false + && (fieldType().getHasDerivedSourceSupportedKeyword() == false + || fieldType().getKeywordIgnoredLengthForDerivedSource() < value.length())) { + context.doc().add(new 
StoredField(fieldType().derivedSourceIgnoreFieldName(), value)); + } } @Override @@ -1242,11 +1274,95 @@ protected void canDeriveSourceInternal() {} */ @Override protected DerivedFieldGenerator derivedFieldGenerator() { - return new DerivedFieldGenerator(mappedFieldType, null, new StoredFieldFetcher(mappedFieldType, simpleName())) { + final List fieldValueFetchers = TextFieldMapper.DerivedSourceHelper + .getDerivedSourceSupportedKeywordValueFetchers(multiFields, simpleName()); + + // Override to read from the special ignored value field + final MappedFieldType ignoredFieldType = new MappedFieldType( + fieldType().derivedSourceIgnoreFieldName(), + false, // not searchable + true, // stored + false, // no doc values + TextSearchInfo.NONE, + Collections.emptyMap() + ) { + @Override + public String typeName() { + return "text"; + } + + @Override + public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchLookup, String format) { + return null; + } + + @Override + public Query termQuery(Object value, QueryShardContext context) { + return null; + } + }; + + fieldValueFetchers.add(new StoredFieldFetcher(ignoredFieldType, simpleName())); + final FieldValueFetcher compositeFieldValueFetcher = new CompositeFieldValueFetcher(simpleName(), fieldValueFetchers); + + return new DerivedFieldGenerator(mappedFieldType, compositeFieldValueFetcher, null) { @Override public FieldValueType getDerivedFieldPreference() { - return FieldValueType.STORED; + return FieldValueType.DOC_VALUES; } }; } + + private static final class DerivedSourceHelper { + + private static boolean hasDerivedSourceSupportedKeyword(MultiFields multiFields) { + for (Mapper mapper : multiFields) { + if (mapper instanceof KeywordFieldMapper kw) { + if (isDerivedSourceSupportedKeyword(kw)) { + return true; + } + } + } + return false; + } + + private static boolean isDerivedSourceSupportedKeyword(KeywordFieldMapper keywordFieldMapper) { + return (keywordFieldMapper.fieldType().normalizer() == null + 
|| Lucene.KEYWORD_ANALYZER.equals(keywordFieldMapper.fieldType().normalizer())) + && (keywordFieldMapper.fieldType().isStored() || keywordFieldMapper.fieldType().hasDocValues()); + } + + private static int getIgnoredLengthForDerivedSourceSupportedKeyword(MultiFields multiFields) { + int ignoredLength = -1; + for (Mapper mapper : multiFields) { + if (mapper instanceof KeywordFieldMapper kw) { + if (isDerivedSourceSupportedKeyword(kw)) { + ignoredLength = Math.max(ignoredLength, kw.ignoreAbove()); + } + } + } + return ignoredLength; + } + + private static List getDerivedSourceSupportedKeywordValueFetchers( + MultiFields multiFields, + String textFieldName + ) { + List fetchers = new ArrayList<>(); + for (Mapper mapper : multiFields) { + if (mapper instanceof KeywordFieldMapper kw) { + if (isDerivedSourceSupportedKeyword(kw)) { + fetchers.add(getKeywordFieldValueFetcher(kw, textFieldName)); + } + } + } + return fetchers; + } + + private static FieldValueFetcher getKeywordFieldValueFetcher(KeywordFieldMapper keywordFieldMapper, String textFieldName) { + return keywordFieldMapper.fieldType().hasDocValues() + ? new SortedSetDocValuesFetcher(keywordFieldMapper.fieldType(), textFieldName) + : new StoredFieldFetcher(keywordFieldMapper.fieldType(), textFieldName); + } + } }