diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java index 59f43360b09b3..387477570ece0 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java @@ -14,6 +14,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.StoredField; +import org.apache.lucene.index.DocValues; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; @@ -40,7 +41,6 @@ import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData; import org.elasticsearch.index.fielddata.StoredFieldSortedBinaryIndexFieldData; -import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader; import org.elasticsearch.index.fieldvisitor.StoredFieldLoader; import org.elasticsearch.index.mapper.BlockLoader; import org.elasticsearch.index.mapper.BlockSourceReader; @@ -133,7 +133,9 @@ private MatchOnlyTextFieldType buildFieldType(MapperBuilderContext context) { tsi, indexAnalyzer, context.isSourceSynthetic(), - meta.getValue() + meta.getValue(), + withinMultiField, + multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() ); return ft; } @@ -163,17 +165,24 @@ public static class MatchOnlyTextFieldType extends StringFieldType { private final TextFieldType textFieldType; private final String originalName; + private final boolean withinMultiField; + private final boolean hasCompatibleMultiFields; + public MatchOnlyTextFieldType( String name, TextSearchInfo tsi, Analyzer indexAnalyzer, boolean isSyntheticSource, - Map meta + Map meta, + boolean withinMultiField, + boolean hasCompatibleMultiFields ) { super(name, true, false, false, tsi, meta); this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer); this.textFieldType = new TextFieldType(name, isSyntheticSource); this.originalName = isSyntheticSource ? name + "._original" : null; + this.withinMultiField = withinMultiField; + this.hasCompatibleMultiFields = hasCompatibleMultiFields; } public MatchOnlyTextFieldType(String name) { @@ -182,7 +191,9 @@ public MatchOnlyTextFieldType(String name) { new TextSearchInfo(Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER), Lucene.STANDARD_ANALYZER, false, - Collections.emptyMap() + Collections.emptyMap(), + false, + false ); } @@ -209,16 +220,34 @@ private IOFunction, IOExcepti "Field [" + name() + "] of type [" + CONTENT_TYPE + "] cannot run positional queries since [_source] is disabled." ); } - if (searchExecutionContext.isSourceSynthetic()) { + if (searchExecutionContext.isSourceSynthetic() && withinMultiField) { + String parentField = searchExecutionContext.parentPath(name()); + var parent = searchExecutionContext.lookup().fieldType(parentField); + if (parent.isStored()) { + return storedFieldFetcher(parentField); + } else if (parent.hasDocValues()) { + return docValuesFieldFetcher(parentField); + } else { + assert false : "parent field should either be stored or have doc values"; + } + } else if (searchExecutionContext.isSourceSynthetic() && hasCompatibleMultiFields) { + var mapper = (MatchOnlyTextFieldMapper) searchExecutionContext.getMappingLookup().getMapper(name()); + var kwd = TextFieldMapper.SyntheticSourceHelper.getKeywordFieldMapperForSyntheticSource(mapper); + if (kwd != null) { + var fieldType = kwd.fieldType(); + if (fieldType.isStored()) { + return storedFieldFetcher(fieldType.name()); + } else if (fieldType.hasDocValues()) { + return docValuesFieldFetcher(fieldType.name()); + } else { + assert false : "multi field should either be stored or have doc values"; + } + } else { + assert false : "multi field of type keyword should exist"; + } + } else if (searchExecutionContext.isSourceSynthetic()) { String name = storedFieldNameForSyntheticSource(); - StoredFieldLoader loader = StoredFieldLoader.create(false, Set.of(name)); - return context -> { - LeafStoredFieldLoader leafLoader = loader.getLoader(context, null); - return docId -> { - leafLoader.advanceTo(docId); - return leafLoader.storedFields().get(name); - }; - }; + return storedFieldFetcher(name); } return context -> { ValueFetcher valueFetcher = valueFetcher(searchExecutionContext, null); @@ -234,6 +263,35 @@ private IOFunction, IOExcepti }; } + private static IOFunction, IOException>> docValuesFieldFetcher(String name) { + return context -> { + var sortedDocValues = DocValues.getSortedSet(context.reader(), name); + return docId -> { + if (sortedDocValues.advanceExact(docId)) { + var values = new ArrayList<>(sortedDocValues.docValueCount()); + for (int i = 0; i < sortedDocValues.docValueCount(); i++) { + long ord = sortedDocValues.nextOrd(); + values.add(sortedDocValues.lookupOrd(ord).utf8ToString()); + } + return values; + } else { + return List.of(); + } + }; + }; + } + + private static IOFunction, IOException>> storedFieldFetcher(String name) { + var loader = StoredFieldLoader.create(false, Set.of(name)); + return context -> { + var leafLoader = loader.getLoader(context, null); + return docId -> { + leafLoader.advanceTo(docId); + return leafLoader.storedFields().get(name); + }; + }; + } + private Query toQuery(Query query, SearchExecutionContext searchExecutionContext) { return new ConstantScoreQuery( new SourceConfirmedTextQuery(query, getValueFetcherProvider(searchExecutionContext), indexAnalyzer) @@ -506,18 +564,27 @@ public MatchOnlyTextFieldType fieldType() { @Override protected SyntheticSourceSupport syntheticSourceSupport() { - return new SyntheticSourceSupport.Native( - () -> new StringStoredFieldFieldLoader(fieldType().storedFieldNameForSyntheticSource(), fieldType().name(), leafName()) { - @Override - protected void write(XContentBuilder b, Object value) throws IOException { - if (value instanceof BytesRef valueBytes) { - b.value(valueBytes.utf8ToString()); - } else { - assert value instanceof String; - b.value(value.toString()); + if (storeSource) { + return new SyntheticSourceSupport.Native( + () -> new StringStoredFieldFieldLoader(fieldType().storedFieldNameForSyntheticSource(), fieldType().name(), leafName()) { + @Override + protected void write(XContentBuilder b, Object value) throws IOException { + if (value instanceof BytesRef valueBytes) { + b.value(valueBytes.utf8ToString()); + } else { + assert value instanceof String; + b.value(value.toString()); + } } } + ); + } else { + var kwd = TextFieldMapper.SyntheticSourceHelper.getKeywordFieldMapperForSyntheticSource(this); + if (kwd != null) { + return new SyntheticSourceSupport.Native(() -> kwd.syntheticFieldLoader(fullPath(), leafName())); } - ); + assert false : "there should be a suite field mapper with native synthetic source support"; + return super.syntheticSourceSupport(); + } } } diff --git a/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/match_only_text/10_basic.yml b/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/match_only_text/10_basic.yml index 821ab46b1bd64..1d52038e29d45 100644 --- a/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/match_only_text/10_basic.yml +++ b/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/match_only_text/10_basic.yml @@ -394,4 +394,170 @@ synthetic_source with copy_to: - match: hits.hits.0.fields.copy.0: "Apache Lucene powers Elasticsearch" +--- +synthetic_source match_only_text as multi-field: + - requires: + cluster_features: [ "mapper.source.mode_from_index_setting" ] + reason: "Source mode configured through index setting" + + - do: + indices.create: + index: synthetic_source_test + body: + settings: + index: + mapping.source.mode: synthetic + mappings: + properties: + foo: + type: keyword + fields: + text: + type: match_only_text + + - do: + index: + index: synthetic_source_test + id: "1" + refresh: true + body: + foo: "Apache Lucene powers Elasticsearch" + + - do: + search: + index: synthetic_source_test + body: + query: + match_phrase: + foo.text: apache lucene + + - match: { "hits.total.value": 1 } + - match: + hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch" + +--- +synthetic_source match_only_text as multi-field with stored keyword as parent: + - requires: + cluster_features: [ "mapper.source.mode_from_index_setting" ] + reason: "Source mode configured through index setting" + + - do: + indices.create: + index: synthetic_source_test + body: + settings: + index: + mapping.source.mode: synthetic + mappings: + properties: + foo: + type: keyword + store: true + doc_values: false + fields: + text: + type: match_only_text + + - do: + index: + index: synthetic_source_test + id: "1" + refresh: true + body: + foo: "Apache Lucene powers Elasticsearch" + + - do: + search: + index: synthetic_source_test + body: + query: + match_phrase: + foo.text: apache lucene + + - match: { "hits.total.value": 1 } + - match: + hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch" +--- +synthetic_source match_only_text with multi-field: + - requires: + cluster_features: [ "mapper.source.mode_from_index_setting" ] + reason: "Source mode configured through index setting" + + - do: + indices.create: + index: synthetic_source_test + body: + settings: + index: + mapping.source.mode: synthetic + mappings: + properties: + foo: + type: match_only_text + fields: + raw: + type: keyword + + - do: + index: + index: synthetic_source_test + id: "1" + refresh: true + body: + foo: "Apache Lucene powers Elasticsearch" + + - do: + search: + index: synthetic_source_test + body: + query: + match_phrase: + foo: apache lucene + + - match: { "hits.total.value": 1 } + - match: + hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch" + +--- +synthetic_source match_only_text with stored multi-field: + - requires: + cluster_features: [ "mapper.source.mode_from_index_setting" ] + reason: "Source mode configured through index setting" + + - do: + indices.create: + index: synthetic_source_test + body: + settings: + index: + mapping.source.mode: synthetic + mappings: + properties: + foo: + type: match_only_text + fields: + raw: + type: keyword + store: true + doc_values: false + + - do: + index: + index: synthetic_source_test + id: "1" + refresh: true + body: + foo: "Apache Lucene powers Elasticsearch" + + - do: + search: + index: synthetic_source_test + body: + query: + match_phrase: + foo: apache lucene + + - match: { "hits.total.value": 1 } + - match: + hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch" diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mapping/20_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mapping/20_synthetic_source.yml new file mode 100644 index 0000000000000..abc83566d00b8 --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mapping/20_synthetic_source.yml @@ -0,0 +1,81 @@ +--- +synthetic_source text as multi-field: + - requires: + cluster_features: [ "mapper.source.mode_from_index_setting" ] + reason: "Source mode configured through index setting" + + - do: + indices.create: + index: synthetic_source_test + body: + settings: + index: + mapping.source.mode: synthetic + mappings: + properties: + foo: + type: keyword + fields: + text: + type: text + + - do: + index: + index: synthetic_source_test + id: "1" + refresh: true + body: + foo: "Apache Lucene powers Elasticsearch" + + - do: + search: + index: synthetic_source_test + body: + query: + match_phrase: + foo.text: apache lucene + + - match: { "hits.total.value": 1 } + - match: + hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch" + +--- +synthetic_source text with multi-field: + - requires: + cluster_features: [ "mapper.source.mode_from_index_setting" ] + reason: "Source mode configured through index setting" + + - do: + indices.create: + index: synthetic_source_test + body: + settings: + index: + mapping.source.mode: synthetic + mappings: + properties: + foo: + type: text + fields: + raw: + type: keyword + + - do: + index: + index: synthetic_source_test + id: "1" + refresh: true + body: + foo: "Apache Lucene powers Elasticsearch" + + - do: + search: + index: synthetic_source_test + body: + query: + match_phrase: + foo: apache lucene + + - match: { "hits.total.value": 1 } + - match: + hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"