From fdaa9fe7fab329c3e388a12d0e3ee95efa947417 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Tue, 8 Jul 2025 21:58:17 +0200 Subject: [PATCH] Change match_only_text's value fetcher to use `SortedBinaryDocValues` instead of interacting with doc values api directly. This way, via field data abstraction, the right doc values type is used, and the right conversions happen. Values of all field types will get converted to strings. --- .../extras/MatchOnlyTextFieldMapper.java | 23 +-- .../test/logsdb/20_synthetic_source.yml | 149 ++++++++++++++++++ 2 files changed, 162 insertions(+), 10 deletions(-) create mode 100644 x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/logsdb/20_synthetic_source.yml diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java index bb9babc14c1bb..04ace5ccc4157 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java @@ -14,7 +14,6 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.StoredField; -import org.apache.lucene.index.DocValues; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; @@ -48,6 +47,7 @@ import org.elasticsearch.index.mapper.BlockStoredFieldsReader; import org.elasticsearch.index.mapper.DocumentParserContext; import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MapperBuilderContext; import org.elasticsearch.index.mapper.SourceValueFetcher; import org.elasticsearch.index.mapper.StringFieldType; @@ -254,7 +254,8 @@ private IOFunction, IOExcepti if (parent.isStored()) { return storedFieldFetcher(parentField); } else if (parent.hasDocValues()) { - return docValuesFieldFetcher(parentField); + var ifd = searchExecutionContext.getForField(parent, MappedFieldType.FielddataOperation.SEARCH); + return docValuesFieldFetcher(ifd); } else { assert false : "parent field should either be stored or have doc values"; } @@ -266,7 +267,8 @@ private IOFunction, IOExcepti if (fieldType.isStored()) { return storedFieldFetcher(fieldType.name()); } else if (fieldType.hasDocValues()) { - return docValuesFieldFetcher(fieldType.name()); + var ifd = searchExecutionContext.getForField(fieldType, MappedFieldType.FielddataOperation.SEARCH); + return docValuesFieldFetcher(ifd); } else { assert false : "multi field should either be stored or have doc values"; } @@ -291,15 +293,16 @@ private IOFunction, IOExcepti }; } - private static IOFunction, IOException>> docValuesFieldFetcher(String name) { + private static IOFunction, IOException>> docValuesFieldFetcher( + IndexFieldData ifd + ) { return context -> { - var sortedDocValues = DocValues.getSortedSet(context.reader(), name); + var sortedBinaryDocValues = ifd.load(context).getBytesValues(); return docId -> { - if (sortedDocValues.advanceExact(docId)) { - var values = new ArrayList<>(sortedDocValues.docValueCount()); - for (int i = 0; i < sortedDocValues.docValueCount(); i++) { - long ord = sortedDocValues.nextOrd(); - values.add(sortedDocValues.lookupOrd(ord).utf8ToString()); + if (sortedBinaryDocValues.advanceExact(docId)) { + var values = new ArrayList<>(sortedBinaryDocValues.docValueCount()); + for (int i = 0; i < sortedBinaryDocValues.docValueCount(); i++) { + values.add(sortedBinaryDocValues.nextValue().utf8ToString()); } return values; } else { diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/logsdb/20_synthetic_source.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/logsdb/20_synthetic_source.yml new file mode 100644 index 0000000000000..0a56c6d74215c --- /dev/null +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/logsdb/20_synthetic_source.yml @@ -0,0 +1,149 @@ +--- +synthetic_source match_only_text with wildcard as parent field: + - requires: + cluster_features: [ "mapper.source.mode_from_index_setting" ] + reason: "Source mode configured through index setting" + + - do: + indices.create: + index: synthetic_source_test + body: + settings: + index: + mapping.source.mode: synthetic + mappings: + properties: + foo: + type: wildcard + fields: + text: + type: match_only_text + + - do: + index: + index: synthetic_source_test + id: "1" + refresh: true + body: + foo: "Apache Lucene powers Elasticsearch" + + - do: + search: + index: synthetic_source_test + body: + query: + match_phrase: + foo.text: apache lucene + + - match: { "hits.total.value": 1 } + - match: + hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch" + +--- +synthetic_source match_only_text with number as parent field: + - requires: + cluster_features: [ "mapper.source.mode_from_index_setting" ] + reason: "Source mode configured through index setting" + + - do: + indices.create: + index: synthetic_source_test + body: + settings: + index: + mapping.source.mode: synthetic + mappings: + properties: + foo: + type: long + fields: + text: + type: match_only_text + + - do: + index: + index: synthetic_source_test + id: "1" + refresh: true + body: + foo: [1, 5] + + - do: + search: + index: synthetic_source_test + body: + query: + match_phrase: + foo.text: 1 5 + + - match: { "hits.total.value": 0 } + + - do: + indices.create: + index: stored_source_test + body: + mappings: + properties: + foo: + type: long + fields: + text: + type: match_only_text + + - do: + index: + index: stored_source_test + id: "1" + refresh: true + body: + foo: [1, 5] + + - do: + search: + index: stored_source_test + body: + query: + match_phrase: + foo.text: 1 5 + + - match: { "hits.total.value": 0 } + +--- +synthetic_source match_only_text with scaled_float as parent field: + - requires: + cluster_features: [ "mapper.source.mode_from_index_setting" ] + reason: "Source mode configured through index setting" + + - do: + indices.create: + index: synthetic_source_test + body: + settings: + index: + mapping.source.mode: synthetic + mappings: + properties: + foo: + type: scaled_float + scaling_factor: 10 + fields: + text: + type: match_only_text + + - do: + index: + index: synthetic_source_test + id: "1" + refresh: true + body: + foo: [1.1, 5.5] + + - do: + search: + index: synthetic_source_test + body: + query: + match_phrase: + foo.text: 1.1 5.5 + + - match: { "hits.total.value": 0 }