From 1413a65a166a9dbaa98509e94e0e7d93f3e341bd Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Tue, 8 Jul 2025 18:21:35 -0400 Subject: [PATCH] Change match_only_text's value fetcher to use `SortedBinaryDocValues` instead of interacting with doc values api directly. (#130854) This pulls #130845 into the serverless fix branch for patch deployment. Original description: Change match_only_text's value fetcher to use SortedBinaryDocValues instead of interacting with doc values api directly. This way, via field data abstraction, the right doc values type is used, and the right conversions happen. Values of all field types will get converted to strings. Co-authored-by: Martijn van Groningen --- .../extras/MatchOnlyTextFieldMapper.java | 23 +-- .../test/logsdb/20_synthetic_source.yml | 149 ++++++++++++++++++ 2 files changed, 162 insertions(+), 10 deletions(-) create mode 100644 x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/logsdb/20_synthetic_source.yml diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java index bb9babc14c1bb..04ace5ccc4157 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java @@ -14,7 +14,6 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.StoredField; -import org.apache.lucene.index.DocValues; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; @@ -48,6 +47,7 @@ import org.elasticsearch.index.mapper.BlockStoredFieldsReader; import org.elasticsearch.index.mapper.DocumentParserContext; import org.elasticsearch.index.mapper.FieldMapper; +import 
org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MapperBuilderContext; import org.elasticsearch.index.mapper.SourceValueFetcher; import org.elasticsearch.index.mapper.StringFieldType; @@ -254,7 +254,8 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti if (parent.isStored()) { return storedFieldFetcher(parentField); } else if (parent.hasDocValues()) { - return docValuesFieldFetcher(parentField); + var ifd = searchExecutionContext.getForField(parent, MappedFieldType.FielddataOperation.SEARCH); + return docValuesFieldFetcher(ifd); } else { assert false : "parent field should either be stored or have doc values"; } @@ -266,7 +267,8 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti if (fieldType.isStored()) { return storedFieldFetcher(fieldType.name()); } else if (fieldType.hasDocValues()) { - return docValuesFieldFetcher(fieldType.name()); + var ifd = searchExecutionContext.getForField(fieldType, MappedFieldType.FielddataOperation.SEARCH); + return docValuesFieldFetcher(ifd); } else { assert false : "multi field should either be stored or have doc values"; } @@ -291,15 +293,16 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti }; } - private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> docValuesFieldFetcher(String name) { + private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> docValuesFieldFetcher( + IndexFieldData<?> ifd + ) { return context -> { - var sortedDocValues = DocValues.getSortedSet(context.reader(), name); + var sortedBinaryDocValues = ifd.load(context).getBytesValues(); return docId -> { - if (sortedDocValues.advanceExact(docId)) { - var values = new ArrayList<>(sortedDocValues.docValueCount()); - for (int i = 0; i < sortedDocValues.docValueCount(); i++) { - long ord = sortedDocValues.nextOrd(); - values.add(sortedDocValues.lookupOrd(ord).utf8ToString()); + if (sortedBinaryDocValues.advanceExact(docId)) { + var values = new ArrayList<>(sortedBinaryDocValues.docValueCount()); + for (int i = 0; i < sortedBinaryDocValues.docValueCount(); i++) { +
values.add(sortedBinaryDocValues.nextValue().utf8ToString()); } return values; } else { diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/logsdb/20_synthetic_source.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/logsdb/20_synthetic_source.yml new file mode 100644 index 0000000000000..0a56c6d74215c --- /dev/null +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/logsdb/20_synthetic_source.yml @@ -0,0 +1,149 @@ +--- +synthetic_source match_only_text with wildcard as parent field: + - requires: + cluster_features: [ "mapper.source.mode_from_index_setting" ] + reason: "Source mode configured through index setting" + + - do: + indices.create: + index: synthetic_source_test + body: + settings: + index: + mapping.source.mode: synthetic + mappings: + properties: + foo: + type: wildcard + fields: + text: + type: match_only_text + + - do: + index: + index: synthetic_source_test + id: "1" + refresh: true + body: + foo: "Apache Lucene powers Elasticsearch" + + - do: + search: + index: synthetic_source_test + body: + query: + match_phrase: + foo.text: apache lucene + + - match: { "hits.total.value": 1 } + - match: + hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch" + +--- +synthetic_source match_only_text with number as parent field: + - requires: + cluster_features: [ "mapper.source.mode_from_index_setting" ] + reason: "Source mode configured through index setting" + + - do: + indices.create: + index: synthetic_source_test + body: + settings: + index: + mapping.source.mode: synthetic + mappings: + properties: + foo: + type: long + fields: + text: + type: match_only_text + + - do: + index: + index: synthetic_source_test + id: "1" + refresh: true + body: + foo: [1, 5] + + - do: + search: + index: synthetic_source_test + body: + query: + match_phrase: + foo.text: 1 5 + + - match: { "hits.total.value": 0 } + + - do: + indices.create: + index: stored_source_test + body: + mappings: + properties: + foo: + type: 
long + fields: + text: + type: match_only_text + + - do: + index: + index: stored_source_test + id: "1" + refresh: true + body: + foo: [1, 5] + + - do: + search: + index: stored_source_test + body: + query: + match_phrase: + foo.text: 1 5 + + - match: { "hits.total.value": 0 } + +--- +synthetic_source match_only_text with scaled_float as parent field: + - requires: + cluster_features: [ "mapper.source.mode_from_index_setting" ] + reason: "Source mode configured through index setting" + + - do: + indices.create: + index: synthetic_source_test + body: + settings: + index: + mapping.source.mode: synthetic + mappings: + properties: + foo: + type: scaled_float + scaling_factor: 10 + fields: + text: + type: match_only_text + + - do: + index: + index: synthetic_source_test + id: "1" + refresh: true + body: + foo: [1.1, 5.5] + + - do: + search: + index: synthetic_source_test + body: + query: + match_phrase: + foo.text: 1.1 5.5 + + - match: { "hits.total.value": 0 }