From 5a4823256984f4a627e3cac6cc5a31bcbdb57754 Mon Sep 17 00:00:00 2001
From: Dmitry Kubikov <dmitry.kubikov@elastic.co>
Date: Fri, 31 Oct 2025 15:00:58 -0700
Subject: [PATCH 1/6] Store keyword fields that trip ignore_above in binary doc
 values

---
 .../extras/MatchOnlyTextFieldMapper.java      | 66 +++++++++++----
 .../test/match_only_text/10_basic.yml         |  4 +-
 ...aryDocValuesSyntheticFieldLoaderLayer.java | 81 +++++++++++++++++++
 .../index/mapper/KeywordFieldMapper.java      | 66 +++++++++++++--
 4 files changed, 193 insertions(+), 24 deletions(-)
 create mode 100644 server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java
diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java
index d54764d803101..ab7571e91769b 100644
--- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java
+++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java
@@ -14,6 +14,7 @@
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.StoredField;
+import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.Term;
@@ -30,6 +31,7 @@
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOFunction;
 import org.elasticsearch.common.CheckedIntFunction;
+import org.elasticsearch.common.io.stream.ByteArrayStreamInput;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.text.UTF8DecodingReader;
 import org.elasticsearch.common.unit.Fuzziness;
@@ -297,12 +299,18 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti
 
             if (parent instanceof KeywordFieldMapper.KeywordFieldType keywordParent
                 && keywordParent.ignoreAbove().valuesPotentiallyIgnored()) {
-                final String parentFallbackFieldName = keywordParent.syntheticSourceFallbackFieldName();
                 if (parent.isStored()) {
-                    return storedFieldFetcher(parentFieldName, parentFallbackFieldName);
+                    // if the parent keyword field has ignore_above set, then any ignored values will be stored under a fallback field
+                    return combineFieldFetchers(
+                        storedFieldFetcher(parentFieldName),
+                        binaryDocValuesFieldFetcher(keywordParent.syntheticSourceFallbackFieldName())
+                    );
                 } else if (parent.hasDocValues()) {
                     var ifd = searchExecutionContext.getForField(parent, MappedFieldType.FielddataOperation.SEARCH);
-                    return combineFieldFetchers(docValuesFieldFetcher(ifd), storedFieldFetcher(parentFallbackFieldName));
+                    return combineFieldFetchers(
+                        docValuesFieldFetcher(ifd),
+                        binaryDocValuesFieldFetcher(keywordParent.syntheticSourceFallbackFieldName())
+                    );
                 }
             }
 
@@ -325,22 +333,16 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti
             final KeywordFieldMapper.KeywordFieldType keywordDelegate
         ) {
             if (keywordDelegate.ignoreAbove().valuesPotentiallyIgnored()) {
-                // because we don't know whether the delegate field will be ignored during parsing, we must also check the current field
-                String fieldName = name();
-                String fallbackName = syntheticSourceFallbackFieldName();
-
-                // delegate field names
                 String delegateFieldName = keywordDelegate.name();
-                String delegateFieldFallbackName = keywordDelegate.syntheticSourceFallbackFieldName();
+                // bc we don't know whether the delegate will ignore a value, we must also check the fallback field created by this
+                // match_only_text field
+                String fallbackName = syntheticSourceFallbackFieldName();
 
                 if (keywordDelegate.isStored()) {
-                    return storedFieldFetcher(delegateFieldName, delegateFieldFallbackName, fieldName, fallbackName);
+                    return storedFieldFetcher(delegateFieldName, fallbackName);
                 } else if (keywordDelegate.hasDocValues()) {
                     var ifd = searchExecutionContext.getForField(keywordDelegate, MappedFieldType.FielddataOperation.SEARCH);
-                    return combineFieldFetchers(
-                        docValuesFieldFetcher(ifd),
-                        storedFieldFetcher(delegateFieldFallbackName, fieldName, fallbackName)
-                    );
+                    return combineFieldFetchers(docValuesFieldFetcher(ifd), storedFieldFetcher(fallbackName));
                 }
             }
 
@@ -374,6 +376,42 @@ private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IO
             };
         }
 
+        /**
+         * Used exclusively to load ignored values from binary doc values. These values are stored in a separate fallback field in order to
+         * retain the original value and hence be able to support synthetic source.
+         */
+        private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> binaryDocValuesFieldFetcher(
+            String fieldName
+        ) {
+            return context -> {
+                var binaryDocValues = DocValues.getBinary(context.reader(), fieldName);
+                return docId -> {
+                    if (binaryDocValues == null || binaryDocValues.advanceExact(docId) == false) {
+                        return List.of();
+                    }
+
+                    // see KeywordFieldMapper.MultiValuedBinaryDocValuesField for context on how to decode these binary doc values back into
+                    // strings
+                    BytesRef docValuesBytes = binaryDocValues.binaryValue();
+
+                    try (ByteArrayStreamInput stream = new ByteArrayStreamInput()) {
+                        stream.reset(docValuesBytes.bytes, docValuesBytes.offset, docValuesBytes.length);
+
+                        int docValueCount = stream.readVInt();
+                        var values = new ArrayList<>(docValueCount);
+
+                        for (int i = 0; i < docValueCount; i++) {
+                            // this function already knows how to decode the underlying bytes array, so no need to explicitly call VInt()
+                            BytesRef valueBytes = stream.readBytesRef();
+                            values.add(valueBytes.utf8ToString());
+                        }
+
+                        return values;
+                    }
+                };
+            };
+        }
+
         private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> storedFieldFetcher(String... names) {
             var loader = StoredFieldLoader.create(false, Set.of(names));
             return context -> {
diff --git a/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/match_only_text/10_basic.yml b/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/match_only_text/10_basic.yml
index 0050618beeb67..581841df3fe52 100644
--- a/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/match_only_text/10_basic.yml
+++ b/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/match_only_text/10_basic.yml
@@ -465,7 +465,7 @@ synthetic_source match_only_text as multi-field with ignored keyword as parent:
         id: "1"
         refresh: true
         body:
-          foo: [ "Apache Lucene powers Elasticsearch", "Apache" ]
+          foo: [ "Apache Lucene powers Elasticsearch", "Apache", "Apache Lucene" ]
 
   - do:
       search:
@@ -477,7 +477,7 @@ synthetic_source match_only_text as multi-field with ignored keyword as parent:
 
   - match: { "hits.total.value": 1 }
   - match:
-      hits.hits.0._source.foo: [ "Apache", "Apache Lucene powers Elasticsearch" ]
+      hits.hits.0._source.foo: [ "Apache", "Apache Lucene powers Elasticsearch", "Apache Lucene" ]
 
 ---
 synthetic_source match_only_text as multi-field with stored keyword as parent:
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java b/server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java
new file mode 100644
index 0000000000000..bc25251fead6a
--- /dev/null
+++ b/server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java
@@ -0,0 +1,81 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.index.mapper;
+
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.io.stream.ByteArrayStreamInput;
+import org.elasticsearch.xcontent.XContentBuilder;
+
+import java.io.IOException;
+
+public final class BinaryDocValuesSyntheticFieldLoaderLayer implements CompositeSyntheticFieldLoader.DocValuesLayer {
+
+    private final String fieldName;
+
+    // the binary doc values for a document are all encoded in a single binary array, which this stream knows how to read
+    // the doc values in the array take the form of [doc value count][length of value 1][value 1][length of value 2][value 2]...
+    private final ByteArrayStreamInput stream = new ByteArrayStreamInput();
+    private BytesRef docValuesBytes;
+    private int valueCount;
+
+    public BinaryDocValuesSyntheticFieldLoaderLayer(String fieldName) {
+        this.fieldName = fieldName;
+    }
+
+    @Override
+    public long valueCount() {
+        return valueCount;
+    }
+
+    @Override
+    public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException {
+        BinaryDocValues docValues = leafReader.getBinaryDocValues(fieldName);
+
+        // there are no values associated with this field
+        if (docValues == null) return null;
+
+        return docId -> {
+            // there are no more documents to process
+            if (docValues.advanceExact(docId) == false) {
+                valueCount = 0;
+                return false;
+            }
+
+            // otherwise, extract the doc values into a stream to later read from
+            docValuesBytes = docValues.binaryValue();
+            stream.reset(docValuesBytes.bytes, docValuesBytes.offset, docValuesBytes.length);
+            valueCount = stream.readVInt();
+
+            return hasValue();
+        };
+    }
+
+    @Override
+    public boolean hasValue() {
+        return valueCount > 0;
+    }
+
+    @Override
+    public void write(XContentBuilder b) throws IOException {
+        for (int i = 0; i < valueCount; i++) {
+            // this function already knows how to decode the underlying bytes array, so no need to explicitly call VInt()
+            BytesRef valueBytes = stream.readBytesRef();
+            b.value(valueBytes.utf8ToString());
+        }
+    }
+
+    @Override
+    public String fieldName() {
+        return fieldName;
+    }
+
+}
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
index 773c42d584d4f..daa5894c289cb 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
@@ -40,6 +40,8 @@
 import org.apache.lucene.util.automaton.CompiledAutomaton;
 import org.apache.lucene.util.automaton.CompiledAutomaton.AUTOMATON_TYPE;
 import org.apache.lucene.util.automaton.Operations;
+import org.elasticsearch.ElasticsearchException;
+import org.elasticsearch.common.io.stream.BytesStreamOutput;
 import org.elasticsearch.common.lucene.BytesRefs;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.lucene.search.AutomatonQueries;
@@ -85,6 +87,7 @@
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
@@ -1160,7 +1163,14 @@ private boolean indexValue(DocumentParserContext context, XContentString value)
                 var utfBytes = value.bytes();
                 var bytesRef = new BytesRef(utfBytes.bytes(), utfBytes.offset(), utfBytes.length());
                 final String fieldName = fieldType().syntheticSourceFallbackFieldName();
-                context.doc().add(new StoredField(fieldName, bytesRef));
+
+                // store the value in a binary doc values field, create one if it doesn't exist
+                MultiValuedBinaryDocValuesField field = (MultiValuedBinaryDocValuesField) context.doc().getByKey(fieldName);
+                if (field == null) {
+                    field = new MultiValuedBinaryDocValuesField(fieldName);
+                    context.doc().addWithKey(fieldName, field);
+                }
+                field.add(bytesRef);
             }
 
             return false;
@@ -1323,15 +1333,55 @@ protected BytesRef preserve(BytesRef value) {
         // extra copy of the field for supporting synthetic source. This layer will check that copy.
         if (fieldType().ignoreAbove.valuesPotentiallyIgnored()) {
             final String fieldName = fieldType().syntheticSourceFallbackFieldName();
-            layers.add(new CompositeSyntheticFieldLoader.StoredFieldLayer(fieldName) {
-                @Override
-                protected void writeValue(Object value, XContentBuilder b) throws IOException {
-                    BytesRef ref = (BytesRef) value;
-                    b.utf8Value(ref.bytes, ref.offset, ref.length);
-                }
-            });
+            layers.add(new BinaryDocValuesSyntheticFieldLoaderLayer(fieldName));
         }
 
         return new CompositeSyntheticFieldLoader(leafFieldName, fullFieldName, layers);
     }
+
+    /**
+     * A custom implementation of {@link org.apache.lucene.index.BinaryDocValues} that uses a {@link Set} to maintain a collection of unique
+     * binary doc values for fields with multiple values per document.
+     */
+    private static final class MultiValuedBinaryDocValuesField extends CustomDocValuesField {
+
+        private final Set<BytesRef> uniqueValues;
+        private int docValuesByteCount = 0;
+
+        MultiValuedBinaryDocValuesField(String name) {
+            super(name);
+            // linked hash set to maintain insertion order of elements
+            uniqueValues = new LinkedHashSet<>();
+        }
+
+        public void add(final BytesRef value) {
+            uniqueValues.add(value);
+            // might as well track these on the go as opposed to having to loop through all entries later
+            docValuesByteCount += value.length;
+        }
+
+        /**
+         * Encodes the collection of binary doc values as a single contiguous binary array, wrapped in {@link BytesRef}. This array takes
+         * the form of [doc value count][length of value 1][value 1][length of value 2][value 2]...
+         */
+        @Override
+        public BytesRef binaryValue() {
+            int docValuesCount = uniqueValues.size();
+            // the + 1 is for the total doc values count, which is prefixed at the start of the array
+            int streamSize = docValuesByteCount + (docValuesCount + 1) * Integer.BYTES;
+
+            try (BytesStreamOutput out = new BytesStreamOutput(streamSize)) {
+                out.writeVInt(docValuesCount);
+                for (BytesRef value : uniqueValues) {
+                    int valueLength = value.length;
+                    out.writeVInt(valueLength);
+                    out.writeBytes(value.bytes, value.offset, valueLength);
+                }
+                return out.bytes().toBytesRef();
+            } catch (IOException e) {
+                throw new ElasticsearchException("Failed to get binary value", e);
+            }
+        }
+
+    }
 }

From 9d13387d794fc6e3aa53d7c6a962314a64916bd8 Mon Sep 17 00:00:00 2001
From: Dmitry Kubikov <dmitry.kubikov@elastic.co>
Date: Thu, 6 Nov 2025 12:16:03 -0800
Subject: [PATCH 2/6] Addressed feedback

---
 ...aryDocValuesSyntheticFieldLoaderLayer.java | 28 +++++-----
 .../index/mapper/KeywordFieldMapper.java      |  8 ++-
 ...eticSourceNativeArrayIntegrationTests.java | 12 +++-
 .../NativeArrayIntegrationTestCase.java       | 14 +++--
 .../wildcard/mapper/WildcardFieldMapper.java  | 56 +------------------
 5 files changed, 43 insertions(+), 75 deletions(-)

diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java b/server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java
index bc25251fead6a..498bb7a3e6209 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java
@@ -24,24 +24,21 @@ public final class BinaryDocValuesSyntheticFieldLoaderLayer implements Composite
     // the binary doc values for a document are all encoded in a single binary array, which this stream knows how to read
     // the doc values in the array take the form of [doc value count][length of value 1][value 1][length of value 2][value 2]...
     private final ByteArrayStreamInput stream = new ByteArrayStreamInput();
-    private BytesRef docValuesBytes;
     private int valueCount;
 
     public BinaryDocValuesSyntheticFieldLoaderLayer(String fieldName) {
         this.fieldName = fieldName;
     }
 
-    @Override
-    public long valueCount() {
-        return valueCount;
-    }
-
     @Override
     public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException {
         BinaryDocValues docValues = leafReader.getBinaryDocValues(fieldName);
 
         // there are no values associated with this field
-        if (docValues == null) return null;
+        if (docValues == null) {
+            valueCount = 0;
+            return null;
+        }
 
         return docId -> {
             // there are no more documents to process
@@ -51,7 +48,7 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf
             }
 
             // otherwise, extract the doc values into a stream to later read from
-            docValuesBytes = docValues.binaryValue();
+            BytesRef docValuesBytes = docValues.binaryValue();
             stream.reset(docValuesBytes.bytes, docValuesBytes.offset, docValuesBytes.length);
             valueCount = stream.readVInt();
 
@@ -59,11 +56,6 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf
         };
     }
 
-    @Override
-    public boolean hasValue() {
-        return valueCount > 0;
-    }
-
     @Override
     public void write(XContentBuilder b) throws IOException {
         for (int i = 0; i < valueCount; i++) {
@@ -73,6 +65,16 @@ public void write(XContentBuilder b) throws IOException {
         }
     }
 
+    @Override
+    public boolean hasValue() {
+        return valueCount > 0;
+    }
+
+    @Override
+    public long valueCount() {
+        return valueCount;
+    }
+
     @Override
     public String fieldName() {
         return fieldName;
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
index daa5894c289cb..901257e28f404 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
@@ -1355,9 +1355,11 @@ private static final class MultiValuedBinaryDocValuesField extends CustomDocValu
         }
 
         public void add(final BytesRef value) {
-            uniqueValues.add(value);
-            // might as well track these on the go as opposed to having to loop through all entries later
-            docValuesByteCount += value.length;
+            if (uniqueValues.add(value)) {
+                // might as well track these on the go as opposed to having to loop through all entries later
+                docValuesByteCount += value.length;
+            }
+            ;
         }
 
         /**
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/KeywordSyntheticSourceNativeArrayIntegrationTests.java b/server/src/test/java/org/elasticsearch/index/mapper/KeywordSyntheticSourceNativeArrayIntegrationTests.java
index 41e0c644ee20e..ec71e07fc9231 100644
--- a/server/src/test/java/org/elasticsearch/index/mapper/KeywordSyntheticSourceNativeArrayIntegrationTests.java
+++ b/server/src/test/java/org/elasticsearch/index/mapper/KeywordSyntheticSourceNativeArrayIntegrationTests.java
@@ -52,6 +52,7 @@ public void testSynthesizeArrayIgnoreAbove() throws Exception {
             .endObject()
             .endObject()
             .endObject();
+
         // Note values that would be ignored are added at the end of arrays,
         // this makes testing easier as ignored values are always synthesized after regular values:
         var arrayValues = new Object[][] {
@@ -60,7 +61,16 @@ public void testSynthesizeArrayIgnoreAbove() throws Exception {
             new Object[] { "123", "1234", "12345" },
             new Object[] { null, null, null, "blabla" },
             new Object[] { "1", "2", "3", "blabla" } };
-        verifySyntheticArray(arrayValues, mapping, "_id", "field._original");
+
+        // values in the original array should be deduplicated
+        var expectedArrayValues = new Object[][] {
+            new Object[] { null, "a", "ab", "abc", "abcd", null, "abcde" },
+            new Object[] { "12345" },
+            new Object[] { "123", "1234", "12345" },
+            new Object[] { null, null, null, "blabla" },
+            new Object[] { "1", "2", "3", "blabla" } };
+
+        verifySyntheticArray(arrayValues, expectedArrayValues, mapping, "_id");
     }
 
     public void testSynthesizeObjectArray() throws Exception {
diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/NativeArrayIntegrationTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/NativeArrayIntegrationTestCase.java
index d1ab3c0907562..950626292d120 100644
--- a/test/framework/src/main/java/org/elasticsearch/index/mapper/NativeArrayIntegrationTestCase.java
+++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/NativeArrayIntegrationTestCase.java
@@ -259,11 +259,17 @@ protected void verifySyntheticArray(Object[][] arrays, XContentBuilder mapping,
     private XContentBuilder arrayToSource(Object[] array) throws IOException {
         var source = jsonBuilder().startObject();
         if (array != null) {
-            source.startArray("field");
-            for (Object arrayValue : array) {
-                source.value(arrayValue);
+            // collapse array if it only consists of one element
+            // if the only element is null, then we'll skip synthesizing source for that field
+            if (array.length == 1 && array[0] != null) {
+                source.field("field", array[0]);
+            } else {
+                source.startArray("field");
+                for (Object arrayValue : array) {
+                    source.value(arrayValue);
+                }
+                source.endArray();
             }
-            source.endArray();
         } else {
             source.field("field").nullValue();
         }
diff --git a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java
index ad28b0336d855..6dbf619e0c140 100644
--- a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java
+++ b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java
@@ -17,10 +17,8 @@
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.StoredField;
-import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexableField;
-import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanClause.Occur;
@@ -43,7 +41,6 @@
 import org.apache.lucene.util.automaton.RegExp;
 import org.elasticsearch.ElasticsearchParseException;
 import org.elasticsearch.common.geo.ShapeRelation;
-import org.elasticsearch.common.io.stream.ByteArrayStreamInput;
 import org.elasticsearch.common.lucene.BytesRefs;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.time.DateMathParser;
@@ -58,6 +55,7 @@
 import org.elasticsearch.index.fielddata.FieldDataContext;
 import org.elasticsearch.index.fielddata.IndexFieldData;
 import org.elasticsearch.index.fielddata.plain.StringBinaryIndexFieldData;
+import org.elasticsearch.index.mapper.BinaryDocValuesSyntheticFieldLoaderLayer;
 import org.elasticsearch.index.mapper.BinaryFieldMapper.CustomBinaryDocValuesField;
 import org.elasticsearch.index.mapper.BlockLoader;
 import org.elasticsearch.index.mapper.CompositeSyntheticFieldLoader;
@@ -1106,7 +1104,7 @@ public FieldMapper.Builder getMergeBuilder() {
     protected SyntheticSourceSupport syntheticSourceSupport() {
         return new SyntheticSourceSupport.Native(() -> {
             var layers = new ArrayList<CompositeSyntheticFieldLoader.Layer>();
-            layers.add(new WildcardSyntheticFieldLoader());
+            layers.add(new BinaryDocValuesSyntheticFieldLoaderLayer(fullPath()));
             if (ignoreAbove.valuesPotentiallyIgnored()) {
                 layers.add(new CompositeSyntheticFieldLoader.StoredFieldLayer(originalName()) {
                     @Override
@@ -1120,54 +1118,4 @@ protected void writeValue(Object value, XContentBuilder b) throws IOException {
         });
     }
 
-    private class WildcardSyntheticFieldLoader implements CompositeSyntheticFieldLoader.DocValuesLayer {
-        private final ByteArrayStreamInput docValuesStream = new ByteArrayStreamInput();
-        private int docValueCount;
-        private BytesRef docValueBytes;
-
-        @Override
-        public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException {
-            BinaryDocValues values = leafReader.getBinaryDocValues(fullPath());
-            if (values == null) {
-                docValueCount = 0;
-                return null;
-            }
-
-            return docId -> {
-                if (values.advanceExact(docId) == false) {
-                    docValueCount = 0;
-                    return hasValue();
-                }
-                docValueBytes = values.binaryValue();
-                docValuesStream.reset(docValueBytes.bytes);
-                docValuesStream.setPosition(docValueBytes.offset);
-                docValueCount = docValuesStream.readVInt();
-                return hasValue();
-            };
-        }
-
-        @Override
-        public boolean hasValue() {
-            return docValueCount > 0;
-        }
-
-        @Override
-        public long valueCount() {
-            return docValueCount;
-        }
-
-        @Override
-        public void write(XContentBuilder b) throws IOException {
-            for (int i = 0; i < docValueCount; i++) {
-                int length = docValuesStream.readVInt();
-                b.utf8Value(docValueBytes.bytes, docValuesStream.getPosition(), length);
-                docValuesStream.skipBytes(length);
-            }
-        }
-
-        @Override
-        public String fieldName() {
-            return fullPath();
-        }
-    }
 }

From c2ee79e55667c259ff6d9591cf21929599003a11 Mon Sep 17 00:00:00 2001
From: Dmitry Kubikov <dmitry.kubikov@elastic.co>
Date: Fri, 7 Nov 2025 16:02:47 -0800
Subject: [PATCH 3/6] Moved ignore values doc value field fetcher inside of
 existing fetcher function

---
 .../extras/MatchOnlyTextFieldMapper.java      | 128 ++++++++++--------
 ...aryDocValuesSyntheticFieldLoaderLayer.java |  21 ++-
 .../index/mapper/KeywordFieldMapper.java      |   1 -
 3 files changed, 86 insertions(+), 64 deletions(-)

diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java
index ab7571e91769b..7dc66fe31980b 100644
--- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java
+++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java
@@ -14,6 +14,7 @@
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.StoredField;
+import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.LeafReaderContext;
@@ -28,10 +29,10 @@
 import org.apache.lucene.search.PrefixQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOFunction;
 import org.elasticsearch.common.CheckedIntFunction;
-import org.elasticsearch.common.io.stream.ByteArrayStreamInput;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.text.UTF8DecodingReader;
 import org.elasticsearch.common.unit.Fuzziness;
@@ -39,8 +40,10 @@
 import org.elasticsearch.index.IndexVersions;
 import org.elasticsearch.index.analysis.IndexAnalyzers;
 import org.elasticsearch.index.analysis.NamedAnalyzer;
+import org.elasticsearch.index.fielddata.AbstractBinaryDocValues;
 import org.elasticsearch.index.fielddata.FieldDataContext;
 import org.elasticsearch.index.fielddata.IndexFieldData;
+import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
 import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData;
 import org.elasticsearch.index.fielddata.StoredFieldSortedBinaryIndexFieldData;
 import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
@@ -299,18 +302,11 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti
 
             if (parent instanceof KeywordFieldMapper.KeywordFieldType keywordParent
                 && keywordParent.ignoreAbove().valuesPotentiallyIgnored()) {
+                var ifd = searchExecutionContext.getForField(parent, MappedFieldType.FielddataOperation.SEARCH);
                 if (parent.isStored()) {
-                    // if the parent keyword field has ignore_above set, then any ignored values will be stored under a fallback field
-                    return combineFieldFetchers(
-                        storedFieldFetcher(parentFieldName),
-                        binaryDocValuesFieldFetcher(keywordParent.syntheticSourceFallbackFieldName())
-                    );
+                    return combineFieldFetchers(storedFieldFetcher(parentFieldName), docValuesFieldFetcher(ifd));
                 } else if (parent.hasDocValues()) {
-                    var ifd = searchExecutionContext.getForField(parent, MappedFieldType.FielddataOperation.SEARCH);
-                    return combineFieldFetchers(
-                        docValuesFieldFetcher(ifd),
-                        binaryDocValuesFieldFetcher(keywordParent.syntheticSourceFallbackFieldName())
-                    );
+                    return docValuesFieldFetcher(ifd);
                 }
             }
 
@@ -357,57 +353,29 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti
             }
         }
 
-        private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> docValuesFieldFetcher(
-            IndexFieldData<?> ifd
-        ) {
+        private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> docValuesFieldFetcher(IndexFieldData<?> ifd) {
             return context -> {
-                var sortedBinaryDocValues = ifd.load(context).getBytesValues();
-                return docId -> {
-                    if (sortedBinaryDocValues.advanceExact(docId)) {
-                        var values = new ArrayList<>(sortedBinaryDocValues.docValueCount());
-                        for (int i = 0; i < sortedBinaryDocValues.docValueCount(); i++) {
-                            values.add(sortedBinaryDocValues.nextValue().utf8ToString());
-                        }
-                        return values;
-                    } else {
-                        return List.of();
-                    }
-                };
-            };
-        }
+                SortedBinaryDocValues indexedValuesDocValues = ifd.load(context).getBytesValues();
+                CustomBinaryDocValues ignoredValuesDocValues = new CustomBinaryDocValues(
+                    DocValues.getBinary(context.reader(), ifd.getFieldName() + TextFamilyFieldType.FALLBACK_FIELD_NAME_SUFFIX)
+                );
 
-        /**
-         * Used exclusively to load ignored values from binary doc values. These values are stored in a separate fallback field in order to
-         * retain the original value and hence be able to support synthetic source.
-         */
-        private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> binaryDocValuesFieldFetcher(
-            String fieldName
-        ) {
-            return context -> {
-                var binaryDocValues = DocValues.getBinary(context.reader(), fieldName);
                 return docId -> {
-                    if (binaryDocValues == null || binaryDocValues.advanceExact(docId) == false) {
-                        return List.of();
-                    }
-
-                    // see KeywordFieldMapper.MultiValuedBinaryDocValuesField for context on how to decode these binary doc values back into
-                    // strings
-                    BytesRef docValuesBytes = binaryDocValues.binaryValue();
-
-                    try (ByteArrayStreamInput stream = new ByteArrayStreamInput()) {
-                        stream.reset(docValuesBytes.bytes, docValuesBytes.offset, docValuesBytes.length);
+                    int indexedValueCount = indexedValuesDocValues.advanceExact(docId) ? indexedValuesDocValues.docValueCount() : 0;
+                    int ignoredValueCount = ignoredValuesDocValues.advanceExact(docId) ? ignoredValuesDocValues.docValueCount() : 0;
+                    var values = new ArrayList<>(indexedValueCount + ignoredValueCount);
 
-                        int docValueCount = stream.readVInt();
-                        var values = new ArrayList<>(docValueCount);
-
-                        for (int i = 0; i < docValueCount; i++) {
-                            // this function already knows how to decode the underlying bytes array, so no need to explicitly call VInt()
-                            BytesRef valueBytes = stream.readBytesRef();
-                            values.add(valueBytes.utf8ToString());
-                        }
+                    // extract indexed values from doc values
+                    for (int i = 0; i < indexedValueCount; i++) {
+                        values.add(indexedValuesDocValues.nextValue().utf8ToString());
+                    }
 
-                        return values;
+                    // extract ignored values from doc values
+                    for (int i = 0; i < ignoredValueCount; i++) {
+                        values.add(ignoredValuesDocValues.nextValue().utf8ToString());
                     }
+
+                    return values;
                 };
             };
         }
@@ -817,4 +785,52 @@ protected void writeValue(Object value, XContentBuilder b) throws IOException {
 
         return fieldLoader;
     }
+
+    private static class CustomBinaryDocValues extends AbstractBinaryDocValues {
+
+        private final BinaryDocValues binaryDocValues;
+
+        private ByteArrayDataInput data;
+        private int docValueCount = 0;
+
+        CustomBinaryDocValues(BinaryDocValues binaryDocValues) {
+            this.binaryDocValues = binaryDocValues;
+        }
+
+        public BytesRef nextValue() {
+            // get the length of the value
+            int length = data.readVInt();
+
+            // read that many bytes from the underlying bytes array
+            // the read will automatically move the offset to the next value
+            byte[] valueBytes = new byte[length];
+            data.readBytes(valueBytes, 0, length);
+
+            return new BytesRef(valueBytes);
+        }
+
+        @Override
+        public BytesRef binaryValue() throws IOException {
+            return binaryDocValues.binaryValue();
+        }
+
+        @Override
+        public boolean advanceExact(int docId) throws IOException {
+            // if document has a value, read underlying bytes
+            if (binaryDocValues.advanceExact(docId)) {
+                BytesRef docValuesBytes = binaryDocValues.binaryValue();
+                data = new ByteArrayDataInput(docValuesBytes.bytes, docValuesBytes.offset, docValuesBytes.length);
+                docValueCount = data.readVInt();
+                return true;
+            }
+
+            // otherwise there is nothing to do
+            docValueCount = 0;
+            return false;
+        }
+
+        public int docValueCount() {
+            return docValueCount;
+        }
+    }
 }
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java b/server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java
index 498bb7a3e6209..157c0f578d6a0 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java
@@ -11,8 +11,8 @@
 
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.util.BytesRef;
-import org.elasticsearch.common.io.stream.ByteArrayStreamInput;
 import org.elasticsearch.xcontent.XContentBuilder;
 
 import java.io.IOException;
@@ -23,7 +23,7 @@ public final class BinaryDocValuesSyntheticFieldLoaderLayer implements Composite
 
     // the binary doc values for a document are all encoded in a single binary array, which this stream knows how to read
     // the doc values in the array take the form of [doc value count][length of value 1][value 1][length of value 2][value 2]...
-    private final ByteArrayStreamInput stream = new ByteArrayStreamInput();
+    private final ByteArrayDataInput data = new ByteArrayDataInput();
     private int valueCount;
 
     public BinaryDocValuesSyntheticFieldLoaderLayer(String fieldName) {
@@ -49,8 +49,8 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf
 
             // otherwise, extract the doc values into a stream to later read from
             BytesRef docValuesBytes = docValues.binaryValue();
-            stream.reset(docValuesBytes.bytes, docValuesBytes.offset, docValuesBytes.length);
-            valueCount = stream.readVInt();
+            data.reset(docValuesBytes.bytes, docValuesBytes.offset, docValuesBytes.length);
+            valueCount = data.readVInt();
 
             return hasValue();
         };
@@ -59,9 +59,16 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf
     @Override
     public void write(XContentBuilder b) throws IOException {
         for (int i = 0; i < valueCount; i++) {
-            // this function already knows how to decode the underlying bytes array, so no need to explicitly call VInt()
-            BytesRef valueBytes = stream.readBytesRef();
-            b.value(valueBytes.utf8ToString());
+            // read the length of the value
+            int length = data.readVInt();
+
+            // read that many bytes from the input
+            // the read will automatically move the offset to the next value
+            byte[] valueBytes = new byte[length];
+            data.readBytes(valueBytes, 0, length);
+
+            // finally, write those bytes into XContentBuilder
+            b.value(new BytesRef(valueBytes).utf8ToString());
         }
     }
 
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
index 901257e28f404..c61432bb08f42 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
@@ -1359,7 +1359,6 @@ public void add(final BytesRef value) {
                 // might as well track these on the go as opposed to having to loop through all entries later
                 docValuesByteCount += value.length;
             }
-            ;
         }
 
         /**

From 9bac251f181a3361f6232623dd74b0639c16fb52 Mon Sep 17 00:00:00 2001
From: Dmitry Kubikov <dmitry.kubikov@elastic.co>
Date: Fri, 7 Nov 2025 16:02:47 -0800
Subject: [PATCH 4/6] Moved ignore values doc value field fetcher inside of
 existing fetcher function

---
 .../index/mapper/extras/MatchOnlyTextFieldMapper.java         | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java
index 7dc66fe31980b..e13e8c96c2b5e 100644
--- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java
+++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java
@@ -786,6 +786,10 @@ protected void writeValue(Object value, XContentBuilder b) throws IOException {
         return fieldLoader;
     }
 
+
+    /**
+     * A wrapper around {@link BinaryDocValues} that exposes some quality of life functions.
+     */
     private static class CustomBinaryDocValues extends AbstractBinaryDocValues {
 
         private final BinaryDocValues binaryDocValues;

From ab289467a81228900ecc4eb5b317c293c61af966 Mon Sep 17 00:00:00 2001
From: Dmitry Kubikov <dmitry.kubikov@elastic.co>
Date: Fri, 7 Nov 2025 16:02:47 -0800
Subject: [PATCH 5/6] Moved ignore values doc value field fetcher inside of
 existing fetcher function

---
 .../extras/MatchOnlyTextFieldMapper.java      | 23 +++++++------------
 ...aryDocValuesSyntheticFieldLoaderLayer.java | 22 +++++++-----------
 2 files changed, 16 insertions(+), 29 deletions(-)

diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java
index e13e8c96c2b5e..04b6b66dd7d83 100644
--- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java
+++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java
@@ -29,10 +29,10 @@
 import org.apache.lucene.search.PrefixQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOFunction;
 import org.elasticsearch.common.CheckedIntFunction;
+import org.elasticsearch.common.io.stream.ByteArrayStreamInput;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.text.UTF8DecodingReader;
 import org.elasticsearch.common.unit.Fuzziness;
@@ -786,31 +786,24 @@ protected void writeValue(Object value, XContentBuilder b) throws IOException {
         return fieldLoader;
     }
 
-
     /**
      * A wrapper around {@link BinaryDocValues} that exposes some quality of life functions.
      */
     private static class CustomBinaryDocValues extends AbstractBinaryDocValues {
 
         private final BinaryDocValues binaryDocValues;
+        private final ByteArrayStreamInput stream;
 
-        private ByteArrayDataInput data;
         private int docValueCount = 0;
 
         CustomBinaryDocValues(BinaryDocValues binaryDocValues) {
             this.binaryDocValues = binaryDocValues;
+            this.stream = new ByteArrayStreamInput();
         }
 
-        public BytesRef nextValue() {
-            // get the length of the value
-            int length = data.readVInt();
-
-            // read that many bytes from the underlying bytes array
-            // the read will automatically move the offset to the next value
-            byte[] valueBytes = new byte[length];
-            data.readBytes(valueBytes, 0, length);
-
-            return new BytesRef(valueBytes);
+        public BytesRef nextValue() throws IOException {
+            // this function already knows how to decode the underlying bytes array, so no need to explicitly call VInt()
+            return stream.readBytesRef();
         }
 
         @Override
@@ -823,8 +816,8 @@ public boolean advanceExact(int docId) throws IOException {
             // if document has a value, read underlying bytes
             if (binaryDocValues.advanceExact(docId)) {
                 BytesRef docValuesBytes = binaryDocValues.binaryValue();
-                data = new ByteArrayDataInput(docValuesBytes.bytes, docValuesBytes.offset, docValuesBytes.length);
-                docValueCount = data.readVInt();
+                stream.reset(docValuesBytes.bytes, docValuesBytes.offset, docValuesBytes.length);
+                docValueCount = stream.readVInt();
                 return true;
             }
 
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java b/server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java
index 157c0f578d6a0..1f0c0be1f9555 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java
@@ -11,8 +11,8 @@
 
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.LeafReader;
-import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.io.stream.ByteArrayStreamInput;
 import org.elasticsearch.xcontent.XContentBuilder;
 
 import java.io.IOException;
@@ -23,11 +23,12 @@ public final class BinaryDocValuesSyntheticFieldLoaderLayer implements Composite
 
     // the binary doc values for a document are all encoded in a single binary array, which this stream knows how to read
     // the doc values in the array take the form of [doc value count][length of value 1][value 1][length of value 2][value 2]...
-    private final ByteArrayDataInput data = new ByteArrayDataInput();
+    private final ByteArrayStreamInput stream;
     private int valueCount;
 
     public BinaryDocValuesSyntheticFieldLoaderLayer(String fieldName) {
         this.fieldName = fieldName;
+        this.stream = new ByteArrayStreamInput();
     }
 
     @Override
@@ -49,8 +50,8 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf
 
             // otherwise, extract the doc values into a stream to later read from
             BytesRef docValuesBytes = docValues.binaryValue();
-            data.reset(docValuesBytes.bytes, docValuesBytes.offset, docValuesBytes.length);
-            valueCount = data.readVInt();
+            stream.reset(docValuesBytes.bytes, docValuesBytes.offset, docValuesBytes.length);
+            valueCount = stream.readVInt();
 
             return hasValue();
         };
@@ -59,16 +60,9 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf
     @Override
     public void write(XContentBuilder b) throws IOException {
         for (int i = 0; i < valueCount; i++) {
-            // read the length of the value
-            int length = data.readVInt();
-
-            // read that many bytes from the input
-            // the read will automatically move the offset to the next value
-            byte[] valueBytes = new byte[length];
-            data.readBytes(valueBytes, 0, length);
-
-            // finally, write those bytes into XContentBuilder
-            b.value(new BytesRef(valueBytes).utf8ToString());
+            // this function already knows how to decode the underlying bytes array, so no need to explicitly call VInt()
+            BytesRef valueBytes = stream.readBytesRef();
+            b.value(valueBytes.utf8ToString());
         }
     }
 

From a1d1d2a35e51c06da55d418ab92d63e481d309a9 Mon Sep 17 00:00:00 2001
From: Dmitry Kubikov <dmitry.kubikov@elastic.co>
Date: Fri, 7 Nov 2025 16:02:47 -0800
Subject: [PATCH 6/6] Moved ignore values doc value field fetcher inside of
 existing fetcher function

---
 .../extras/MatchOnlyTextFieldMapper.java      | 65 ++++++++++---------
 1 file changed, 34 insertions(+), 31 deletions(-)

diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java
index 04b6b66dd7d83..6c98720d56d45 100644
--- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java
+++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java
@@ -40,7 +40,6 @@
 import org.elasticsearch.index.IndexVersions;
 import org.elasticsearch.index.analysis.IndexAnalyzers;
 import org.elasticsearch.index.analysis.NamedAnalyzer;
-import org.elasticsearch.index.fielddata.AbstractBinaryDocValues;
 import org.elasticsearch.index.fielddata.FieldDataContext;
 import org.elasticsearch.index.fielddata.IndexFieldData;
 import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
@@ -302,11 +301,17 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti
 
             if (parent instanceof KeywordFieldMapper.KeywordFieldType keywordParent
                 && keywordParent.ignoreAbove().valuesPotentiallyIgnored()) {
-                var ifd = searchExecutionContext.getForField(parent, MappedFieldType.FielddataOperation.SEARCH);
                 if (parent.isStored()) {
-                    return combineFieldFetchers(storedFieldFetcher(parentFieldName), docValuesFieldFetcher(ifd));
+                    return combineFieldFetchers(
+                        storedFieldFetcher(parentFieldName),
+                        ignoredValuesDocValuesFieldFetcher(keywordParent.syntheticSourceFallbackFieldName())
+                    );
                 } else if (parent.hasDocValues()) {
-                    return docValuesFieldFetcher(ifd);
+                    var ifd = searchExecutionContext.getForField(parent, MappedFieldType.FielddataOperation.SEARCH);
+                    return combineFieldFetchers(
+                        docValuesFieldFetcher(ifd),
+                        ignoredValuesDocValuesFieldFetcher(keywordParent.syntheticSourceFallbackFieldName())
+                    );
                 }
             }
 
@@ -356,30 +361,31 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti
         private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> docValuesFieldFetcher(IndexFieldData<?> ifd) {
             return context -> {
                 SortedBinaryDocValues indexedValuesDocValues = ifd.load(context).getBytesValues();
-                CustomBinaryDocValues ignoredValuesDocValues = new CustomBinaryDocValues(
-                    DocValues.getBinary(context.reader(), ifd.getFieldName() + TextFamilyFieldType.FALLBACK_FIELD_NAME_SUFFIX)
-                );
-
-                return docId -> {
-                    int indexedValueCount = indexedValuesDocValues.advanceExact(docId) ? indexedValuesDocValues.docValueCount() : 0;
-                    int ignoredValueCount = ignoredValuesDocValues.advanceExact(docId) ? ignoredValuesDocValues.docValueCount() : 0;
-                    var values = new ArrayList<>(indexedValueCount + ignoredValueCount);
-
-                    // extract indexed values from doc values
-                    for (int i = 0; i < indexedValueCount; i++) {
-                        values.add(indexedValuesDocValues.nextValue().utf8ToString());
-                    }
-
-                    // extract ignored values from doc values
-                    for (int i = 0; i < ignoredValueCount; i++) {
-                        values.add(ignoredValuesDocValues.nextValue().utf8ToString());
-                    }
+                return docId -> getValuesFromDocValues(indexedValuesDocValues, docId);
+            };
+        }
 
-                    return values;
-                };
+        private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> ignoredValuesDocValuesFieldFetcher(
+            String fieldName
+        ) {
+            return context -> {
+                CustomBinaryDocValues ignoredValuesDocValues = new CustomBinaryDocValues(DocValues.getBinary(context.reader(), fieldName));
+                return docId -> getValuesFromDocValues(ignoredValuesDocValues, docId);
             };
         }
 
+        private List<Object> getValuesFromDocValues(SortedBinaryDocValues docValues, int docId) throws IOException {
+            if (docValues.advanceExact(docId)) {
+                var values = new ArrayList<>(docValues.docValueCount());
+                for (int i = 0; i < docValues.docValueCount(); i++) {
+                    values.add(docValues.nextValue().utf8ToString());
+                }
+                return values;
+            } else {
+                return List.of();
+            }
+        }
+
         private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> storedFieldFetcher(String... names) {
             var loader = StoredFieldLoader.create(false, Set.of(names));
             return context -> {
@@ -787,9 +793,9 @@ protected void writeValue(Object value, XContentBuilder b) throws IOException {
     }
 
     /**
-     * A wrapper around {@link BinaryDocValues} that exposes some quality of life functions.
+     * A wrapper around {@link BinaryDocValues} that exposes some quality of life functions. Note, these values are not sorted.
      */
-    private static class CustomBinaryDocValues extends AbstractBinaryDocValues {
+    private static class CustomBinaryDocValues extends SortedBinaryDocValues {
 
         private final BinaryDocValues binaryDocValues;
         private final ByteArrayStreamInput stream;
@@ -801,16 +807,12 @@ private static class CustomBinaryDocValues extends AbstractBinaryDocValues {
             this.stream = new ByteArrayStreamInput();
         }
 
+        @Override
         public BytesRef nextValue() throws IOException {
             // this function already knows how to decode the underlying bytes array, so no need to explicitly call VInt()
             return stream.readBytesRef();
         }
 
-        @Override
-        public BytesRef binaryValue() throws IOException {
-            return binaryDocValues.binaryValue();
-        }
-
         @Override
         public boolean advanceExact(int docId) throws IOException {
             // if document has a value, read underlying bytes
@@ -826,6 +828,7 @@ public boolean advanceExact(int docId) throws IOException {
             return false;
         }
 
+        @Override
         public int docValueCount() {
             return docValueCount;
         }