From 5a4823256984f4a627e3cac6cc5a31bcbdb57754 Mon Sep 17 00:00:00 2001 From: Dmitry Kubikov Date: Fri, 31 Oct 2025 15:00:58 -0700 Subject: [PATCH 1/6] Store keyword fields that trip ignore_above in binary doc values --- .../extras/MatchOnlyTextFieldMapper.java | 66 +++++++++++---- .../test/match_only_text/10_basic.yml | 4 +- ...aryDocValuesSyntheticFieldLoaderLayer.java | 81 +++++++++++++++++++ .../index/mapper/KeywordFieldMapper.java | 66 +++++++++++++-- 4 files changed, 193 insertions(+), 24 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java index d54764d803101..ab7571e91769b 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java @@ -14,6 +14,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.StoredField; +import org.apache.lucene.index.DocValues; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; @@ -30,6 +31,7 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOFunction; import org.elasticsearch.common.CheckedIntFunction; +import org.elasticsearch.common.io.stream.ByteArrayStreamInput; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.text.UTF8DecodingReader; import org.elasticsearch.common.unit.Fuzziness; @@ -297,12 +299,18 @@ private IOFunction, IOExcepti if (parent instanceof KeywordFieldMapper.KeywordFieldType keywordParent && keywordParent.ignoreAbove().valuesPotentiallyIgnored()) { - final String parentFallbackFieldName = keywordParent.syntheticSourceFallbackFieldName(); if (parent.isStored()) { - return storedFieldFetcher(parentFieldName, parentFallbackFieldName); + // if the parent keyword field has ignore_above set, then any ignored values will be stored under a fallback field + return combineFieldFetchers( + storedFieldFetcher(parentFieldName), + binaryDocValuesFieldFetcher(keywordParent.syntheticSourceFallbackFieldName()) + ); } else if (parent.hasDocValues()) { var ifd = searchExecutionContext.getForField(parent, MappedFieldType.FielddataOperation.SEARCH); - return combineFieldFetchers(docValuesFieldFetcher(ifd), storedFieldFetcher(parentFallbackFieldName)); + return combineFieldFetchers( + docValuesFieldFetcher(ifd), + binaryDocValuesFieldFetcher(keywordParent.syntheticSourceFallbackFieldName()) + ); } } @@ -325,22 +333,16 @@ private IOFunction, IOExcepti final KeywordFieldMapper.KeywordFieldType keywordDelegate ) { if (keywordDelegate.ignoreAbove().valuesPotentiallyIgnored()) { - // because we don't know whether the delegate field will be ignored during parsing, we must also check the current field - String fieldName = name(); - String fallbackName = syntheticSourceFallbackFieldName(); - - // delegate field names String delegateFieldName = keywordDelegate.name(); - String delegateFieldFallbackName = keywordDelegate.syntheticSourceFallbackFieldName(); + // bc we don't know whether the delegate will ignore a value, we must also check the fallback field created by this + // match_only_text field + String fallbackName = syntheticSourceFallbackFieldName(); if (keywordDelegate.isStored()) { - return storedFieldFetcher(delegateFieldName, delegateFieldFallbackName, fieldName, fallbackName); + return storedFieldFetcher(delegateFieldName, fallbackName); } else if (keywordDelegate.hasDocValues()) { var ifd = searchExecutionContext.getForField(keywordDelegate, MappedFieldType.FielddataOperation.SEARCH); - return combineFieldFetchers( - docValuesFieldFetcher(ifd), - storedFieldFetcher(delegateFieldFallbackName, fieldName, fallbackName) - ); + return combineFieldFetchers(docValuesFieldFetcher(ifd), storedFieldFetcher(fallbackName)); } } @@ -374,6 +376,42 @@ private static IOFunction, IO }; } + /** + * Used exclusively to load ignored values from binary doc values. These values are stored in a separate fallback field in order to + * retain the original value and hence be able to support synthetic source. + */ + private static IOFunction, IOException>> binaryDocValuesFieldFetcher( + String fieldName + ) { + return context -> { + var binaryDocValues = DocValues.getBinary(context.reader(), fieldName); + return docId -> { + if (binaryDocValues == null || binaryDocValues.advanceExact(docId) == false) { + return List.of(); + } + + // see KeywordFieldMapper.MultiValuedBinaryDocValuesField for context on how to decode these binary doc values back into + // strings + BytesRef docValuesBytes = binaryDocValues.binaryValue(); + + try (ByteArrayStreamInput stream = new ByteArrayStreamInput()) { + stream.reset(docValuesBytes.bytes, docValuesBytes.offset, docValuesBytes.length); + + int docValueCount = stream.readVInt(); + var values = new ArrayList<>(docValueCount); + + for (int i = 0; i < docValueCount; i++) { + // this function already knows how to decode the underlying bytes array, so no need to explicitly call VInt() + BytesRef valueBytes = stream.readBytesRef(); + values.add(valueBytes.utf8ToString()); + } + + return values; + } + }; + }; + } + private static IOFunction, IOException>> storedFieldFetcher(String... names) { var loader = StoredFieldLoader.create(false, Set.of(names)); return context -> { diff --git a/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/match_only_text/10_basic.yml b/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/match_only_text/10_basic.yml index 0050618beeb67..581841df3fe52 100644 --- a/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/match_only_text/10_basic.yml +++ b/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/match_only_text/10_basic.yml @@ -465,7 +465,7 @@ synthetic_source match_only_text as multi-field with ignored keyword as parent: id: "1" refresh: true body: - foo: [ "Apache Lucene powers Elasticsearch", "Apache" ] + foo: [ "Apache Lucene powers Elasticsearch", "Apache", "Apache Lucene" ] - do: search: @@ -477,7 +477,7 @@ synthetic_source match_only_text as multi-field with ignored keyword as parent: - match: { "hits.total.value": 1 } - match: - hits.hits.0._source.foo: [ "Apache", "Apache Lucene powers Elasticsearch" ] + hits.hits.0._source.foo: [ "Apache", "Apache Lucene powers Elasticsearch", "Apache Lucene" ] --- synthetic_source match_only_text as multi-field with stored keyword as parent: diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java b/server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java new file mode 100644 index 0000000000000..bc25251fead6a --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java @@ -0,0 +1,81 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.mapper; + +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.io.stream.ByteArrayStreamInput; +import org.elasticsearch.xcontent.XContentBuilder; + +import java.io.IOException; + +public final class BinaryDocValuesSyntheticFieldLoaderLayer implements CompositeSyntheticFieldLoader.DocValuesLayer { + + private final String fieldName; + + // the binary doc values for a document are all encoded in a single binary array, which this stream knows how to read + // the doc values in the array take the form of [doc value count][length of value 1][value 1][length of value 2][value 2]... + private final ByteArrayStreamInput stream = new ByteArrayStreamInput(); + private BytesRef docValuesBytes; + private int valueCount; + + public BinaryDocValuesSyntheticFieldLoaderLayer(String fieldName) { + this.fieldName = fieldName; + } + + @Override + public long valueCount() { + return valueCount; + } + + @Override + public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException { + BinaryDocValues docValues = leafReader.getBinaryDocValues(fieldName); + + // there are no values associated with this field + if (docValues == null) return null; + + return docId -> { + // there are no more documents to process + if (docValues.advanceExact(docId) == false) { + valueCount = 0; + return false; + } + + // otherwise, extract the doc values into a stream to later read from + docValuesBytes = docValues.binaryValue(); + stream.reset(docValuesBytes.bytes, docValuesBytes.offset, docValuesBytes.length); + valueCount = stream.readVInt(); + + return hasValue(); + }; + } + + @Override + public boolean hasValue() { + return valueCount > 0; + } + + @Override + public void write(XContentBuilder b) throws IOException { + for (int i = 0; i < valueCount; i++) { + // this function already knows how to decode the underlying bytes array, so no need to explicitly call VInt() + BytesRef valueBytes = stream.readBytesRef(); + b.value(valueBytes.utf8ToString()); + } + } + + @Override + public String fieldName() { + return fieldName; + } + +} diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index 773c42d584d4f..daa5894c289cb 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -40,6 +40,8 @@ import org.apache.lucene.util.automaton.CompiledAutomaton; import org.apache.lucene.util.automaton.CompiledAutomaton.AUTOMATON_TYPE; import org.apache.lucene.util.automaton.Operations; +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.common.io.stream.BytesStreamOutput; import org.elasticsearch.common.lucene.BytesRefs; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.lucene.search.AutomatonQueries; @@ -85,6 +87,7 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.LinkedHashSet; import java.util.List; import java.util.Locale; import java.util.Map; @@ -1160,7 +1163,14 @@ private boolean indexValue(DocumentParserContext context, XContentString value) var utfBytes = value.bytes(); var bytesRef = new BytesRef(utfBytes.bytes(), utfBytes.offset(), utfBytes.length()); final String fieldName = fieldType().syntheticSourceFallbackFieldName(); - context.doc().add(new StoredField(fieldName, bytesRef)); + + // store the value in a binary doc values field, create one if it doesn't exist + MultiValuedBinaryDocValuesField field = (MultiValuedBinaryDocValuesField) context.doc().getByKey(fieldName); + if (field == null) { + field = new MultiValuedBinaryDocValuesField(fieldName); + context.doc().addWithKey(fieldName, field); + } + field.add(bytesRef); } return false; @@ -1323,15 +1333,55 @@ protected BytesRef preserve(BytesRef value) { // extra copy of the field for supporting synthetic source. This layer will check that copy. if (fieldType().ignoreAbove.valuesPotentiallyIgnored()) { final String fieldName = fieldType().syntheticSourceFallbackFieldName(); - layers.add(new CompositeSyntheticFieldLoader.StoredFieldLayer(fieldName) { - @Override - protected void writeValue(Object value, XContentBuilder b) throws IOException { - BytesRef ref = (BytesRef) value; - b.utf8Value(ref.bytes, ref.offset, ref.length); - } - }); + layers.add(new BinaryDocValuesSyntheticFieldLoaderLayer(fieldName)); } return new CompositeSyntheticFieldLoader(leafFieldName, fullFieldName, layers); } + + /** + * A custom implementation of {@link org.apache.lucene.index.BinaryDocValues} that uses a {@link Set} to maintain a collection of unique + * binary doc values for fields with multiple values per document. + */ + private static final class MultiValuedBinaryDocValuesField extends CustomDocValuesField { + + private final Set uniqueValues; + private int docValuesByteCount = 0; + + MultiValuedBinaryDocValuesField(String name) { + super(name); + // linked hash set to maintain insertion order of elements + uniqueValues = new LinkedHashSet<>(); + } + + public void add(final BytesRef value) { + uniqueValues.add(value); + // might as well track these on the go as opposed to having to loop through all entries later + docValuesByteCount += value.length; + } + + /** + * Encodes the collection of binary doc values as a single contiguous binary array, wrapped in {@link BytesRef}. This array takes + * the form of [doc value count][length of value 1][value 1][length of value 2][value 2]... + */ + @Override + public BytesRef binaryValue() { + int docValuesCount = uniqueValues.size(); + // the + 1 is for the total doc values count, which is prefixed at the start of the array + int streamSize = docValuesByteCount + (docValuesCount + 1) * Integer.BYTES; + + try (BytesStreamOutput out = new BytesStreamOutput(streamSize)) { + out.writeVInt(docValuesCount); + for (BytesRef value : uniqueValues) { + int valueLength = value.length; + out.writeVInt(valueLength); + out.writeBytes(value.bytes, value.offset, valueLength); + } + return out.bytes().toBytesRef(); + } catch (IOException e) { + throw new ElasticsearchException("Failed to get binary value", e); + } + } + + } } From 9d13387d794fc6e3aa53d7c6a962314a64916bd8 Mon Sep 17 00:00:00 2001 From: Dmitry Kubikov Date: Thu, 6 Nov 2025 12:16:03 -0800 Subject: [PATCH 2/6] Addressed feedback --- ...aryDocValuesSyntheticFieldLoaderLayer.java | 28 +++++----- .../index/mapper/KeywordFieldMapper.java | 8 ++- ...eticSourceNativeArrayIntegrationTests.java | 12 +++- .../NativeArrayIntegrationTestCase.java | 14 +++-- .../wildcard/mapper/WildcardFieldMapper.java | 56 +------------------ 5 files changed, 43 insertions(+), 75 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java b/server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java index bc25251fead6a..498bb7a3e6209 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java @@ -24,24 +24,21 @@ public final class BinaryDocValuesSyntheticFieldLoaderLayer implements Composite // the binary doc values for a document are all encoded in a single binary array, which this stream knows how to read // the doc values in the array take the form of [doc value count][length of value 1][value 1][length of value 2][value 2]... private final ByteArrayStreamInput stream = new ByteArrayStreamInput(); - private BytesRef docValuesBytes; private int valueCount; public BinaryDocValuesSyntheticFieldLoaderLayer(String fieldName) { this.fieldName = fieldName; } - @Override - public long valueCount() { - return valueCount; - } - @Override public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException { BinaryDocValues docValues = leafReader.getBinaryDocValues(fieldName); // there are no values associated with this field - if (docValues == null) return null; + if (docValues == null) { + valueCount = 0; + return null; + } return docId -> { // there are no more documents to process @@ -51,7 +48,7 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf } // otherwise, extract the doc values into a stream to later read from - docValuesBytes = docValues.binaryValue(); + BytesRef docValuesBytes = docValues.binaryValue(); stream.reset(docValuesBytes.bytes, docValuesBytes.offset, docValuesBytes.length); valueCount = stream.readVInt(); @@ -59,11 +56,6 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf }; } - @Override - public boolean hasValue() { - return valueCount > 0; - } - @Override public void write(XContentBuilder b) throws IOException { for (int i = 0; i < valueCount; i++) { @@ -73,6 +65,16 @@ public void write(XContentBuilder b) throws IOException { } } + @Override + public boolean hasValue() { + return valueCount > 0; + } + + @Override + public long valueCount() { + return valueCount; + } + @Override public String fieldName() { return fieldName; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index daa5894c289cb..901257e28f404 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -1355,9 +1355,11 @@ private static final class MultiValuedBinaryDocValuesField extends CustomDocValu } public void add(final BytesRef value) { - uniqueValues.add(value); - // might as well track these on the go as opposed to having to loop through all entries later - docValuesByteCount += value.length; + if (uniqueValues.add(value)) { + // might as well track these on the go as opposed to having to loop through all entries later + docValuesByteCount += value.length; + } + ; } /** diff --git a/server/src/test/java/org/elasticsearch/index/mapper/KeywordSyntheticSourceNativeArrayIntegrationTests.java b/server/src/test/java/org/elasticsearch/index/mapper/KeywordSyntheticSourceNativeArrayIntegrationTests.java index 41e0c644ee20e..ec71e07fc9231 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/KeywordSyntheticSourceNativeArrayIntegrationTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/KeywordSyntheticSourceNativeArrayIntegrationTests.java @@ -52,6 +52,7 @@ public void testSynthesizeArrayIgnoreAbove() throws Exception { .endObject() .endObject() .endObject(); + // Note values that would be ignored are added at the end of arrays, // this makes testing easier as ignored values are always synthesized after regular values: var arrayValues = new Object[][] { @@ -60,7 +61,16 @@ public void testSynthesizeArrayIgnoreAbove() throws Exception { new Object[] { "123", "1234", "12345" }, new Object[] { null, null, null, "blabla" }, new Object[] { "1", "2", "3", "blabla" } }; - verifySyntheticArray(arrayValues, mapping, "_id", "field._original"); + + // values in the original array should be deduplicated + var expectedArrayValues = new Object[][] { + new Object[] { null, "a", "ab", "abc", "abcd", null, "abcde" }, + new Object[] { "12345" }, + new Object[] { "123", "1234", "12345" }, + new Object[] { null, null, null, "blabla" }, + new Object[] { "1", "2", "3", "blabla" } }; + + verifySyntheticArray(arrayValues, expectedArrayValues, mapping, "_id"); } public void testSynthesizeObjectArray() throws Exception { diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/NativeArrayIntegrationTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/NativeArrayIntegrationTestCase.java index d1ab3c0907562..950626292d120 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/NativeArrayIntegrationTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/NativeArrayIntegrationTestCase.java @@ -259,11 +259,17 @@ protected void verifySyntheticArray(Object[][] arrays, XContentBuilder mapping, private XContentBuilder arrayToSource(Object[] array) throws IOException { var source = jsonBuilder().startObject(); if (array != null) { - source.startArray("field"); - for (Object arrayValue : array) { - source.value(arrayValue); + // collapse array if it only consists of one element + // if the only element is null, then we'll skip synthesizing source for that field + if (array.length == 1 && array[0] != null) { + source.field("field", array[0]); + } else { + source.startArray("field"); + for (Object arrayValue : array) { + source.value(arrayValue); + } + source.endArray(); } - source.endArray(); } else { source.field("field").nullValue(); } diff --git a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java index ad28b0336d855..6dbf619e0c140 100644 --- a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java +++ b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java @@ -17,10 +17,8 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.StoredField; -import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; -import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause.Occur; @@ -43,7 +41,6 @@ import org.apache.lucene.util.automaton.RegExp; import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.common.geo.ShapeRelation; -import org.elasticsearch.common.io.stream.ByteArrayStreamInput; import org.elasticsearch.common.lucene.BytesRefs; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.time.DateMathParser; @@ -58,6 +55,7 @@ import org.elasticsearch.index.fielddata.FieldDataContext; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.plain.StringBinaryIndexFieldData; +import org.elasticsearch.index.mapper.BinaryDocValuesSyntheticFieldLoaderLayer; import org.elasticsearch.index.mapper.BinaryFieldMapper.CustomBinaryDocValuesField; import org.elasticsearch.index.mapper.BlockLoader; import org.elasticsearch.index.mapper.CompositeSyntheticFieldLoader; @@ -1106,7 +1104,7 @@ public FieldMapper.Builder getMergeBuilder() { protected SyntheticSourceSupport syntheticSourceSupport() { return new SyntheticSourceSupport.Native(() -> { var layers = new ArrayList(); - layers.add(new WildcardSyntheticFieldLoader()); + layers.add(new BinaryDocValuesSyntheticFieldLoaderLayer(fullPath())); if (ignoreAbove.valuesPotentiallyIgnored()) { layers.add(new CompositeSyntheticFieldLoader.StoredFieldLayer(originalName()) { @Override @@ -1120,54 +1118,4 @@ protected void writeValue(Object value, XContentBuilder b) throws IOException { }); } - private class WildcardSyntheticFieldLoader implements CompositeSyntheticFieldLoader.DocValuesLayer { - private final ByteArrayStreamInput docValuesStream = new ByteArrayStreamInput(); - private int docValueCount; - private BytesRef docValueBytes; - - @Override - public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException { - BinaryDocValues values = leafReader.getBinaryDocValues(fullPath()); - if (values == null) { - docValueCount = 0; - return null; - } - - return docId -> { - if (values.advanceExact(docId) == false) { - docValueCount = 0; - return hasValue(); - } - docValueBytes = values.binaryValue(); - docValuesStream.reset(docValueBytes.bytes); - docValuesStream.setPosition(docValueBytes.offset); - docValueCount = docValuesStream.readVInt(); - return hasValue(); - }; - } - - @Override - public boolean hasValue() { - return docValueCount > 0; - } - - @Override - public long valueCount() { - return docValueCount; - } - - @Override - public void write(XContentBuilder b) throws IOException { - for (int i = 0; i < docValueCount; i++) { - int length = docValuesStream.readVInt(); - b.utf8Value(docValueBytes.bytes, docValuesStream.getPosition(), length); - docValuesStream.skipBytes(length); - } - } - - @Override - public String fieldName() { - return fullPath(); - } - } } From c2ee79e55667c259ff6d9591cf21929599003a11 Mon Sep 17 00:00:00 2001 From: Dmitry Kubikov Date: Fri, 7 Nov 2025 16:02:47 -0800 Subject: [PATCH 3/6] Moved ignore values doc value field fetcher inside of existing fetcher function --- .../extras/MatchOnlyTextFieldMapper.java | 128 ++++++++++-------- ...aryDocValuesSyntheticFieldLoaderLayer.java | 21 ++- .../index/mapper/KeywordFieldMapper.java | 1 - 3 files changed, 86 insertions(+), 64 deletions(-) diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java index ab7571e91769b..7dc66fe31980b 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java @@ -14,6 +14,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.StoredField; +import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.LeafReaderContext; @@ -28,10 +29,10 @@ import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOFunction; import org.elasticsearch.common.CheckedIntFunction; -import org.elasticsearch.common.io.stream.ByteArrayStreamInput; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.text.UTF8DecodingReader; import org.elasticsearch.common.unit.Fuzziness; @@ -39,8 +40,10 @@ import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.elasticsearch.index.fielddata.AbstractBinaryDocValues; import org.elasticsearch.index.fielddata.FieldDataContext; import org.elasticsearch.index.fielddata.IndexFieldData; +import org.elasticsearch.index.fielddata.SortedBinaryDocValues; import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData; import org.elasticsearch.index.fielddata.StoredFieldSortedBinaryIndexFieldData; import org.elasticsearch.index.fieldvisitor.StoredFieldLoader; @@ -299,18 +302,11 @@ private IOFunction, IOExcepti if (parent instanceof KeywordFieldMapper.KeywordFieldType keywordParent && keywordParent.ignoreAbove().valuesPotentiallyIgnored()) { + var ifd = searchExecutionContext.getForField(parent, MappedFieldType.FielddataOperation.SEARCH); if (parent.isStored()) { - // if the parent keyword field has ignore_above set, then any ignored values will be stored under a fallback field - return combineFieldFetchers( - storedFieldFetcher(parentFieldName), - binaryDocValuesFieldFetcher(keywordParent.syntheticSourceFallbackFieldName()) - ); + return combineFieldFetchers(storedFieldFetcher(parentFieldName), docValuesFieldFetcher(ifd)); } else if (parent.hasDocValues()) { - var ifd = searchExecutionContext.getForField(parent, MappedFieldType.FielddataOperation.SEARCH); - return combineFieldFetchers( - docValuesFieldFetcher(ifd), - binaryDocValuesFieldFetcher(keywordParent.syntheticSourceFallbackFieldName()) - ); + return docValuesFieldFetcher(ifd); } } @@ -357,57 +353,29 @@ private IOFunction, IOExcepti } } - private static IOFunction, IOException>> docValuesFieldFetcher( - IndexFieldData ifd - ) { + private IOFunction, IOException>> docValuesFieldFetcher(IndexFieldData ifd) { return context -> { - var sortedBinaryDocValues = ifd.load(context).getBytesValues(); - return docId -> { - if (sortedBinaryDocValues.advanceExact(docId)) { - var values = new ArrayList<>(sortedBinaryDocValues.docValueCount()); - for (int i = 0; i < sortedBinaryDocValues.docValueCount(); i++) { - values.add(sortedBinaryDocValues.nextValue().utf8ToString()); - } - return values; - } else { - return List.of(); - } - }; - }; - } + SortedBinaryDocValues indexedValuesDocValues = ifd.load(context).getBytesValues(); + CustomBinaryDocValues ignoredValuesDocValues = new CustomBinaryDocValues( + DocValues.getBinary(context.reader(), ifd.getFieldName() + TextFamilyFieldType.FALLBACK_FIELD_NAME_SUFFIX) + ); - /** - * Used exclusively to load ignored values from binary doc values. These values are stored in a separate fallback field in order to - * retain the original value and hence be able to support synthetic source. - */ - private static IOFunction, IOException>> binaryDocValuesFieldFetcher( - String fieldName - ) { - return context -> { - var binaryDocValues = DocValues.getBinary(context.reader(), fieldName); return docId -> { - if (binaryDocValues == null || binaryDocValues.advanceExact(docId) == false) { - return List.of(); - } - - // see KeywordFieldMapper.MultiValuedBinaryDocValuesField for context on how to decode these binary doc values back into - // strings - BytesRef docValuesBytes = binaryDocValues.binaryValue(); - - try (ByteArrayStreamInput stream = new ByteArrayStreamInput()) { - stream.reset(docValuesBytes.bytes, docValuesBytes.offset, docValuesBytes.length); + int indexedValueCount = indexedValuesDocValues.advanceExact(docId) ? indexedValuesDocValues.docValueCount() : 0; + int ignoredValueCount = ignoredValuesDocValues.advanceExact(docId) ? ignoredValuesDocValues.docValueCount() : 0; + var values = new ArrayList<>(indexedValueCount + ignoredValueCount); - int docValueCount = stream.readVInt(); - var values = new ArrayList<>(docValueCount); - - for (int i = 0; i < docValueCount; i++) { - // this function already knows how to decode the underlying bytes array, so no need to explicitly call VInt() - BytesRef valueBytes = stream.readBytesRef(); - values.add(valueBytes.utf8ToString()); - } + // extract indexed values from doc values + for (int i = 0; i < indexedValueCount; i++) { + values.add(indexedValuesDocValues.nextValue().utf8ToString()); + } - return values; + // extract ignored values from doc values + for (int i = 0; i < ignoredValueCount; i++) { + values.add(ignoredValuesDocValues.nextValue().utf8ToString()); } + + return values; }; }; } @@ -817,4 +785,52 @@ protected void writeValue(Object value, XContentBuilder b) throws IOException { return fieldLoader; } + + private static class CustomBinaryDocValues extends AbstractBinaryDocValues { + + private final BinaryDocValues binaryDocValues; + + private ByteArrayDataInput data; + private int docValueCount = 0; + + CustomBinaryDocValues(BinaryDocValues binaryDocValues) { + this.binaryDocValues = binaryDocValues; + } + + public BytesRef nextValue() { + // get the length of the value + int length = data.readVInt(); + + // read that many bytes from the underlying bytes array + // the read will automatically move the offset to the next value + byte[] valueBytes = new byte[length]; + data.readBytes(valueBytes, 0, length); + + return new BytesRef(valueBytes); + } + + @Override + public BytesRef binaryValue() throws IOException { + return binaryDocValues.binaryValue(); + } + + @Override + public boolean advanceExact(int docId) throws IOException { + // if document has a value, read underlying bytes + if (binaryDocValues.advanceExact(docId)) { + BytesRef docValuesBytes = binaryDocValues.binaryValue(); + data = new ByteArrayDataInput(docValuesBytes.bytes, docValuesBytes.offset, docValuesBytes.length); + docValueCount = data.readVInt(); + return true; + } + + // otherwise there is nothing to do + docValueCount = 0; + return false; + } + + public int docValueCount() { + return docValueCount; + } + } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java b/server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java index 498bb7a3e6209..157c0f578d6a0 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java @@ -11,8 +11,8 @@ import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.LeafReader; +import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.util.BytesRef; -import org.elasticsearch.common.io.stream.ByteArrayStreamInput; import org.elasticsearch.xcontent.XContentBuilder; import java.io.IOException; @@ -23,7 +23,7 @@ public final class BinaryDocValuesSyntheticFieldLoaderLayer implements Composite // the binary doc values for a document are all encoded in a single binary array, which this stream knows how to read // the doc values in the array take the form of [doc value count][length of value 1][value 1][length of value 2][value 2]... - private final ByteArrayStreamInput stream = new ByteArrayStreamInput(); + private final ByteArrayDataInput data = new ByteArrayDataInput(); private int valueCount; public BinaryDocValuesSyntheticFieldLoaderLayer(String fieldName) { @@ -49,8 +49,8 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf // otherwise, extract the doc values into a stream to later read from BytesRef docValuesBytes = docValues.binaryValue(); - stream.reset(docValuesBytes.bytes, docValuesBytes.offset, docValuesBytes.length); - valueCount = stream.readVInt(); + data.reset(docValuesBytes.bytes, docValuesBytes.offset, docValuesBytes.length); + valueCount = data.readVInt(); return hasValue(); }; @@ -59,9 +59,16 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf @Override public void write(XContentBuilder b) throws IOException { for (int i = 0; i < valueCount; i++) { - // this function already knows how to decode the underlying bytes array, so no need to explicitly call VInt() - BytesRef valueBytes = stream.readBytesRef(); - b.value(valueBytes.utf8ToString()); + // read the length of the value + int length = data.readVInt(); + + // read that many bytes from the input + // the read will automatically move the offset to the next value + byte[] valueBytes = new byte[length]; + data.readBytes(valueBytes, 0, length); + + // finally, write those bytes into XContentBuilder + b.value(new BytesRef(valueBytes).utf8ToString()); } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index 901257e28f404..c61432bb08f42 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -1359,7 +1359,6 @@ public void add(final BytesRef value) { // might as well track these on the go as opposed to having to loop through all entries later docValuesByteCount += value.length; } - ; } /** From 9bac251f181a3361f6232623dd74b0639c16fb52 Mon Sep 17 00:00:00 2001 From: Dmitry Kubikov Date: Fri, 7 Nov 2025 16:02:47 -0800 Subject: [PATCH 4/6] Moved ignore values doc value field fetcher inside of existing fetcher function --- .../index/mapper/extras/MatchOnlyTextFieldMapper.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java index 7dc66fe31980b..e13e8c96c2b5e 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java @@ -786,6 +786,10 @@ protected void writeValue(Object value, XContentBuilder b) throws IOException { return fieldLoader; } + + /** + * A wrapper around {@link BinaryDocValues} that exposes some quality of life functions. + */ private static class CustomBinaryDocValues extends AbstractBinaryDocValues { private final BinaryDocValues binaryDocValues; From ab289467a81228900ecc4eb5b317c293c61af966 Mon Sep 17 00:00:00 2001 From: Dmitry Kubikov Date: Fri, 7 Nov 2025 16:02:47 -0800 Subject: [PATCH 5/6] Moved ignore values doc value field fetcher inside of existing fetcher function --- .../extras/MatchOnlyTextFieldMapper.java | 23 +++++++------------ ...aryDocValuesSyntheticFieldLoaderLayer.java | 22 +++++++----------- 2 files changed, 16 insertions(+), 29 deletions(-) diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java index e13e8c96c2b5e..04b6b66dd7d83 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java @@ -29,10 +29,10 @@ import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; -import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOFunction; import org.elasticsearch.common.CheckedIntFunction; +import org.elasticsearch.common.io.stream.ByteArrayStreamInput; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.text.UTF8DecodingReader; import org.elasticsearch.common.unit.Fuzziness; @@ -786,31 +786,24 @@ protected void writeValue(Object value, XContentBuilder b) throws IOException { return fieldLoader; } - /** * A wrapper around {@link BinaryDocValues} that exposes some quality of life functions. */ private static class CustomBinaryDocValues extends AbstractBinaryDocValues { private final BinaryDocValues binaryDocValues; + private final ByteArrayStreamInput stream; - private ByteArrayDataInput data; private int docValueCount = 0; CustomBinaryDocValues(BinaryDocValues binaryDocValues) { this.binaryDocValues = binaryDocValues; + this.stream = new ByteArrayStreamInput(); } - public BytesRef nextValue() { - // get the length of the value - int length = data.readVInt(); - - // read that many bytes from the underlying bytes array - // the read will automatically move the offset to the next value - byte[] valueBytes = new byte[length]; - data.readBytes(valueBytes, 0, length); - - return new BytesRef(valueBytes); + public BytesRef nextValue() throws IOException { + // this function already knows how to decode the underlying bytes array, so no need to explicitly call VInt() + return stream.readBytesRef(); } @Override @@ -823,8 +816,8 @@ public boolean advanceExact(int docId) throws IOException { // if document has a value, read underlying bytes if (binaryDocValues.advanceExact(docId)) { BytesRef docValuesBytes = binaryDocValues.binaryValue(); - data = new ByteArrayDataInput(docValuesBytes.bytes, docValuesBytes.offset, docValuesBytes.length); - docValueCount = data.readVInt(); + stream.reset(docValuesBytes.bytes, docValuesBytes.offset, docValuesBytes.length); + docValueCount = stream.readVInt(); return true; } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java b/server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java index 157c0f578d6a0..1f0c0be1f9555 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java @@ -11,8 +11,8 @@ import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.LeafReader; -import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.io.stream.ByteArrayStreamInput; import org.elasticsearch.xcontent.XContentBuilder; import java.io.IOException; @@ -23,11 +23,12 @@ public final class BinaryDocValuesSyntheticFieldLoaderLayer implements Composite // the binary doc values for a document are all encoded in a single binary array, which this stream knows how to read // the doc values in the array take the form of [doc value count][length of value 1][value 1][length of value 2][value 2]... - private final ByteArrayDataInput data = new ByteArrayDataInput(); + private final ByteArrayStreamInput stream; private int valueCount; public BinaryDocValuesSyntheticFieldLoaderLayer(String fieldName) { this.fieldName = fieldName; + this.stream = new ByteArrayStreamInput(); } @Override @@ -49,8 +50,8 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf // otherwise, extract the doc values into a stream to later read from BytesRef docValuesBytes = docValues.binaryValue(); - data.reset(docValuesBytes.bytes, docValuesBytes.offset, docValuesBytes.length); - valueCount = data.readVInt(); + stream.reset(docValuesBytes.bytes, docValuesBytes.offset, docValuesBytes.length); + valueCount = stream.readVInt(); return hasValue(); }; @@ -59,16 +60,9 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf @Override public void write(XContentBuilder b) throws IOException { for (int i = 0; i < valueCount; i++) { - // read the length of the value - int length = data.readVInt(); - - // read that many bytes from the input - // the read will automatically move the offset to the next value - byte[] valueBytes = new byte[length]; - data.readBytes(valueBytes, 0, length); - - // finally, write those bytes into XContentBuilder - b.value(new BytesRef(valueBytes).utf8ToString()); + // this function already knows how to decode the underlying bytes array, so no need to explicitly call VInt() + BytesRef valueBytes = stream.readBytesRef(); + b.value(valueBytes.utf8ToString()); } } From a1d1d2a35e51c06da55d418ab92d63e481d309a9 Mon Sep 17 00:00:00 2001 From: Dmitry Kubikov Date: Fri, 7 Nov 2025 16:02:47 -0800 Subject: [PATCH 6/6] Moved ignore values doc value field fetcher inside of existing fetcher function --- .../extras/MatchOnlyTextFieldMapper.java | 65 ++++++++++--------- 1 file changed, 34 insertions(+), 31 deletions(-) diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java index 04b6b66dd7d83..6c98720d56d45 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java @@ -40,7 +40,6 @@ import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.index.analysis.NamedAnalyzer; -import org.elasticsearch.index.fielddata.AbstractBinaryDocValues; import org.elasticsearch.index.fielddata.FieldDataContext; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.SortedBinaryDocValues; @@ -302,11 +301,17 @@ private IOFunction, IOExcepti if (parent instanceof KeywordFieldMapper.KeywordFieldType keywordParent && keywordParent.ignoreAbove().valuesPotentiallyIgnored()) { - var ifd = searchExecutionContext.getForField(parent, MappedFieldType.FielddataOperation.SEARCH); if (parent.isStored()) { - return combineFieldFetchers(storedFieldFetcher(parentFieldName), docValuesFieldFetcher(ifd)); + return combineFieldFetchers( + storedFieldFetcher(parentFieldName), + ignoredValuesDocValuesFieldFetcher(keywordParent.syntheticSourceFallbackFieldName()) + ); } else if (parent.hasDocValues()) { - return docValuesFieldFetcher(ifd); + var ifd = searchExecutionContext.getForField(parent, MappedFieldType.FielddataOperation.SEARCH); + return combineFieldFetchers( + docValuesFieldFetcher(ifd), + ignoredValuesDocValuesFieldFetcher(keywordParent.syntheticSourceFallbackFieldName()) + ); } } @@ -356,30 +361,31 @@ private IOFunction, IOExcepti private IOFunction, IOException>> docValuesFieldFetcher(IndexFieldData ifd) { return context -> { SortedBinaryDocValues indexedValuesDocValues = ifd.load(context).getBytesValues(); - CustomBinaryDocValues ignoredValuesDocValues = new CustomBinaryDocValues( - DocValues.getBinary(context.reader(), ifd.getFieldName() + TextFamilyFieldType.FALLBACK_FIELD_NAME_SUFFIX) - ); - - return docId -> { - int indexedValueCount = indexedValuesDocValues.advanceExact(docId) ? indexedValuesDocValues.docValueCount() : 0; - int ignoredValueCount = ignoredValuesDocValues.advanceExact(docId) ? ignoredValuesDocValues.docValueCount() : 0; - var values = new ArrayList<>(indexedValueCount + ignoredValueCount); - - // extract indexed values from doc values - for (int i = 0; i < indexedValueCount; i++) { - values.add(indexedValuesDocValues.nextValue().utf8ToString()); - } - - // extract ignored values from doc values - for (int i = 0; i < ignoredValueCount; i++) { - values.add(ignoredValuesDocValues.nextValue().utf8ToString()); - } + return docId -> getValuesFromDocValues(indexedValuesDocValues, docId); + }; + } - return values; - }; + private IOFunction, IOException>> ignoredValuesDocValuesFieldFetcher( + String fieldName + ) { + return context -> { + CustomBinaryDocValues ignoredValuesDocValues = new CustomBinaryDocValues(DocValues.getBinary(context.reader(), fieldName)); + return docId -> getValuesFromDocValues(ignoredValuesDocValues, docId); }; } + private List getValuesFromDocValues(SortedBinaryDocValues docValues, int docId) throws IOException { + if (docValues.advanceExact(docId)) { + var values = new ArrayList<>(docValues.docValueCount()); + for (int i = 0; i < docValues.docValueCount(); i++) { + values.add(docValues.nextValue().utf8ToString()); + } + return values; + } else { + return List.of(); + } + } + private static IOFunction, IOException>> storedFieldFetcher(String... names) { var loader = StoredFieldLoader.create(false, Set.of(names)); return context -> { @@ -787,9 +793,9 @@ protected void writeValue(Object value, XContentBuilder b) throws IOException { } /** - * A wrapper around {@link BinaryDocValues} that exposes some quality of life functions. + * A wrapper around {@link BinaryDocValues} that exposes some quality of life functions. Note, these values are not sorted. */ - private static class CustomBinaryDocValues extends AbstractBinaryDocValues { + private static class CustomBinaryDocValues extends SortedBinaryDocValues { private final BinaryDocValues binaryDocValues; private final ByteArrayStreamInput stream; @@ -801,16 +807,12 @@ private static class CustomBinaryDocValues extends AbstractBinaryDocValues { this.stream = new ByteArrayStreamInput(); } + @Override public BytesRef nextValue() throws IOException { // this function already knows how to decode the underlying bytes array, so no need to explicitly call VInt() return stream.readBytesRef(); } - @Override - public BytesRef binaryValue() throws IOException { - return binaryDocValues.binaryValue(); - } - @Override public boolean advanceExact(int docId) throws IOException { // if document has a value, read underlying bytes @@ -826,6 +828,7 @@ public boolean advanceExact(int docId) throws IOException { return false; } + @Override public int docValueCount() { return docValueCount; }