Skip to content

Commit 23c0027

Browse files
committed
Moved the ignored-values doc values field fetcher inside the existing fetcher function
1 parent 5f244e6 commit 23c0027

File tree

3 files changed

+86
-64
lines changed

3 files changed

+86
-64
lines changed

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java

Lines changed: 72 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import org.apache.lucene.document.Field;
1515
import org.apache.lucene.document.FieldType;
1616
import org.apache.lucene.document.StoredField;
17+
import org.apache.lucene.index.BinaryDocValues;
1718
import org.apache.lucene.index.DocValues;
1819
import org.apache.lucene.index.IndexOptions;
1920
import org.apache.lucene.index.LeafReaderContext;
@@ -28,19 +29,21 @@
2829
import org.apache.lucene.search.PrefixQuery;
2930
import org.apache.lucene.search.Query;
3031
import org.apache.lucene.search.TermQuery;
32+
import org.apache.lucene.store.ByteArrayDataInput;
3133
import org.apache.lucene.util.BytesRef;
3234
import org.apache.lucene.util.IOFunction;
3335
import org.elasticsearch.common.CheckedIntFunction;
34-
import org.elasticsearch.common.io.stream.ByteArrayStreamInput;
3536
import org.elasticsearch.common.lucene.Lucene;
3637
import org.elasticsearch.common.text.UTF8DecodingReader;
3738
import org.elasticsearch.common.unit.Fuzziness;
3839
import org.elasticsearch.index.IndexVersion;
3940
import org.elasticsearch.index.IndexVersions;
4041
import org.elasticsearch.index.analysis.IndexAnalyzers;
4142
import org.elasticsearch.index.analysis.NamedAnalyzer;
43+
import org.elasticsearch.index.fielddata.AbstractBinaryDocValues;
4244
import org.elasticsearch.index.fielddata.FieldDataContext;
4345
import org.elasticsearch.index.fielddata.IndexFieldData;
46+
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
4447
import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData;
4548
import org.elasticsearch.index.fielddata.StoredFieldSortedBinaryIndexFieldData;
4649
import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
@@ -299,18 +302,11 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti
299302

300303
if (parent instanceof KeywordFieldMapper.KeywordFieldType keywordParent
301304
&& keywordParent.ignoreAbove().valuesPotentiallyIgnored()) {
305+
var ifd = searchExecutionContext.getForField(parent, MappedFieldType.FielddataOperation.SEARCH);
302306
if (parent.isStored()) {
303-
// if the parent keyword field has ignore_above set, then any ignored values will be stored under a fallback field
304-
return combineFieldFetchers(
305-
storedFieldFetcher(parentFieldName),
306-
binaryDocValuesFieldFetcher(keywordParent.syntheticSourceFallbackFieldName())
307-
);
307+
return combineFieldFetchers(storedFieldFetcher(parentFieldName), docValuesFieldFetcher(ifd));
308308
} else if (parent.hasDocValues()) {
309-
var ifd = searchExecutionContext.getForField(parent, MappedFieldType.FielddataOperation.SEARCH);
310-
return combineFieldFetchers(
311-
docValuesFieldFetcher(ifd),
312-
binaryDocValuesFieldFetcher(keywordParent.syntheticSourceFallbackFieldName())
313-
);
309+
return docValuesFieldFetcher(ifd);
314310
}
315311
}
316312

@@ -357,57 +353,29 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti
357353
}
358354
}
359355

360-
private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> docValuesFieldFetcher(
361-
IndexFieldData<?> ifd
362-
) {
356+
private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> docValuesFieldFetcher(IndexFieldData<?> ifd) {
363357
return context -> {
364-
var sortedBinaryDocValues = ifd.load(context).getBytesValues();
365-
return docId -> {
366-
if (sortedBinaryDocValues.advanceExact(docId)) {
367-
var values = new ArrayList<>(sortedBinaryDocValues.docValueCount());
368-
for (int i = 0; i < sortedBinaryDocValues.docValueCount(); i++) {
369-
values.add(sortedBinaryDocValues.nextValue().utf8ToString());
370-
}
371-
return values;
372-
} else {
373-
return List.of();
374-
}
375-
};
376-
};
377-
}
358+
SortedBinaryDocValues indexedValuesDocValues = ifd.load(context).getBytesValues();
359+
CustomBinaryDocValues ignoredValuesDocValues = new CustomBinaryDocValues(
360+
DocValues.getBinary(context.reader(), ifd.getFieldName() + TextFamilyFieldType.FALLBACK_FIELD_NAME_SUFFIX)
361+
);
378362

379-
/**
380-
* Used exclusively to load ignored values from binary doc values. These values are stored in a separate fallback field in order to
381-
* retain the original value and hence be able to support synthetic source.
382-
*/
383-
private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> binaryDocValuesFieldFetcher(
384-
String fieldName
385-
) {
386-
return context -> {
387-
var binaryDocValues = DocValues.getBinary(context.reader(), fieldName);
388363
return docId -> {
389-
if (binaryDocValues == null || binaryDocValues.advanceExact(docId) == false) {
390-
return List.of();
391-
}
392-
393-
// see KeywordFieldMapper.MultiValuedBinaryDocValuesField for context on how to decode these binary doc values back into
394-
// strings
395-
BytesRef docValuesBytes = binaryDocValues.binaryValue();
396-
397-
try (ByteArrayStreamInput stream = new ByteArrayStreamInput()) {
398-
stream.reset(docValuesBytes.bytes, docValuesBytes.offset, docValuesBytes.length);
364+
int indexedValueCount = indexedValuesDocValues.advanceExact(docId) ? indexedValuesDocValues.docValueCount() : 0;
365+
int ignoredValueCount = ignoredValuesDocValues.advanceExact(docId) ? ignoredValuesDocValues.docValueCount() : 0;
366+
var values = new ArrayList<>(indexedValueCount + ignoredValueCount);
399367

400-
int docValueCount = stream.readVInt();
401-
var values = new ArrayList<>(docValueCount);
402-
403-
for (int i = 0; i < docValueCount; i++) {
404-
// this function already knows how to decode the underlying bytes array, so no need to explicitly call VInt()
405-
BytesRef valueBytes = stream.readBytesRef();
406-
values.add(valueBytes.utf8ToString());
407-
}
368+
// extract indexed values from doc values
369+
for (int i = 0; i < indexedValueCount; i++) {
370+
values.add(indexedValuesDocValues.nextValue().utf8ToString());
371+
}
408372

409-
return values;
373+
// extract ignored values from doc values
374+
for (int i = 0; i < ignoredValueCount; i++) {
375+
values.add(ignoredValuesDocValues.nextValue().utf8ToString());
410376
}
377+
378+
return values;
411379
};
412380
};
413381
}
@@ -817,4 +785,52 @@ protected void writeValue(Object value, XContentBuilder b) throws IOException {
817785

818786
return fieldLoader;
819787
}
788+
789+
/**
 * Wraps a {@link BinaryDocValues} instance whose per-document value packs several values into one byte array, and exposes
 * them one at a time through {@link #docValueCount()} and {@link #nextValue()}.
 *
 * As decoded by this class, the per-document bytes start with a VInt holding the number of values, followed by each value
 * written as a VInt length and then that many bytes.
 *
 * Expected usage: call {@link #advanceExact(int)} first; if it returns true, call {@link #nextValue()} at most
 * {@link #docValueCount()} times.
 */
private static class CustomBinaryDocValues extends AbstractBinaryDocValues {
790+
791+
// the wrapped doc values that hold the encoded bytes for each document
private final BinaryDocValues binaryDocValues;
792+
793+
// reader positioned over the current document's bytes; stays null until advanceExact() has returned true at least once
private ByteArrayDataInput data;
794+
// number of values encoded for the current document, or 0 when the document has no value
private int docValueCount = 0;
795+
796+
CustomBinaryDocValues(BinaryDocValues binaryDocValues) {
797+
this.binaryDocValues = binaryDocValues;
798+
}
799+
800+
/**
 * Decodes and returns the next value of the current document, copied into a freshly allocated array.
 *
 * There is no bounds check here: the caller is responsible for not calling this more than {@link #docValueCount()} times,
 * and for only calling it after {@link #advanceExact(int)} returned true (otherwise {@code data} is null or still points
 * at a previous document's bytes).
 */
public BytesRef nextValue() {
801+
// get the length of the value
802+
int length = data.readVInt();
803+
804+
// read that many bytes from the underlying bytes array
805+
// the read will automatically move the offset to the next value
806+
byte[] valueBytes = new byte[length];
807+
data.readBytes(valueBytes, 0, length);
808+
809+
return new BytesRef(valueBytes);
810+
}
811+
812+
/**
 * Returns the raw, still-encoded bytes of the current document by delegating to the wrapped doc values.
 */
@Override
813+
public BytesRef binaryValue() throws IOException {
814+
return binaryDocValues.binaryValue();
815+
}
816+
817+
/**
 * Advances the wrapped doc values to {@code docId} and, if the document has a value, prepares it for decoding.
 *
 * @return true if the document has a value, in which case {@link #docValueCount()} holds the decoded value count;
 *         false otherwise, in which case {@link #docValueCount()} is reset to 0
 */
@Override
818+
public boolean advanceExact(int docId) throws IOException {
819+
// if document has a value, read underlying bytes
820+
if (binaryDocValues.advanceExact(docId)) {
821+
BytesRef docValuesBytes = binaryDocValues.binaryValue();
822+
// a new reader is created for every matching document, limited to this document's slice of the byte array
data = new ByteArrayDataInput(docValuesBytes.bytes, docValuesBytes.offset, docValuesBytes.length);
823+
// the leading VInt is the value count; reading it leaves the reader at the first value's length prefix
docValueCount = data.readVInt();
824+
return true;
825+
}
826+
827+
// otherwise there is nothing to do
828+
docValueCount = 0;
829+
return false;
830+
}
831+
832+
/**
 * Returns the number of values encoded for the document last passed to {@link #advanceExact(int)}, or 0 if it had none.
 */
public int docValueCount() {
833+
return docValueCount;
834+
}
835+
}
820836
}

server/src/main/java/org/elasticsearch/index/mapper/BinaryDocValuesSyntheticFieldLoaderLayer.java

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111

1212
import org.apache.lucene.index.BinaryDocValues;
1313
import org.apache.lucene.index.LeafReader;
14+
import org.apache.lucene.store.ByteArrayDataInput;
1415
import org.apache.lucene.util.BytesRef;
15-
import org.elasticsearch.common.io.stream.ByteArrayStreamInput;
1616
import org.elasticsearch.xcontent.XContentBuilder;
1717

1818
import java.io.IOException;
@@ -23,7 +23,7 @@ public final class BinaryDocValuesSyntheticFieldLoaderLayer implements Composite
2323

2424
// the binary doc values for a document are all encoded in a single binary array, which this data input knows how to read
2525
// the doc values in the array take the form of [doc value count][length of value 1][value 1][length of value 2][value 2]...
26-
private final ByteArrayStreamInput stream = new ByteArrayStreamInput();
26+
private final ByteArrayDataInput data = new ByteArrayDataInput();
2727
private int valueCount;
2828

2929
public BinaryDocValuesSyntheticFieldLoaderLayer(String fieldName) {
@@ -49,8 +49,8 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf
4949

5050
// otherwise, extract the doc values into a stream to later read from
5151
BytesRef docValuesBytes = docValues.binaryValue();
52-
stream.reset(docValuesBytes.bytes, docValuesBytes.offset, docValuesBytes.length);
53-
valueCount = stream.readVInt();
52+
data.reset(docValuesBytes.bytes, docValuesBytes.offset, docValuesBytes.length);
53+
valueCount = data.readVInt();
5454

5555
return hasValue();
5656
};
@@ -59,9 +59,16 @@ public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf
5959
@Override
6060
public void write(XContentBuilder b) throws IOException {
6161
for (int i = 0; i < valueCount; i++) {
62-
// this function already knows how to decode the underlying bytes array, so no need to explicitly call VInt()
63-
BytesRef valueBytes = stream.readBytesRef();
64-
b.value(valueBytes.utf8ToString());
62+
// read the length of the value
63+
int length = data.readVInt();
64+
65+
// read that many bytes from the input
66+
// the read will automatically move the offset to the next value
67+
byte[] valueBytes = new byte[length];
68+
data.readBytes(valueBytes, 0, length);
69+
70+
// finally, write those bytes into XContentBuilder
71+
b.value(new BytesRef(valueBytes).utf8ToString());
6572
}
6673
}
6774

server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1452,7 +1452,6 @@ public void add(final BytesRef value) {
14521452
// might as well track these on the go as opposed to having to loop through all entries later
14531453
docValuesByteCount += value.length;
14541454
}
1455-
;
14561455
}
14571456

14581457
/**

0 commit comments

Comments
 (0)