|
14 | 14 | import org.apache.lucene.document.Field; |
15 | 15 | import org.apache.lucene.document.FieldType; |
16 | 16 | import org.apache.lucene.document.StoredField; |
| 17 | +import org.apache.lucene.index.BinaryDocValues; |
17 | 18 | import org.apache.lucene.index.DocValues; |
18 | 19 | import org.apache.lucene.index.IndexOptions; |
19 | 20 | import org.apache.lucene.index.LeafReaderContext; |
|
28 | 29 | import org.apache.lucene.search.PrefixQuery; |
29 | 30 | import org.apache.lucene.search.Query; |
30 | 31 | import org.apache.lucene.search.TermQuery; |
| 32 | +import org.apache.lucene.store.ByteArrayDataInput; |
31 | 33 | import org.apache.lucene.util.BytesRef; |
32 | 34 | import org.apache.lucene.util.IOFunction; |
33 | 35 | import org.elasticsearch.common.CheckedIntFunction; |
34 | | -import org.elasticsearch.common.io.stream.ByteArrayStreamInput; |
35 | 36 | import org.elasticsearch.common.lucene.Lucene; |
36 | 37 | import org.elasticsearch.common.text.UTF8DecodingReader; |
37 | 38 | import org.elasticsearch.common.unit.Fuzziness; |
38 | 39 | import org.elasticsearch.index.IndexVersion; |
39 | 40 | import org.elasticsearch.index.IndexVersions; |
40 | 41 | import org.elasticsearch.index.analysis.IndexAnalyzers; |
41 | 42 | import org.elasticsearch.index.analysis.NamedAnalyzer; |
| 43 | +import org.elasticsearch.index.fielddata.AbstractBinaryDocValues; |
42 | 44 | import org.elasticsearch.index.fielddata.FieldDataContext; |
43 | 45 | import org.elasticsearch.index.fielddata.IndexFieldData; |
| 46 | +import org.elasticsearch.index.fielddata.SortedBinaryDocValues; |
44 | 47 | import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData; |
45 | 48 | import org.elasticsearch.index.fielddata.StoredFieldSortedBinaryIndexFieldData; |
46 | 49 | import org.elasticsearch.index.fieldvisitor.StoredFieldLoader; |
@@ -299,18 +302,11 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti |
299 | 302 |
|
300 | 303 | if (parent instanceof KeywordFieldMapper.KeywordFieldType keywordParent |
301 | 304 | && keywordParent.ignoreAbove().valuesPotentiallyIgnored()) { |
| 305 | + var ifd = searchExecutionContext.getForField(parent, MappedFieldType.FielddataOperation.SEARCH); |
302 | 306 | if (parent.isStored()) { |
303 | | - // if the parent keyword field has ignore_above set, then any ignored values will be stored under a fallback field |
304 | | - return combineFieldFetchers( |
305 | | - storedFieldFetcher(parentFieldName), |
306 | | - binaryDocValuesFieldFetcher(keywordParent.syntheticSourceFallbackFieldName()) |
307 | | - ); |
| 307 | + return combineFieldFetchers(storedFieldFetcher(parentFieldName), docValuesFieldFetcher(ifd)); |
308 | 308 | } else if (parent.hasDocValues()) { |
309 | | - var ifd = searchExecutionContext.getForField(parent, MappedFieldType.FielddataOperation.SEARCH); |
310 | | - return combineFieldFetchers( |
311 | | - docValuesFieldFetcher(ifd), |
312 | | - binaryDocValuesFieldFetcher(keywordParent.syntheticSourceFallbackFieldName()) |
313 | | - ); |
| 309 | + return docValuesFieldFetcher(ifd); |
314 | 310 | } |
315 | 311 | } |
316 | 312 |
|
@@ -357,57 +353,29 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti |
357 | 353 | } |
358 | 354 | } |
359 | 355 |
|
360 | | - private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> docValuesFieldFetcher( |
361 | | - IndexFieldData<?> ifd |
362 | | - ) { |
/**
 * Builds a per-segment fetcher that, for a given doc id, returns the field's values as strings.
 * Two doc-values sources are read and concatenated into one list: the values exposed by {@code ifd}, followed by the
 * values decoded from the binary doc values field named
 * {@code ifd.getFieldName() + TextFamilyFieldType.FALLBACK_FIELD_NAME_SUFFIX}. A source that has no entry for the
 * document contributes nothing, so the list is empty when neither source has the document.
 * NOTE(review): the second source is presumably where values skipped by ignore_above are kept - confirm against the
 * keyword mapper.
 *
 * @param ifd field data used to load the primary doc values and to derive the fallback field name
 * @return a function that, for each leaf reader context, yields a doc id to values lookup
 */
| 356 | + private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> docValuesFieldFetcher(IndexFieldData<?> ifd) { |
363 | 357 | return context -> {
364 | | - var sortedBinaryDocValues = ifd.load(context).getBytesValues(); |
365 | | - return docId -> { |
366 | | - if (sortedBinaryDocValues.advanceExact(docId)) { |
367 | | - var values = new ArrayList<>(sortedBinaryDocValues.docValueCount()); |
368 | | - for (int i = 0; i < sortedBinaryDocValues.docValueCount(); i++) { |
369 | | - values.add(sortedBinaryDocValues.nextValue().utf8ToString()); |
370 | | - } |
371 | | - return values; |
372 | | - } else { |
373 | | - return List.of(); |
374 | | - } |
375 | | - }; |
376 | | - }; |
377 | | - } |
| 358 | + SortedBinaryDocValues indexedValuesDocValues = ifd.load(context).getBytesValues(); |
| 359 | + CustomBinaryDocValues ignoredValuesDocValues = new CustomBinaryDocValues( |
| 360 | + DocValues.getBinary(context.reader(), ifd.getFieldName() + TextFamilyFieldType.FALLBACK_FIELD_NAME_SUFFIX) |
| 361 | + ); |
378 | 362 |
|
379 | | - /** |
380 | | - * Used exclusively to load ignored values from binary doc values. These values are stored in a separate fallback field in order to |
381 | | - * retain the original value and hence be able to support synthetic source. |
382 | | - */ |
383 | | - private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> binaryDocValuesFieldFetcher( |
384 | | - String fieldName |
385 | | - ) { |
386 | | - return context -> { |
387 | | - var binaryDocValues = DocValues.getBinary(context.reader(), fieldName); |
388 | 363 | return docId -> {
389 | | - if (binaryDocValues == null || binaryDocValues.advanceExact(docId) == false) { |
390 | | - return List.of(); |
391 | | - } |
392 | | - |
393 | | - // see KeywordFieldMapper.MultiValuedBinaryDocValuesField for context on how to decode these binary doc values back into |
394 | | - // strings |
395 | | - BytesRef docValuesBytes = binaryDocValues.binaryValue(); |
396 | | - |
397 | | - try (ByteArrayStreamInput stream = new ByteArrayStreamInput()) { |
398 | | - stream.reset(docValuesBytes.bytes, docValuesBytes.offset, docValuesBytes.length); |
// Both sources are positioned on the document before any value is read; a source that has no entry for this
// document is given a count of 0, so its loop below does not run. The list is pre-sized to the combined count.
| 364 | + int indexedValueCount = indexedValuesDocValues.advanceExact(docId) ? indexedValuesDocValues.docValueCount() : 0; |
| 365 | + int ignoredValueCount = ignoredValuesDocValues.advanceExact(docId) ? ignoredValuesDocValues.docValueCount() : 0; |
| 366 | + var values = new ArrayList<>(indexedValueCount + ignoredValueCount); |
399 | 367 |
|
400 | | - int docValueCount = stream.readVInt(); |
401 | | - var values = new ArrayList<>(docValueCount); |
402 | | - |
403 | | - for (int i = 0; i < docValueCount; i++) { |
404 | | - // this function already knows how to decode the underlying bytes array, so no need to explicitly call VInt() |
405 | | - BytesRef valueBytes = stream.readBytesRef(); |
406 | | - values.add(valueBytes.utf8ToString()); |
407 | | - } |
| 368 | + // extract indexed values from doc values |
| 369 | + for (int i = 0; i < indexedValueCount; i++) { |
| 370 | + values.add(indexedValuesDocValues.nextValue().utf8ToString()); |
| 371 | + } |
408 | 372 |
|
409 | | - return values; |
| 373 | + // extract ignored values from doc values |
| 374 | + for (int i = 0; i < ignoredValueCount; i++) { |
| 375 | + values.add(ignoredValuesDocValues.nextValue().utf8ToString()); |
410 | 376 | }
| 377 | + |
| 378 | + return values; |
411 | 379 | };
412 | 380 | };
413 | 381 | }
@@ -817,4 +785,52 @@ protected void writeValue(Object value, XContentBuilder b) throws IOException { |
817 | 785 |
|
818 | 786 | return fieldLoader; |
819 | 787 | } |
| 788 | + |
/**
 * Wraps a {@link BinaryDocValues} whose per-document payload packs several values into a single byte array and exposes
 * them one at a time.
 * As decoded by this class the payload is: a VInt value count, then for each value a VInt length followed by that many
 * bytes.
 * Expected call order: {@code advanceExact(docId)}; when it returns true, {@code docValueCount()} gives the number of
 * values and {@code nextValue()} may be called that many times. {@code nextValue()} does no checking of its own:
 * {@code data} is null until {@code advanceExact} first returns true, and it is not cleared when {@code advanceExact}
 * returns false, so callers must rely on {@code docValueCount()} to bound their reads.
 */
| 789 | + private static class CustomBinaryDocValues extends AbstractBinaryDocValues { |
| 790 | + |
| 791 | + private final BinaryDocValues binaryDocValues; |
| 792 | + |
// Cursor over the current document's payload; assigned only when advanceExact finds a value.
| 793 | + private ByteArrayDataInput data; |
| 794 | + private int docValueCount = 0; |
| 795 | + |
| 796 | + CustomBinaryDocValues(BinaryDocValues binaryDocValues) { |
| 797 | + this.binaryDocValues = binaryDocValues; |
| 798 | + } |
| 799 | + |
/**
 * Decodes the next value of the current document and advances the cursor past it.
 * The bytes are read into a newly allocated array on every call.
 *
 * @return the next value, backed by its own array
 */
| 800 | + public BytesRef nextValue() { |
| 801 | + // get the length of the value |
| 802 | + int length = data.readVInt(); |
| 803 | + |
| 804 | + // read that many bytes from the underlying bytes array |
| 805 | + // the read will automatically move the offset to the next value |
| 806 | + byte[] valueBytes = new byte[length]; |
| 807 | + data.readBytes(valueBytes, 0, length); |
| 808 | + |
| 809 | + return new BytesRef(valueBytes); |
| 810 | + } |
| 811 | + |
/**
 * Returns the wrapped instance's value for the current document unchanged, i.e. the whole packed payload rather than
 * a single decoded value.
 */
| 812 | + @Override |
| 813 | + public BytesRef binaryValue() throws IOException { |
| 814 | + return binaryDocValues.binaryValue(); |
| 815 | + } |
| 816 | + |
/**
 * Positions the wrapped instance on {@code docId}. When the document has a value, a new cursor is created over its
 * bytes and the leading VInt is consumed as the value count; otherwise the count is reset to 0.
 *
 * @return true when the document has a value, false otherwise
 */
| 817 | + @Override |
| 818 | + public boolean advanceExact(int docId) throws IOException { |
| 819 | + // if document has a value, read underlying bytes |
| 820 | + if (binaryDocValues.advanceExact(docId)) { |
| 821 | + BytesRef docValuesBytes = binaryDocValues.binaryValue(); |
| 822 | + data = new ByteArrayDataInput(docValuesBytes.bytes, docValuesBytes.offset, docValuesBytes.length); |
| 823 | + docValueCount = data.readVInt(); |
| 824 | + return true; |
| 825 | + } |
| 826 | + |
| 827 | + // otherwise there is nothing to do |
| 828 | + docValueCount = 0; |
| 829 | + return false; |
| 830 | + } |
| 831 | + |
/**
 * @return the value count read by the most recent {@code advanceExact} call, or 0 if that call found no value
 */
| 832 | + public int docValueCount() { |
| 833 | + return docValueCount; |
| 834 | + } |
| 835 | + } |
820 | 836 | } |
0 commit comments