|  | 
|  | 1 | +/* | 
|  | 2 | + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | 
|  | 3 | + * or more contributor license agreements. Licensed under the "Elastic License | 
|  | 4 | + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side | 
|  | 5 | + * Public License v 1"; you may not use this file except in compliance with, at | 
|  | 6 | + * your election, the "Elastic License 2.0", the "GNU Affero General Public | 
|  | 7 | + * License v3.0 only", or the "Server Side Public License, v 1". | 
|  | 8 | + */ | 
|  | 9 | + | 
|  | 10 | +package org.elasticsearch.index.engine; | 
|  | 11 | + | 
|  | 12 | +import org.apache.lucene.codecs.KnnVectorsReader; | 
|  | 13 | +import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader; | 
|  | 14 | +import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; | 
|  | 15 | +import org.apache.lucene.index.FieldInfo; | 
|  | 16 | +import org.apache.lucene.index.IndexReader; | 
|  | 17 | +import org.apache.lucene.index.LeafReaderContext; | 
|  | 18 | +import org.apache.lucene.index.MergePolicy; | 
|  | 19 | +import org.apache.lucene.index.SegmentCommitInfo; | 
|  | 20 | +import org.apache.lucene.index.SegmentReader; | 
|  | 21 | +import org.apache.lucene.index.VectorEncoding; | 
|  | 22 | +import org.elasticsearch.common.lucene.Lucene; | 
|  | 23 | + | 
|  | 24 | +import java.util.List; | 
|  | 25 | +import java.util.Map; | 
|  | 26 | +import java.util.stream.Collectors; | 
|  | 27 | + | 
|  | 28 | +/** | 
|  | 29 | + * Provides an estimation of the memory needed to merge segments. | 
|  | 30 | + * | 
|  | 31 | + * This class is a temporary solution until we have a better way to estimate the memory needed for merges in Lucene | 
|  | 32 | + * (see the corresponding <a href="https://github.com/apache/lucene/issues/14225">Lucene issue</a>) | 
|  | 33 | + * We can work iteratively in providing estimations for different types of fields and vector encodings. | 
|  | 34 | + */ | 
|  | 35 | +public class MergeMemoryEstimator { | 
|  | 36 | + | 
|  | 37 | +    // Determined empirically by using Accountable.ramBytesUsed() during merges on Lucene using an instrumented build of Lucene. | 
|  | 38 | +    // Didn't adapted the ramBytesUsed() code for this as it depends on graph levels and size for non-zero levels, which are difficult | 
|  | 39 | +    // to estimate without actually building the graph. | 
|  | 40 | +    public static final long HNSW_PER_DOC_ESTIMATION = 348L; | 
|  | 41 | + | 
|  | 42 | +    /** | 
|  | 43 | +     * Estimates the memory, in bytes, needed to merge the segments of the given merge. | 
|  | 44 | +     */ | 
|  | 45 | +    public static long estimateMergeMemory(MergePolicy.OneMerge merge, IndexReader indexReader) { | 
|  | 46 | +        assert merge.segments.isEmpty() == false; | 
|  | 47 | + | 
|  | 48 | +        long memoryNeeded = 0; | 
|  | 49 | +        Map<String, SegmentCommitInfo> segments = merge.segments.stream().collect(Collectors.toMap(s -> s.info.name, s -> s)); | 
|  | 50 | +        List<LeafReaderContext> leaves = indexReader.leaves(); | 
|  | 51 | +        SegmentReader segmentReader = null; | 
|  | 52 | +        for (LeafReaderContext leafReaderContext : leaves) { | 
|  | 53 | +            segmentReader = Lucene.segmentReader(leafReaderContext.reader()); | 
|  | 54 | +            String segmentName = segmentReader.getSegmentName(); | 
|  | 55 | +            SegmentCommitInfo segmentCommitInfo = segments.get(segmentName); | 
|  | 56 | +            if (segmentCommitInfo != null) { | 
|  | 57 | +                memoryNeeded += estimateMergeMemory(segmentCommitInfo, segmentReader); | 
|  | 58 | +                segments.remove(segmentName); | 
|  | 59 | +                if (segments.isEmpty()) { | 
|  | 60 | +                    break; | 
|  | 61 | +                } | 
|  | 62 | +            } | 
|  | 63 | +        } | 
|  | 64 | + | 
|  | 65 | +        // Estimate segments without readers - the searcher may not have been refreshed yet, so estimate them with the field info from | 
|  | 66 | +        // the last segment reader | 
|  | 67 | +        if (segmentReader != null) { | 
|  | 68 | +            for (SegmentCommitInfo segmentCommitInfo : segments.values()) { | 
|  | 69 | +                memoryNeeded += estimateMergeMemory(segmentCommitInfo, segmentReader); | 
|  | 70 | +            } | 
|  | 71 | +        } | 
|  | 72 | + | 
|  | 73 | +        return memoryNeeded; | 
|  | 74 | +    } | 
|  | 75 | + | 
|  | 76 | +    private static long estimateMergeMemory(SegmentCommitInfo segmentCommitInfo, SegmentReader reader) { | 
|  | 77 | +        long maxMem = 0; | 
|  | 78 | +        for (FieldInfo fieldInfo : reader.getFieldInfos()) { | 
|  | 79 | +            maxMem = Math.max(maxMem, estimateFieldMemory(fieldInfo, segmentCommitInfo, reader)); | 
|  | 80 | +        } | 
|  | 81 | +        return maxMem; | 
|  | 82 | +    } | 
|  | 83 | + | 
|  | 84 | +    private static long estimateFieldMemory(FieldInfo fieldInfo, SegmentCommitInfo segmentCommitInfo, SegmentReader segmentReader) { | 
|  | 85 | + | 
|  | 86 | +        long maxMem = 0; | 
|  | 87 | +        if (fieldInfo.hasVectorValues()) { | 
|  | 88 | +            maxMem = Math.max(maxMem, estimateVectorFieldMemory(fieldInfo, segmentCommitInfo, segmentReader)); | 
|  | 89 | +        } | 
|  | 90 | +        // TODO Work on estimations on other field infos when / if needed | 
|  | 91 | + | 
|  | 92 | +        return maxMem; | 
|  | 93 | +    } | 
|  | 94 | + | 
|  | 95 | +    private static long estimateVectorFieldMemory(FieldInfo fieldInfo, SegmentCommitInfo segmentCommitInfo, SegmentReader segmentReader) { | 
|  | 96 | +        KnnVectorsReader vectorsReader = segmentReader.getVectorReader(); | 
|  | 97 | +        if (vectorsReader instanceof PerFieldKnnVectorsFormat.FieldsReader perFieldKnnVectorsFormat) { | 
|  | 98 | +            vectorsReader = perFieldKnnVectorsFormat.getFieldReader(fieldInfo.getName()); | 
|  | 99 | +        } | 
|  | 100 | + | 
|  | 101 | +        return getVectorFieldEstimation(fieldInfo, segmentCommitInfo, vectorsReader); | 
|  | 102 | +    } | 
|  | 103 | + | 
|  | 104 | +    private static long getVectorFieldEstimation(FieldInfo fieldInfo, SegmentCommitInfo segmentCommitInfo, KnnVectorsReader vectorsReader) { | 
|  | 105 | +        int numDocs = segmentCommitInfo.info.maxDoc() - segmentCommitInfo.getDelCount(); | 
|  | 106 | +        if (vectorsReader instanceof Lucene99HnswVectorsReader) { | 
|  | 107 | +            return numDocs * HNSW_PER_DOC_ESTIMATION; | 
|  | 108 | + | 
|  | 109 | +        } else { | 
|  | 110 | +            // Dominated by the heap byte buffer size used to write each vector | 
|  | 111 | +            if (fieldInfo.getVectorEncoding() == VectorEncoding.FLOAT32) { | 
|  | 112 | +                return fieldInfo.getVectorDimension() * VectorEncoding.FLOAT32.byteSize; | 
|  | 113 | +            } | 
|  | 114 | +            // Byte does not use buffering for writing but the IndexOutput directly | 
|  | 115 | +            return 0; | 
|  | 116 | +        } | 
|  | 117 | +    } | 
|  | 118 | +} | 
0 commit comments