From ddb63d0309aa360758e76cc1539eb6ee9e5053d9 Mon Sep 17 00:00:00 2001 From: Jordan Powers Date: Fri, 28 Mar 2025 11:08:06 -0700 Subject: [PATCH] Optimize entry maps in TSDB doc values codec (#125805) Currently, the Lucene90DocValuesProducer uses optimized IntObjectHashMaps to track various entries for each field, while the ES87TSDBDocValuesProducer uses regular HashMap. This patch updates the ES87TSDBDocValuesProducer class to also use the optimized hash maps. (cherry picked from commit 35a6298730b2c931853b00f4ba694616cbe90d17) # Conflicts: # server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java --- .../codec/tsdb/ES87TSDBDocValuesProducer.java | 53 +++++++++---------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java index 5fc708b5ac76a..82d10c62d2763 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java @@ -27,6 +27,7 @@ import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.internal.hppc.IntObjectHashMap; import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.DataInput; @@ -40,17 +41,15 @@ import org.elasticsearch.core.IOUtils; import java.io.IOException; -import java.util.HashMap; -import java.util.Map; import static org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat.TERMS_DICT_BLOCK_LZ4_SHIFT; public class ES87TSDBDocValuesProducer extends DocValuesProducer { - private final Map numerics; - private final Map binaries; - private final Map sorted; - private final Map sortedSets; - private final Map sortedNumerics; + private final IntObjectHashMap numerics; + private final IntObjectHashMap binaries; + private final IntObjectHashMap sorted; + private final IntObjectHashMap sortedSets; + private final IntObjectHashMap sortedNumerics; private final IndexInput data; private final int maxDoc; private final int version; @@ -58,11 +57,11 @@ public class ES87TSDBDocValuesProducer extends DocValuesProducer { ES87TSDBDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { - this.numerics = new HashMap<>(); - this.binaries = new HashMap<>(); - this.sorted = new HashMap<>(); - this.sortedSets = new HashMap<>(); - this.sortedNumerics = new HashMap<>(); + this.numerics = new IntObjectHashMap<>(); + this.binaries = new IntObjectHashMap<>(); + this.sorted = new IntObjectHashMap<>(); + this.sortedSets = new IntObjectHashMap<>(); + this.sortedNumerics = new IntObjectHashMap<>(); this.maxDoc = state.segmentInfo.maxDoc(); this.merging = false; @@ -123,11 +122,11 @@ public class ES87TSDBDocValuesProducer extends DocValuesProducer { } private ES87TSDBDocValuesProducer( - Map numerics, - Map binaries, - Map sorted, - Map sortedSets, - Map sortedNumerics, + IntObjectHashMap numerics, + IntObjectHashMap binaries, + IntObjectHashMap sorted, + IntObjectHashMap sortedSets, + IntObjectHashMap sortedNumerics, IndexInput data, int maxDoc, int version, @@ -151,13 +150,13 @@ public DocValuesProducer getMergeInstance() { @Override public NumericDocValues getNumeric(FieldInfo field) throws IOException { - NumericEntry entry = numerics.get(field.name); + NumericEntry entry = numerics.get(field.number); return getNumeric(entry, -1); } @Override public BinaryDocValues getBinary(FieldInfo field) throws IOException { - BinaryEntry entry = binaries.get(field.name); + BinaryEntry entry = binaries.get(field.number); if (entry.docsWithFieldOffset == -2) { return DocValues.emptyBinary(); } @@ -315,7 +314,7 @@ public boolean advanceExact(int target) throws IOException { @Override public SortedDocValues getSorted(FieldInfo field) throws IOException { - SortedEntry entry = sorted.get(field.name); + SortedEntry entry = sorted.get(field.number); return getSorted(entry); } @@ -671,13 +670,13 @@ public int docFreq() throws IOException { @Override public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException { - SortedNumericEntry entry = sortedNumerics.get(field.name); + SortedNumericEntry entry = sortedNumerics.get(field.number); return getSortedNumeric(entry, -1); } @Override public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { - SortedSetEntry entry = sortedSets.get(field.name); + SortedSetEntry entry = sortedSets.get(field.number); if (entry.singleValueEntry != null) { return DocValues.singleton(getSorted(entry.singleValueEntry)); } @@ -756,15 +755,15 @@ private void readFields(IndexInput meta, FieldInfos infos) throws IOException { } byte type = meta.readByte(); if (type == ES87TSDBDocValuesFormat.NUMERIC) { - numerics.put(info.name, readNumeric(meta)); + numerics.put(info.number, readNumeric(meta)); } else if (type == ES87TSDBDocValuesFormat.BINARY) { - binaries.put(info.name, readBinary(meta)); + binaries.put(info.number, readBinary(meta)); } else if (type == ES87TSDBDocValuesFormat.SORTED) { - sorted.put(info.name, readSorted(meta)); + sorted.put(info.number, readSorted(meta)); } else if (type == ES87TSDBDocValuesFormat.SORTED_SET) { - sortedSets.put(info.name, readSortedSet(meta)); + sortedSets.put(info.number, readSortedSet(meta)); } else if (type == ES87TSDBDocValuesFormat.SORTED_NUMERIC) { - sortedNumerics.put(info.name, readSortedNumeric(meta)); + sortedNumerics.put(info.number, readSortedNumeric(meta)); } else { throw new CorruptIndexException("invalid type: " + type, meta); }