From 94039695c45e4a4074597c50a467167dbbde650c Mon Sep 17 00:00:00 2001 From: Jordan Powers Date: Thu, 27 Mar 2025 11:17:59 -0700 Subject: [PATCH 1/2] Initial optimization implementation --- .../codec/tsdb/ES87TSDBDocValuesProducer.java | 64 +++++++++---------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java index 8a8095ecf6d21..44b7a78143118 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java @@ -9,6 +9,8 @@ package org.elasticsearch.index.codec.tsdb; +import com.carrotsearch.hppc.IntObjectHashMap; + import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.codecs.lucene90.IndexedDISI; @@ -43,20 +45,18 @@ import org.elasticsearch.core.IOUtils; import java.io.IOException; -import java.util.HashMap; -import java.util.Map; import static org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat.SKIP_INDEX_JUMP_LENGTH_PER_LEVEL; import static org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat.SKIP_INDEX_MAX_LEVEL; import static org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat.TERMS_DICT_BLOCK_LZ4_SHIFT; public class ES87TSDBDocValuesProducer extends DocValuesProducer { - private final Map numerics; - private final Map binaries; - private final Map sorted; - private final Map sortedSets; - private final Map sortedNumerics; - private final Map skippers; + private final IntObjectHashMap numerics; + private final IntObjectHashMap binaries; + private final IntObjectHashMap sorted; + private final IntObjectHashMap sortedSets; + private final IntObjectHashMap sortedNumerics; + private final IntObjectHashMap skippers; private final IndexInput data; private final int maxDoc; private final int version; @@ -64,12 +64,12 @@ public class ES87TSDBDocValuesProducer extends DocValuesProducer { ES87TSDBDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { - this.numerics = new HashMap<>(); - this.binaries = new HashMap<>(); - this.sorted = new HashMap<>(); - this.sortedSets = new HashMap<>(); - this.sortedNumerics = new HashMap<>(); - this.skippers = new HashMap<>(); + this.numerics = new IntObjectHashMap<>(); + this.binaries = new IntObjectHashMap<>(); + this.sorted = new IntObjectHashMap<>(); + this.sortedSets = new IntObjectHashMap<>(); + this.sortedNumerics = new IntObjectHashMap<>(); + this.skippers = new IntObjectHashMap<>(); this.maxDoc = state.segmentInfo.maxDoc(); this.merging = false; @@ -130,12 +130,12 @@ public class ES87TSDBDocValuesProducer extends DocValuesProducer { } private ES87TSDBDocValuesProducer( - Map numerics, - Map binaries, - Map sorted, - Map sortedSets, - Map sortedNumerics, - Map skippers, + IntObjectHashMap numerics, + IntObjectHashMap binaries, + IntObjectHashMap sorted, + IntObjectHashMap sortedSets, + IntObjectHashMap sortedNumerics, + IntObjectHashMap skippers, IndexInput data, int maxDoc, int version, @@ -160,13 +160,13 @@ public DocValuesProducer getMergeInstance() { @Override public NumericDocValues getNumeric(FieldInfo field) throws IOException { - NumericEntry entry = numerics.get(field.name); + NumericEntry entry = numerics.get(field.number); return getNumeric(entry, -1); } @Override public BinaryDocValues getBinary(FieldInfo field) throws IOException { - BinaryEntry entry = binaries.get(field.name); + BinaryEntry entry = binaries.get(field.number); if (entry.docsWithFieldOffset == -2) { return DocValues.emptyBinary(); } @@ -320,7 +320,7 @@ public boolean advanceExact(int target) throws IOException { @Override public SortedDocValues getSorted(FieldInfo field) throws IOException { - SortedEntry entry = sorted.get(field.name); + SortedEntry entry = sorted.get(field.number); return getSorted(entry); } @@ -675,13 +675,13 @@ public int docFreq() throws IOException { @Override public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException { - SortedNumericEntry entry = sortedNumerics.get(field.name); + SortedNumericEntry entry = sortedNumerics.get(field.number); return getSortedNumeric(entry, -1); } @Override public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { - SortedSetEntry entry = sortedSets.get(field.name); + SortedSetEntry entry = sortedSets.get(field.number); if (entry.singleValueEntry != null) { return DocValues.singleton(getSorted(entry.singleValueEntry)); } @@ -743,7 +743,7 @@ public long cost() { @Override public DocValuesSkipper getSkipper(FieldInfo field) throws IOException { - final DocValuesSkipperEntry entry = skippers.get(field.name); + final DocValuesSkipperEntry entry = skippers.get(field.number); final IndexInput input = data.slice("doc value skipper", entry.offset, entry.length); // Prefetch the first page of data. Following pages are expected to get prefetched through @@ -869,18 +869,18 @@ private void readFields(IndexInput meta, FieldInfos infos) throws IOException { } byte type = meta.readByte(); if (info.docValuesSkipIndexType() != DocValuesSkipIndexType.NONE) { - skippers.put(info.name, readDocValueSkipperMeta(meta)); + skippers.put(info.number, readDocValueSkipperMeta(meta)); } if (type == ES87TSDBDocValuesFormat.NUMERIC) { - numerics.put(info.name, readNumeric(meta)); + numerics.put(info.number, readNumeric(meta)); } else if (type == ES87TSDBDocValuesFormat.BINARY) { - binaries.put(info.name, readBinary(meta)); + binaries.put(info.number, readBinary(meta)); } else if (type == ES87TSDBDocValuesFormat.SORTED) { - sorted.put(info.name, readSorted(meta)); + sorted.put(info.number, readSorted(meta)); } else if (type == ES87TSDBDocValuesFormat.SORTED_SET) { - sortedSets.put(info.name, readSortedSet(meta)); + sortedSets.put(info.number, readSortedSet(meta)); } else if (type == ES87TSDBDocValuesFormat.SORTED_NUMERIC) { - sortedNumerics.put(info.name, readSortedNumeric(meta)); + sortedNumerics.put(info.number, readSortedNumeric(meta)); } else { throw new CorruptIndexException("invalid type: " + type, meta); } From 7ef10f20913d19f4b14ae5fe73b9a2f68468d6dc Mon Sep 17 00:00:00 2001 From: Jordan Powers Date: Thu, 27 Mar 2025 11:30:10 -0700 Subject: [PATCH 2/2] Use correct IntObjectHashMap implementation --- .../index/codec/tsdb/ES87TSDBDocValuesProducer.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java index 44b7a78143118..7edef480dd7b8 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java @@ -9,8 +9,6 @@ package org.elasticsearch.index.codec.tsdb; -import com.carrotsearch.hppc.IntObjectHashMap; - import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.codecs.lucene90.IndexedDISI; @@ -31,6 +29,7 @@ import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.internal.hppc.IntObjectHashMap; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.ChecksumIndexInput;