From b0f739c780152614c4bddb305a83809599f4a535 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Mon, 2 Jun 2025 17:05:55 +0200 Subject: [PATCH] Optimize sparse vector stats collection (#128740) This change improves the performance of sparse vector statistics gathering by reading each sparse vector field's own document count (`Terms.getDocCount()`) directly, rather than looking up each field name as a term in the field-names field (`FieldNamesFieldMapper.NAME`) to compute stats. By avoiding per-term disk/network reads and instead leveraging statistics already loaded into leaf readers at index opening, we expect to significantly reduce overhead. Relates to #128583 --- docs/changelog/128740.yaml | 5 +++++ .../elasticsearch/index/engine/Engine.java | 19 +++++++++---------- 2 files changed, 14 insertions(+), 10 deletions(-) create mode 100644 docs/changelog/128740.yaml diff --git a/docs/changelog/128740.yaml b/docs/changelog/128740.yaml new file mode 100644 index 0000000000000..89ee856ce5a6a --- /dev/null +++ b/docs/changelog/128740.yaml @@ -0,0 +1,5 @@ +pr: 128740 +summary: Optimize sparse vector stats collection +area: Stats +type: enhancement +issues: [] diff --git a/server/src/main/java/org/elasticsearch/index/engine/Engine.java b/server/src/main/java/org/elasticsearch/index/engine/Engine.java index 36fd18144ad6e..25660f54e849d 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/Engine.java +++ b/server/src/main/java/org/elasticsearch/index/engine/Engine.java @@ -25,7 +25,6 @@ import org.apache.lucene.index.SegmentInfos; import org.apache.lucene.index.SegmentReader; import org.apache.lucene.index.Terms; -import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.QueryCache; import org.apache.lucene.search.QueryCachingPolicy; @@ -61,7 +60,6 @@ import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.VersionType; import org.elasticsearch.index.mapper.DocumentParser; -import org.elasticsearch.index.mapper.FieldNamesFieldMapper; import 
org.elasticsearch.index.mapper.LuceneDocument; import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.mapper.Mapping; @@ -337,14 +335,15 @@ protected final SparseVectorStats sparseVectorStats(IndexReader indexReader, Lis private long getSparseVectorValueCount(final LeafReader atomicReader, List fields) throws IOException { long count = 0; - Terms terms = atomicReader.terms(FieldNamesFieldMapper.NAME); - if (terms == null) { - return count; - } - TermsEnum termsEnum = terms.iterator(); - for (var fieldName : fields) { - if (termsEnum.seekExact(fieldName)) { - count += termsEnum.docFreq(); + for (var fieldNameBR : fields) { + var fieldName = fieldNameBR.utf8ToString(); + var fi = atomicReader.getFieldInfos().fieldInfo(fieldName); + if (fi == null) { + continue; + } + Terms terms = atomicReader.terms(fieldName); + if (terms != null) { + count += terms.getDocCount(); } } return count;