Skip to content

Commit c4b5037

Browse files
committed
Disk usage doesn't include synthetic _id postings
1 parent 30f327e commit c4b5037

File tree

1 file changed

+6
-0
lines changed

1 file changed

+6
-0
lines changed

server/src/main/java/org/elasticsearch/action/admin/indices/diskusage/IndexDiskUsageAnalyzer.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656
import org.elasticsearch.core.CheckedConsumer;
5757
import org.elasticsearch.core.IOUtils;
5858
import org.elasticsearch.index.codec.postings.ES812PostingsFormat;
59+
import org.elasticsearch.index.mapper.SyntheticIdField;
5960
import org.elasticsearch.index.shard.ShardId;
6061
import org.elasticsearch.index.store.LuceneFilesExtensions;
6162

@@ -368,6 +369,11 @@ void analyzeInvertedIndex(SegmentReader reader, IndexDiskUsageStats stats) throw
368369
if (terms == null) {
369370
continue;
370371
}
372+
if (SyntheticIdField.hasSyntheticIdAttributes(field.attributes())) {
373+
// Synthetic _id field doesn't have an inverted index stored on disk,
374+
// but it pretends to have one on the read path by setting IndexOptions.DOCS
375+
continue;
376+
}
371377
// It's expensive to look up every term and visit every document of the postings lists of all terms.
372378
// As we track the min/max positions of read bytes, we just visit the two ends of a partition containing
373379
// the data. We might miss some small parts of the data, but it's a good trade-off to speed up the process.

0 commit comments

Comments
 (0)