Skip to content

Commit 751d79d

Browse files
authored
Disk usage doesn't include synthetic _id postings (elastic#138745)
* Disk usage doesn't include synthetic _id postings * Update docs/changelog/138745.yaml * Assert expected _id field in short circuit
1 parent 7b57bb1 commit 751d79d

File tree

2 files changed

+12
-0
lines changed

2 files changed

+12
-0
lines changed

docs/changelog/138745.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 138745
2+
summary: Disk usage doesn't include synthetic `_id` postings
3+
area: Distributed
4+
type: enhancement
5+
issues: []

server/src/main/java/org/elasticsearch/action/admin/indices/diskusage/IndexDiskUsageAnalyzer.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656
import org.elasticsearch.core.CheckedConsumer;
5757
import org.elasticsearch.core.IOUtils;
5858
import org.elasticsearch.index.codec.postings.ES812PostingsFormat;
59+
import org.elasticsearch.index.mapper.SyntheticIdField;
5960
import org.elasticsearch.index.shard.ShardId;
6061
import org.elasticsearch.index.store.LuceneFilesExtensions;
6162

@@ -368,6 +369,12 @@ void analyzeInvertedIndex(SegmentReader reader, IndexDiskUsageStats stats) throw
368369
if (terms == null) {
369370
continue;
370371
}
372+
if (SyntheticIdField.hasSyntheticIdAttributes(field.attributes())) {
373+
// Synthetic _id field doesn't have an inverted index stored on disk,
374+
// but it pretends to have one on the read path by setting IndexOptions.DOCS
375+
assert SyntheticIdField.NAME.equals(field.getName()) : "Expected only synthetic id fields to have synthetic id attribute";
376+
continue;
377+
}
371378
// It's expensive to look up every term and visit every document of the postings lists of all terms.
372379
// As we track the min/max positions of read bytes, we just visit the two ends of a partition containing
373380
// the data. We might miss some small parts of the data, but it's a good trade-off to speed up the process.

0 commit comments

Comments
 (0)