Skip to content

Commit 89f01ed

Browse files
authored
Improve estimating live docs bytes (#132523)
By simulating what `FixedBitSet` exactly does. This also improves testing because we can compare against `FixedBitSet#ramBytesUsed()`. Follow up from #132232
1 parent 0d57996 commit 89f01ed

File tree

3 files changed

+41
-13
lines changed

3 files changed

+41
-13
lines changed

server/src/main/java/org/elasticsearch/index/engine/Engine.java

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import org.apache.lucene.store.AlreadyClosedException;
3535
import org.apache.lucene.util.Bits;
3636
import org.apache.lucene.util.BytesRef;
37+
import org.apache.lucene.util.FixedBitSet;
3738
import org.apache.lucene.util.RamUsageEstimator;
3839
import org.apache.lucene.util.SetOnce;
3940
import org.elasticsearch.ExceptionsHelper;
@@ -283,7 +284,7 @@ protected static ShardFieldStats shardFieldStats(List<LeafReaderContext> leaves)
283284
int totalFields = 0;
284285
long usages = 0;
285286
long totalPostingBytes = 0;
286-
long liveDocsBytes = 0;
287+
long totalLiveDocsBytes = 0;
287288
for (LeafReaderContext leaf : leaves) {
288289
numSegments++;
289290
var fieldInfos = leaf.reader().getFieldInfos();
@@ -312,18 +313,23 @@ protected static ShardFieldStats shardFieldStats(List<LeafReaderContext> leaves)
312313
var liveDocs = segmentReader.getLiveDocs();
313314
if (liveDocs != null) {
314315
assert validateLiveDocsClass(liveDocs);
315-
// Would prefer to use FixedBitSet#ramBytesUsed() however FixedBits / Bits interface don't expose that.
316-
// This almost does what FixedBitSet#ramBytesUsed() does, liveDocs.length() returns the length of the bits long
317-
// array
318-
liveDocsBytes += RamUsageEstimator.alignObjectSize(
319-
(long) RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (liveDocs.length() / 8L)
320-
);
316+
long liveDocsBytes = getLiveDocsBytes(liveDocs);
317+
totalLiveDocsBytes += liveDocsBytes;
321318
}
322319
}
323320
}
324321
}
325322
}
326-
return new ShardFieldStats(numSegments, totalFields, usages, totalPostingBytes, liveDocsBytes);
323+
return new ShardFieldStats(numSegments, totalFields, usages, totalPostingBytes, totalLiveDocsBytes);
324+
}
325+
326+
// Would prefer to use FixedBitSet#ramBytesUsed() however FixedBits / Bits interface don't expose that.
327+
// This simulates FixedBitSet#ramBytesUsed() does:
328+
private static long getLiveDocsBytes(Bits liveDocs) {
329+
int words = FixedBitSet.bits2words(liveDocs.length());
330+
return ShardFieldStats.FIXED_BITSET_BASE_RAM_BYTES_USED + RamUsageEstimator.alignObjectSize(
331+
RamUsageEstimator.sizeOf(RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (long) Long.BYTES * words)
332+
);
327333
}
328334

329335
private static boolean validateLiveDocsClass(Bits liveDocs) {

server/src/main/java/org/elasticsearch/index/shard/ShardFieldStats.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99

1010
package org.elasticsearch.index.shard;
1111

12+
import org.apache.lucene.util.FixedBitSet;
13+
import org.apache.lucene.util.RamUsageEstimator;
1214
import org.elasticsearch.common.util.FeatureFlag;
1315

1416
/**
@@ -25,5 +27,6 @@
2527
public record ShardFieldStats(int numSegments, int totalFields, long fieldUsages, long postingsInMemoryBytes, long liveDocsBytes) {
2628

2729
public static final FeatureFlag TRACK_LIVE_DOCS_IN_MEMORY_BYTES = new FeatureFlag("track_live_docs_in_memory_bytes");
30+
public static final long FIXED_BITSET_BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(FixedBitSet.class);
2831

2932
}

server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import org.apache.lucene.store.IOContext;
2828
import org.apache.lucene.util.BytesRef;
2929
import org.apache.lucene.util.Constants;
30+
import org.apache.lucene.util.FixedBitSet;
3031
import org.elasticsearch.ElasticsearchException;
3132
import org.elasticsearch.ExceptionsHelper;
3233
import org.elasticsearch.action.ActionListener;
@@ -77,6 +78,7 @@
7778
import org.elasticsearch.index.IndexModule;
7879
import org.elasticsearch.index.IndexSettings;
7980
import org.elasticsearch.index.IndexVersion;
81+
import org.elasticsearch.index.MergePolicyConfig;
8082
import org.elasticsearch.index.codec.CodecService;
8183
import org.elasticsearch.index.codec.TrackingPostingsInMemoryBytesCodec;
8284
import org.elasticsearch.index.engine.CommitStats;
@@ -1981,7 +1983,10 @@ public void testShardFieldStats() throws IOException {
19811983
}
19821984

19831985
public void testShardFieldStatsWithDeletes() throws IOException {
1984-
Settings settings = Settings.builder().put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), TimeValue.MINUS_ONE).build();
1986+
Settings settings = Settings.builder()
1987+
.put(MergePolicyConfig.INDEX_MERGE_ENABLED, false)
1988+
.put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), TimeValue.MINUS_ONE)
1989+
.build();
19851990
IndexShard shard = newShard(true, settings);
19861991
assertNull(shard.getShardFieldStats());
19871992
recoverShardFromStore(shard);
@@ -2010,8 +2015,14 @@ public void testShardFieldStatsWithDeletes() throws IOException {
20102015
stats = shard.getShardFieldStats();
20112016
// More segments because delete operation is stored in the new segment for replication purposes.
20122017
assertThat(stats.numSegments(), equalTo(2));
2013-
// Delete op is stored in new segment, but marked as deleted. All segements have live docs:
2014-
assertThat(stats.liveDocsBytes(), equalTo(liveDocsTrackingEnabled ? 40L : 0L));
2018+
long expectedLiveDocsSize = 0;
2019+
if (liveDocsTrackingEnabled) {
2020+
// Delete op is stored in new segment, but marked as deleted. All segments have live docs:
2021+
expectedLiveDocsSize += new FixedBitSet(numDocs).ramBytesUsed();
2022+
// The second segment holds only the delete operation, which is marked as deleted:
2023+
expectedLiveDocsSize += new FixedBitSet(1).ramBytesUsed();
2024+
}
2025+
assertThat(stats.liveDocsBytes(), equalTo(expectedLiveDocsSize));
20152026

20162027
// delete another doc:
20172028
deleteDoc(shard, "first_1");
@@ -2022,8 +2033,16 @@ public void testShardFieldStatsWithDeletes() throws IOException {
20222033
stats = shard.getShardFieldStats();
20232034
// More segments because delete operation is stored in the new segment for replication purposes.
20242035
assertThat(stats.numSegments(), equalTo(3));
2025-
// Delete op is stored in new segment, but marked as deleted. All segements have live docs:
2026-
assertThat(stats.liveDocsBytes(), equalTo(liveDocsTrackingEnabled ? 56L : 0L));
2036+
expectedLiveDocsSize = 0;
2037+
if (liveDocsTrackingEnabled) {
2038+
// Delete op is stored in new segment, but marked as deleted. All segments have live docs:
2039+
// First segment with deletes
2040+
expectedLiveDocsSize += new FixedBitSet(numDocs).ramBytesUsed();
2041+
// The second and third segments each hold only a delete operation, which is marked as deleted:
2042+
expectedLiveDocsSize += new FixedBitSet(1).ramBytesUsed();
2043+
expectedLiveDocsSize += new FixedBitSet(1).ramBytesUsed();
2044+
}
2045+
assertThat(stats.liveDocsBytes(), equalTo(expectedLiveDocsSize));
20272046

20282047
closeShards(shard);
20292048
}

0 commit comments

Comments
 (0)