ScanAndCompareGarbageCollector.java
@@ -24,6 +24,7 @@
import static org.apache.bookkeeper.common.concurrent.FutureUtils.result;

import com.google.common.collect.Sets;
import com.google.common.util.concurrent.RateLimiter;
import java.io.IOException;
import java.net.URI;
import java.util.List;
@@ -84,6 +85,7 @@ public class ScanAndCompareGarbageCollector implements GarbageCollector {
private int activeLedgerCounter;
private StatsLogger statsLogger;
private final int maxConcurrentRequests;
private final RateLimiter gcMetadataOpRateLimiter;

public ScanAndCompareGarbageCollector(LedgerManager ledgerManager, CompactableLedgerStorage ledgerStorage,
ServerConfiguration conf, StatsLogger statsLogger) throws IOException {
@@ -103,6 +105,7 @@ public ScanAndCompareGarbageCollector(LedgerManager ledgerManager, CompactableLe
enableGcOverReplicatedLedger, gcOverReplicatedLedgerIntervalMillis, maxConcurrentRequests);

verifyMetadataOnGc = conf.getVerifyMetadataOnGC();
this.gcMetadataOpRateLimiter = RateLimiter.create(conf.getGcMetadataOpRateLimit());

this.activeLedgerCounter = 0;
}
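
For context, Guava's RateLimiter hands out permits at a fixed rate, and acquire() blocks the caller just long enough to honor that rate. Below is a minimal, self-contained sketch of the throttling pattern the GC code above follows; the 200 permits/s rate and the loop body are illustrative, not taken from this patch:

import com.google.common.util.concurrent.RateLimiter;

public class GcThrottleSketch {
    public static void main(String[] args) {
        RateLimiter limiter = RateLimiter.create(200.0); // ~200 permits per second
        for (int i = 0; i < 1000; i++) {
            limiter.acquire(); // blocks until the next permit is available
            // ... one metadata operation would run here ...
        }
        // 1000 permits at 200/s: the loop takes roughly 5 seconds in total
    }
}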
@@ -153,6 +156,7 @@ public void gc(GarbageCleaner garbageCleaner) {
Versioned<LedgerMetadata> metadata = null;
while (!done) {
start = end + 1;
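// block until the rate limiter grants a permit before scanning the next metadata range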
gcMetadataOpRateLimiter.acquire();
if (ledgerRangeIterator.hasNext()) {
LedgerRange lRange = ledgerRangeIterator.next();
ledgersInMetadata = lRange.getLedgers();
@@ -175,6 +179,7 @@ public void gc(GarbageCleaner garbageCleaner) {
metadata = null;
int rc = BKException.Code.OK;
try {
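// rate-limit the per-ledger metadata read used to double-check a ledger before deletion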
gcMetadataOpRateLimiter.acquire();
metadata = result(ledgerManager.readLedgerMetadata(bkLid), zkOpTimeoutMs,
TimeUnit.MILLISECONDS);
} catch (BKException | TimeoutException e) {
@@ -236,6 +241,7 @@ private Set<Long> removeOverReplicatedledgers(Set<Long> bkActiveledgers, final G
// check ledger ensembles before creating lock nodes.
// this is to reduce the number of lock node creations and deletions in ZK.
// the ensemble check is done again after the lock node is created.
gcMetadataOpRateLimiter.acquire();
Versioned<LedgerMetadata> preCheckMetadata = ledgerManager.readLedgerMetadata(ledgerId).get();
if (!isNotBookieIncludedInLedgerEnsembles(preCheckMetadata)) {
latch.countDown();
@@ -261,6 +267,7 @@ private Set<Long> removeOverReplicatedledgers(Set<Long> bkActiveledgers, final G
// current bookie again and, in that case, we cannot remove the ledger from local storage
lum.acquireUnderreplicatedLedger(ledgerId);
semaphore.acquire();
gcMetadataOpRateLimiter.acquire();
ledgerManager.readLedgerMetadata(ledgerId)
.whenComplete((metadata, exception) -> {
try {
ServerConfiguration.java
@@ -114,6 +114,7 @@ public class ServerConfiguration extends AbstractConfiguration<ServerConfigurati
protected static final String GC_OVERREPLICATED_LEDGER_WAIT_TIME = "gcOverreplicatedLedgerWaitTime";
protected static final String GC_OVERREPLICATED_LEDGER_MAX_CONCURRENT_REQUESTS =
"gcOverreplicatedLedgerMaxConcurrentRequests";
protected static final String GC_METADATA_OP_RATE_LIMIT = "gcMetadataOpRateLimit";
protected static final String USE_TRANSACTIONAL_COMPACTION = "useTransactionalCompaction";
protected static final String VERIFY_METADATA_ON_GC = "verifyMetadataOnGC";
protected static final String GC_ENTRYLOGMETADATA_CACHE_ENABLED = "gcEntryLogMetadataCacheEnabled";
@@ -481,6 +482,24 @@ public ServerConfiguration setGcOverreplicatedLedgerMaxConcurrentRequests(
return this;
}

/**
* Get the rate limit of metadata operations in garbage collection, in operations per second.
* @return rate limit of metadata operations in garbage collection, in operations per second
*/
public int getGcMetadataOpRateLimit() {
return this.getInt(GC_METADATA_OP_RATE_LIMIT, 1000);
}

/**
* Set the rate limit of metadata operations in garbage collection, in operations per second.
* @param gcRateLimit rate limit of metadata operations, in operations per second
* @return server configuration
*/
public ServerConfiguration setGcMetadataOpRateLimit(int gcRateLimit) {
this.setProperty(GC_METADATA_OP_RATE_LIMIT, Integer.toString(gcRateLimit));
return this;
}
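
For illustration, a minimal usage sketch of the new knob (the 500 ops/s value is arbitrary; per the constructor change above, this setting feeds RateLimiter.create in ScanAndCompareGarbageCollector):

ServerConfiguration conf = new ServerConfiguration();
conf.setGcMetadataOpRateLimit(500);          // cap GC metadata ops at ~500 per second
int limit = conf.getGcMetadataOpRateLimit(); // 500; defaults to 1000 when unset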

/**
* Get whether to use transactional compaction and using a separate log for compaction or not.
*
@@ -36,6 +36,7 @@
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
@@ -342,6 +343,53 @@ public void clean(long ledgerId) {
assertEquals("Should have cleaned first ledger " + first, (long) first, (long) cleaned.get(0));
}


/**
* Verifies that the garbage collector respects the configured rate limit for metadata operations.
* @throws Exception
*/
@Test
public void testGcMetadataOpRateLimit() throws Exception {
int numLedgers = 2000;
int numRemovedLedgers = 800;
final Set<Long> createdLedgers = new HashSet<Long>();
createLedgers(numLedgers, createdLedgers);

ServerConfiguration conf = new ServerConfiguration(baseConf);
int customRateLimit = 200;
conf.setGcMetadataOpRateLimit(customRateLimit);
// enable metadata verification so each deleted ledger triggers a rate-limited metadata read
conf.setVerifyMetadataOnGc(true);

final GarbageCollector garbageCollector = new ScanAndCompareGarbageCollector(
getLedgerManager(), new MockLedgerStorage(), conf, NullStatsLogger.INSTANCE);

// delete created ledgers to simulate the garbage collection scenario
Iterator<Long> createdLedgersIterator = createdLedgers.iterator();
for (int i = 0; i < numRemovedLedgers && createdLedgersIterator.hasNext(); i++) {
long ledgerId = createdLedgersIterator.next();
try {
removeLedger(ledgerId);
} catch (Exception e) {
LOG.error("Failed to remove ledger {}", ledgerId, e);
}
}

long startTime = System.currentTimeMillis();
garbageCollector.gc(new GarbageCollector.GarbageCleaner() {
@Override
public void clean(long ledgerId) {
}
});
long endTime = System.currentTimeMillis();
long duration = endTime - startTime;
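// each of the numRemovedLedgers deleted ledgers triggers at least one rate-limited
// metadata read, so gc cannot finish faster than numRemovedLedgers / customRateLimit seconds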
long minExpectedTime = (numRemovedLedgers * 1000L) / customRateLimit;

LOG.info("GC operation with rate limit {} took {} ms, theoretical minimum time: {} ms",
customRateLimit, duration, minExpectedTime);
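// allow ~30% slack: Guava's RateLimiter can grant a small initial burst, so the
// measured duration may come in slightly under the theoretical minimum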
assertTrue("GC operation should be rate limited", duration >= minExpectedTime * 0.7);
}

/*
* in this scenario no ledger is created, so ledgeriterator's hasNext call would return false and next would be
* null. GarbageCollector.gc is expected to behave normally
3 changes: 3 additions & 0 deletions conf/bk_server.conf
@@ -601,6 +601,9 @@ ledgerDirectories=/tmp/bk-data
# interval if there is enough disk capacity.
# gcWaitTime=1000

# The rate limit of metadata operations in garbage collection, in operations per second.
# gcMetadataOpRateLimit=1000

# How long the interval to trigger next garbage collection of overreplicated
# ledgers, in milliseconds [Default: 1 day]. This should not be run very frequently
# since we read the metadata for all the ledgers on the bookie from zk
1 change: 1 addition & 0 deletions site3/website/docs/reference/config.md
@@ -191,6 +191,7 @@ The table below lists parameters that you can set to configure bookies. All conf
| Parameter | Description | Default
| --------- | ----------- | ------- |
| gcWaitTime | How long the interval to trigger next garbage collection, in milliseconds. Since garbage collection is running in background, too frequent gc will hurt performance. It is better to give a higher number of gc interval if there is enough disk capacity. | 1000 |
| gcMetadataOpRateLimit | Rate limit for metadata operations in garbage collection, in operations per second. This caps how fast GC reads ledger metadata so it does not overwhelm the metadata service (e.g. ZooKeeper). | 1000 |
| gcOverreplicatedLedgerWaitTime | How long the interval to trigger next garbage collection of overreplicated ledgers, in milliseconds. This should not be run very frequently since we read the metadata for all the ledgers on the bookie from zk. | 86400000 |
| gcOverreplicatedLedgerMaxConcurrentRequests | Max number of concurrent requests in garbage collection of overreplicated ledgers. | 1000 |
| isForceGCAllowWhenNoSpace | Whether force compaction is allowed when the disk is full or almost full. Forcing GC may get some space back, but may also fill up disk space more quickly. This is because new log files are created before GC, while old garbage log files are deleted after GC. | false |