Skip to content

Commit da26c17

Browse files
committed
perf(chunkserver): Protect IO performance during massive delete
This PR further reduces trash-GC impact on foreground I/O by dynamically throttling expired-file deletion based on observed disk throughput, and improves delete wall-clock time by running removals per-disk in parallel. Key changes: - I/O-aware GC throttling (with floor guard) - Adds GC-specific I/O counters in HddStats and increments them on every read/write accounting path. - In ChunkTrashManagerImpl::collectGarbage(): - Samples and resets GC I/O counters each cycle (exchange(0)). - Normalizes by disk count (gDisks.size() under gDisksMutex) to estimate per-disk pressure. - Maintains max observed per-disk bytes (read/write) to create a stable baseline. - Computes totalIOPercentage and feeds it through an inverted sigmoid (steepness=12, center=0.3) to scale down trashGarbageCollectorBulkSize under load. - Adds a minimum threshold: only perform expired-file deletion when the scaled bulk size is >= 5, preventing “death by a thousand tiny deletes” during heavy I/O. - Parallel trash removal across disks -removeTrashFiles() now spawns a std::jthread per disk and delegates actual deletion/index updates to removeTrashFilesFromDisk() allowing independent disks to progress concurrently. - Behavioral defaults - CHUNK_TRASH_ENABLED default flips 0 → 1 (trash manager enabled unless explicitly disabled). - Default retention (kDefaultTrashTimeLimitSeconds) flips 259200 → 0 (immediate expiry unless configured). - Changed documentation accordingly. - Resource thread pacing - hddFreeResourcesThread() loop period increases 2s → 3s, replacing sleep() with Timeout + usleep(timeout.remaining_us()) for more precise loop cadence. - Test dependency: disabled GC for rebalancing test, mainly because chunks in trash affect disk usage statistics. Rationale: Large delete storms can saturate disks and degrade chunkserver latency. This patch makes the garbage collector self-throttling using real I/O signals and avoids thrashing by skipping deletion work when the system is already busy, while parallelizing deletes per disk to reclaim space faster when bandwidth is available. Signed-off-by: GigaCronos <jorge.cabrera@leil.io>
1 parent 1da5c18 commit da26c17

14 files changed

+81
-19
lines changed

doc/sfschunkserver.cfg.5.adoc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ defined at build time), /usr/local/lib/saunafs/plugins/chunkserver, and
186186

187187
*CHUNK_TRASH_ENABLED (EXPERIMENTAL)*:: enables or disables the chunk trash
188188
feature. When enabled, deleted chunks are moved to a trash directory instead of
189-
being immediately removed. (Default: 0)
189+
being immediately removed. (Default: 1)
190190

191191
*CHUNK_TRASH_EXPIRATION_SECONDS (EXPERIMENTAL)*:: specifies the timeout in
192192
seconds for chunks to remain in the trash before being permanently deleted.

src/admin/dump_config_command.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ const static std::unordered_map<std::string, std::string> defaultOptionsCS = {
176176
{"REPLICATION_CONNECTION_TIMEOUT_MS", "1000"},
177177
{"REPLICATION_WAVE_TIMEOUT_MS", "500"},
178178
{"PLUGINS_DIR", ""},
179-
{"CHUNK_TRASH_ENABLED", "0"},
179+
{"CHUNK_TRASH_ENABLED", "1"},
180180
{"CHUNK_TRASH_EXPIRATION_SECONDS", "259200"},
181181
{"CHUNK_TRASH_FREE_SPACE_THRESHOLD_GB", "0"},
182182
{"CHUNK_TRASH_GC_BATCH_SIZE", "1000"},

src/chunkserver/chunkserver-common/chunk_trash_index.cc

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,8 @@ ChunkTrashIndex::TrashIndexDiskEntries ChunkTrashIndex::getExpiredFilesInternal(
8080
const time_t &timeLimit, size_t bulkSize) {
8181
TrashIndexDiskEntries expiredFiles;
8282

83-
size_t count = 0;
8483
for (const auto &diskEntry : trashIndex) {
85-
if (bulkSize != 0 && count >= bulkSize) { break; }
86-
count += getExpiredFilesInternal(diskEntry.first, timeLimit, expiredFiles, bulkSize);
84+
getExpiredFilesInternal(diskEntry.first, timeLimit, expiredFiles, bulkSize);
8785
}
8886

8987
return expiredFiles;

src/chunkserver/chunkserver-common/chunk_trash_manager.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ void ChunkTrashManager::reloadConfig() {
106106
return;
107107
}
108108

109-
isEnabled = cfg_get("CHUNK_TRASH_ENABLED", static_cast<u_short>(0));
109+
isEnabled = cfg_get("CHUNK_TRASH_ENABLED", static_cast<u_short>(1));
110110
safs::log_info("Chunk trash manager is {}", isEnabled ? "enabled" : "disabled");
111111
impl->reloadConfig();
112112
}

src/chunkserver/chunkserver-common/chunk_trash_manager_impl.cc

Lines changed: 47 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,14 @@
2020

2121
#include <sys/statvfs.h>
2222
#include <algorithm>
23+
#include <cstdint>
2324
#include <ctime>
2425

2526
#include "chunkserver-common/chunk_trash_manager_impl.h"
2627
#include "config/cfg.h"
2728
#include "errors/saunafs_error_codes.h"
29+
#include "global_shared_resources.h"
30+
#include "hdd_stats.h"
2831
#include "slogger/slogger.h"
2932

3033
namespace fs = std::filesystem;
@@ -35,6 +38,9 @@ size_t ChunkTrashManagerImpl::trashGarbageCollectorBulkSize = kDefaultTrashGarba
3538
size_t ChunkTrashManagerImpl::garbageCollectorSpaceRecoveryStep =
3639
kDefaultGarbageCollectorSpaceRecoveryStep;
3740

41+
uint64_t ChunkTrashManagerImpl::maxBytesReadPerDisk = 1;
42+
uint64_t ChunkTrashManagerImpl::maxBytesWritePerDisk = 1;
43+
3844
const std::string ChunkTrashManagerImpl::kTrashGuardString =
3945
std::string("/") + ChunkTrashManager::kTrashDirname + "/";
4046

@@ -158,11 +164,18 @@ int ChunkTrashManagerImpl::moveToTrash(const fs::path &filePath, const fs::path
158164

159165
void ChunkTrashManagerImpl::removeTrashFiles(
160166
const ChunkTrashIndex::TrashIndexDiskEntries &filesToRemove) const {
167+
std::vector<std::jthread> paralellRemovers;
168+
161169
for (const auto &[diskPath, fileEntries] : filesToRemove) {
162-
for (const auto &fileEntry : fileEntries) {
163-
if (removeFileFromTrash(fileEntry.second) != SAUNAFS_STATUS_OK) { continue; }
164-
getTrashIndex().remove(fileEntry.first, fileEntry.second, diskPath);
165-
}
170+
paralellRemovers.emplace_back(removeTrashFilesFromDisk, fileEntries, diskPath);
171+
}
172+
}
173+
174+
void ChunkTrashManagerImpl::removeTrashFilesFromDisk(
175+
const ChunkTrashIndex::TrashIndexFileEntries &filesToRemove, const std::string &diskPath) {
176+
for (const auto &fileEntry : filesToRemove) {
177+
if (removeFileFromTrash(fileEntry.second) != SAUNAFS_STATUS_OK) { continue; }
178+
getTrashIndex().remove(fileEntry.first, fileEntry.second, diskPath);
166179
}
167180
}
168181

@@ -254,7 +267,36 @@ void ChunkTrashManagerImpl::collectGarbage() {
254267
if (!ChunkTrashManager::isEnabled) { return; }
255268
std::time_t const currentTime = std::time(nullptr);
256269
std::time_t const expirationTime = currentTime - trashTimeLimitSeconds;
257-
removeExpiredFiles(expirationTime, trashGarbageCollectorBulkSize);
270+
uint64_t currentBytesWrite = HddStats::gBytesWrittenSinceLastGCSweep.exchange(0);
271+
uint64_t currentBytesRead = HddStats::gBytesReadSinceLastGCSweep.exchange(0);
272+
uint64_t currentDiskCount = 1;
273+
{
274+
std::lock_guard disksLockGuard(gDisksMutex);
275+
currentDiskCount = std::max(currentDiskCount, gDisks.size());
276+
}
277+
currentBytesRead /= currentDiskCount;
278+
currentBytesWrite /= currentDiskCount;
279+
maxBytesReadPerDisk = std::max(maxBytesReadPerDisk, currentBytesRead);
280+
maxBytesWritePerDisk = std::max(maxBytesWritePerDisk, currentBytesWrite);
281+
282+
uint64_t totalIOPercentage = currentBytesRead * 100 / maxBytesReadPerDisk +
283+
currentBytesWrite * 100 / maxBytesWritePerDisk;
284+
totalIOPercentage = std::min(totalIOPercentage, uint64_t(100));
285+
286+
auto invertedSigmoid = [](double val) -> double {
287+
const double steepness = 12.0; // steepness
288+
const double center = 0.3; // center in [0,1]
289+
const double valnorm = val / 100.0; // normalize to [0,1]
290+
291+
const double res = 1.0 / (1.0 + std::exp(-steepness * (valnorm - center)));
292+
return 1.0 - res;
293+
};
294+
295+
uint64_t bulksizeScaled = trashGarbageCollectorBulkSize * invertedSigmoid(totalIOPercentage);
296+
297+
static constexpr uint64_t kMinGCBulkSizeForActivation = 5;
298+
299+
if (bulksizeScaled >= kMinGCBulkSizeForActivation) { removeExpiredFiles(expirationTime, bulksizeScaled); }
258300
makeSpace(availableThresholdGB, garbageCollectorSpaceRecoveryStep);
259301
}
260302

src/chunkserver/chunkserver-common/chunk_trash_manager_impl.h

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,14 @@ class ChunkTrashManagerImpl : public IChunkTrashManagerImpl {
8080
*/
8181
void removeTrashFiles(const ChunkTrashIndex::TrashIndexDiskEntries &filesToRemove) const;
8282

83+
/**
84+
* @brief Removes a set of specified files from the trash for an specific FileEntries.
85+
* @param filesToRemove The list of files to be permanently deleted.
86+
* @param diskPath The path of the disk of the files.
87+
*/
88+
static void removeTrashFilesFromDisk(
89+
const ChunkTrashIndex::TrashIndexFileEntries &filesToRemove, const std::string &diskPath);
90+
8391
/**
8492
* @brief Checks if a given timestamp string matches the expected format.
8593
* @param timestamp The timestamp string to validate.
@@ -137,6 +145,11 @@ class ChunkTrashManagerImpl : public IChunkTrashManagerImpl {
137145
void reloadConfig() override;
138146

139147
private:
148+
149+
/// Maximum record of BytesReadPerDisk and BytesWritePerDisk in a GC cycle
150+
static uint64_t maxBytesReadPerDisk;
151+
static uint64_t maxBytesWritePerDisk;
152+
140153
/// Minimum available space threshold (in GB) before triggering garbage
141154
/// collection.
142155
static size_t availableThresholdGB;
@@ -145,7 +158,7 @@ class ChunkTrashManagerImpl : public IChunkTrashManagerImpl {
145158
/// Time limit (in seconds) for files to be considered eligible for
146159
/// deletion.
147160
static size_t trashTimeLimitSeconds;
148-
static constexpr size_t kDefaultTrashTimeLimitSeconds = 259200;
161+
static constexpr size_t kDefaultTrashTimeLimitSeconds = 0;
149162

150163
/// Number of files processed in each bulk operation during garbage
151164
/// collection.

src/chunkserver/chunkserver-common/hdd_stats.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ static inline void totalRead(IDisk *disk, uint64_t size, MicroSeconds duration)
4141

4242
gStatsTotalOperationsRead++;
4343
gStatsTotalBytesRead += size;
44+
gBytesReadSinceLastGCSweep += size;
4445
gStatsTotalTimeRead += duration;
4546

4647
auto &diskStats = disk->getCurrentStats();
@@ -60,6 +61,7 @@ static inline void totalWrite(IDisk *disk, uint64_t size,
6061

6162
gStatsTotalOperationsWrite++;
6263
gStatsTotalBytesWrite += size;
64+
gBytesWrittenSinceLastGCSweep += size;
6365
gStatsTotalTimeWrite += duration;
6466

6567
auto &diskStats = disk->getCurrentStats();

src/chunkserver/chunkserver-common/hdd_stats.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,12 @@ inline std::atomic<uint32_t> gStatsOperationsDuplicate(0);
5656
inline std::atomic<uint32_t> gStatsOperationsTruncate(0);
5757
inline std::atomic<uint32_t> gStatsOperationsDupTrunc(0);
5858

59+
60+
// This is for internal use of GarbageCollector
61+
inline std::atomic<uint64_t> gBytesWrittenSinceLastGCSweep(0);
62+
inline std::atomic<uint64_t> gBytesReadSinceLastGCSweep(0);
63+
64+
5965
struct statsReport {
6066
statsReport(uint64_t *overBytesRead, uint64_t *overBytesWrite,
6167
uint32_t *overOpsRead, uint32_t *overOpsWrite,

src/chunkserver/hddspacemgr.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2524,22 +2524,22 @@ void hddDisksThread() {
25242524
}
25252525

25262526
void hddFreeResourcesThread() {
2527-
static const int kDelayedStep = 2;
2527+
static const int kDelayedStep = 3;
25282528
static const uint32_t kOldIoBuffersExpirationTimeMs = kDelayedStep * 1000;
25292529
static const int kMaxFreeUnused = 1024;
25302530
TRACETHIS();
25312531

25322532
pthread_setname_np(pthread_self(), "freeResThread");
25332533

25342534
while (!gTerminate) {
2535+
Timeout timeout(std::chrono::microseconds(kDelayedStep * 1'000'000));
25352536
gOpenChunks.freeUnused(eventloop_time(), gChunksMapMutex,
25362537
kMaxFreeUnused);
25372538
ChunkTrashManager::collectGarbage();
25382539
hddReleaseDisksToBeDeleted();
25392540
/// Release buffers older than kDelayedStep seconds
25402541
releaseOldIoBuffers(kOldIoBuffersExpirationTimeMs);
2541-
2542-
sleep(kDelayedStep);
2542+
usleep(timeout.remaining_us());
25432543
}
25442544
}
25452545

tests/test_suites/RebalancingTests/test_chunk_rebalancing.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ rebalancing_timeout=90
44
CHUNKSERVERS=5 \
55
USE_LOOP_DISKS=YES \
66
MOUNT_EXTRA_CONFIG="sfscachemode=NEVER" \
7-
CHUNKSERVER_EXTRA_CONFIG="HDD_TEST_FREQ = 10000|HDD_LEAVE_SPACE_DEFAULT = 0MiB" \
7+
CHUNKSERVER_EXTRA_CONFIG="HDD_TEST_FREQ = 10000|HDD_LEAVE_SPACE_DEFAULT = 0MiB|CHUNK_TRASH_ENABLED=0" \
88
MASTER_EXTRA_CONFIG="ACCEPTABLE_DIFFERENCE = 0.0015`
99
`|CHUNKS_LOOP_MAX_CPS = 2`
1010
`|CHUNKS_LOOP_MAX_CPU = 90`

0 commit comments

Comments
 (0)