From fc455239e7c5cdaed6d0c53599bda9f8d59c3b4e Mon Sep 17 00:00:00 2001 From: GigaCronos Date: Mon, 2 Feb 2026 06:09:57 -0500 Subject: [PATCH 1/3] perf(chunkserver): Protect IO performance during massive delete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR further reduces trash-GC impact on foreground I/O by dynamically throttling expired-file deletion based on observed disk throughput, and improves delete wall-clock time by running removals per-disk in parallel. Key changes: - I/O-aware GC throttling (with floor guard) - Adds GC-specific I/O counters in HddStats and increments them on every read/write accounting path. - In ChunkTrashManagerImpl::collectGarbage(): - Samples and resets GC I/O counters each cycle (exchange(0)). - Normalizes by disk count (gDisks.size() under gDisksMutex) to estimate per-disk pressure. - Maintains max observed per-disk bytes (read/write) to create a stable baseline. - Computes totalIOPercentage and feeds it through an inverted sigmoid (steepness=12, center=0.3) to scale down trashGarbageCollectorBulkSize under load. - Adds a minimum threshold: only perform expired-file deletion when the scaled bulk size is >= 5, preventing “death by a thousand tiny deletes” during heavy I/O. - Parallel trash removal across disks -removeTrashFiles() now spawns a std::jthread per disk and delegates actual deletion/index updates to removeTrashFilesFromDisk() allowing independent disks to progress concurrently. - Behavioral defaults - CHUNK_TRASH_ENABLED default flips 0 → 1 (trash manager enabled unless explicitly disabled). - Default retention (kDefaultTrashTimeLimitSeconds) flips 259200 → 0 (immediate expiry unless configured). - Changed documentation accordingly. - Resource thread pacing - in hddFreeResourcesThread() loop replacing sleep() with Timeout + usleep(timeout.remaining_us()) for more precise loop cadence. - Changed the functionality of implMutex for protecting only the getter and setter of the current GC implementation. - Test dependency: - Disabled GC for rebalancing test, mainly because chunks in trash affect disk usage statistics. -Added more sleep time to sfschunkserver_check_no_buffer_in_use() given that GC cycles now take more time. Rationale: Large delete storms can saturate disks and degrade chunkserver latency. This patch makes the garbage collector self-throttling using real I/O signals and avoids thrashing by skipping deletion work when the system is already busy, while parallelizing deletes per disk to reclaim space faster when bandwidth is available. Signed-off-by: GigaCronos --- doc/sfschunkserver.cfg.5.adoc | 10 +-- src/admin/dump_config_command.cc | 4 +- .../chunkserver-common/chunk_trash_index.cc | 4 +- .../chunkserver-common/chunk_trash_manager.cc | 36 ++++++---- .../chunk_trash_manager_impl.cc | 68 +++++++++++++++++-- .../chunk_trash_manager_impl.h | 20 +++++- .../chunkserver-common/hdd_stats.cc | 2 + .../chunkserver-common/hdd_stats.h | 6 ++ src/chunkserver/hddspacemgr.cc | 4 +- src/data/sfschunkserver.cfg.in | 9 +-- .../test_chunk_rebalancing.sh | 2 +- ...oals_generate_no_accidental_rebalancing.sh | 1 + .../test_custom_goals_rebalancing_case_1.sh | 2 +- .../test_custom_goals_rebalancing_case_2.sh | 2 +- .../test_custom_goals_rebalancing_case_3.sh | 2 +- tests/tools/saunafs.sh | 4 +- 16 files changed, 134 insertions(+), 42 deletions(-) diff --git a/doc/sfschunkserver.cfg.5.adoc b/doc/sfschunkserver.cfg.5.adoc index ce7383f45..4e60e07b5 100644 --- a/doc/sfschunkserver.cfg.5.adoc +++ b/doc/sfschunkserver.cfg.5.adoc @@ -186,7 +186,7 @@ defined at build time), /usr/local/lib/saunafs/plugins/chunkserver, and *CHUNK_TRASH_ENABLED (EXPERIMENTAL)*:: enables or disables the chunk trash feature. When enabled, deleted chunks are moved to a trash directory instead of -being immediately removed. (Default: 0) +being immediately removed. (Default: 1) *CHUNK_TRASH_EXPIRATION_SECONDS (EXPERIMENTAL)*:: specifies the timeout in seconds for chunks to remain in the trash before being permanently deleted. @@ -198,9 +198,11 @@ threshold, the system will start deleting older chunks from the trash to free up space. A suggested value is about the 15% of the capacity of the smaller configured disk. (Default: 0) -*CHUNK_TRASH_GC_BATCH_SIZE (EXPERIMENTAL)*:: defines the bulk size for the -garbage collector when processing chunks in the trash. This determines how many -files are processed in each garbage collection cycle. (Default: 1000) +*CHUNK_TRASH_GC_BATCH_SIZE (EXPERIMENTAL)*:: defines the maximum bulk size +per disk for the garbage collector when processing chunks in the trash. +This value is used when no I/O pressure is detected and is dynamically +scaled down based on current disk load. +(Default: 500) *CHUNK_TRASH_GC_SPACE_RECOVERY_BATCH_SIZE (EXPERIMENTAL)*:: [ADVANCED] The number of files to remove from the trash in a single GC cycle, in case the disk diff --git a/src/admin/dump_config_command.cc b/src/admin/dump_config_command.cc index a1c94c3b0..36dc2b6b7 100644 --- a/src/admin/dump_config_command.cc +++ b/src/admin/dump_config_command.cc @@ -183,10 +183,10 @@ const static std::unordered_map defaultOptionsCS = { {"REPLICATION_CONNECTION_TIMEOUT_MS", "1000"}, {"REPLICATION_WAVE_TIMEOUT_MS", "500"}, {"PLUGINS_DIR", ""}, - {"CHUNK_TRASH_ENABLED", "0"}, + {"CHUNK_TRASH_ENABLED", "1"}, {"CHUNK_TRASH_EXPIRATION_SECONDS", "259200"}, {"CHUNK_TRASH_FREE_SPACE_THRESHOLD_GB", "0"}, - {"CHUNK_TRASH_GC_BATCH_SIZE", "1000"}, + {"CHUNK_TRASH_GC_BATCH_SIZE", "500"}, {"CHUNK_TRASH_GC_SPACE_RECOVERY_BATCH_SIZE", "100"}, }; diff --git a/src/chunkserver/chunkserver-common/chunk_trash_index.cc b/src/chunkserver/chunkserver-common/chunk_trash_index.cc index faf7ff669..f35d9dcf5 100644 --- a/src/chunkserver/chunkserver-common/chunk_trash_index.cc +++ b/src/chunkserver/chunkserver-common/chunk_trash_index.cc @@ -80,10 +80,8 @@ ChunkTrashIndex::TrashIndexDiskEntries ChunkTrashIndex::getExpiredFilesInternal( const time_t &timeLimit, size_t bulkSize) { TrashIndexDiskEntries expiredFiles; - size_t count = 0; for (const auto &diskEntry : trashIndex) { - if (bulkSize != 0 && count >= bulkSize) { break; } - count += getExpiredFilesInternal(diskEntry.first, timeLimit, expiredFiles, bulkSize); + getExpiredFilesInternal(diskEntry.first, timeLimit, expiredFiles, bulkSize); } return expiredFiles; diff --git a/src/chunkserver/chunkserver-common/chunk_trash_manager.cc b/src/chunkserver/chunkserver-common/chunk_trash_manager.cc index 57a028e7e..cdcd17080 100644 --- a/src/chunkserver/chunkserver-common/chunk_trash_manager.cc +++ b/src/chunkserver/chunkserver-common/chunk_trash_manager.cc @@ -54,9 +54,11 @@ int ChunkTrashManager::moveToTrash(const std::filesystem::path &filePath, if (!isEnabled) { return 0; } // Protect against concurrent access - std::lock_guard lock(implMutex); - - auto &impl = getImpl(); + ImplementationPtr impl; + { + std::lock_guard lock(implMutex); + impl = getImpl(); + } if (!impl) { safs::log_error_code(SAUNAFS_ERROR_EINVAL, "ChunkTrashManager implementation not initialized"); @@ -69,9 +71,11 @@ int ChunkTrashManager::init(const std::string &diskPath) { reloadConfig(); // Protect against concurrent access - std::lock_guard lock(implMutex); - - auto &impl = getImpl(); + ImplementationPtr impl; + { + std::lock_guard lock(implMutex); + impl = getImpl(); + } if (!impl) { safs::log_error_code(SAUNAFS_ERROR_EINVAL, "ChunkTrashManager implementation not initialized"); @@ -84,9 +88,11 @@ void ChunkTrashManager::collectGarbage() { if (!isEnabled) { return; } // Protect against concurrent access - std::lock_guard lock(implMutex); - - auto &impl = getImpl(); + ImplementationPtr impl; + { + std::lock_guard lock(implMutex); + impl = getImpl(); + } if (!impl) { safs::log_error_code(SAUNAFS_ERROR_EINVAL, "ChunkTrashManager implementation not initialized"); @@ -96,17 +102,19 @@ void ChunkTrashManager::collectGarbage() { } void ChunkTrashManager::reloadConfig() { - // Protect against concurrent access - std::lock_guard lock(implMutex); - - auto &impl = getImpl(); + // Protect against concurrent access + ImplementationPtr impl; + { + std::lock_guard lock(implMutex); + impl = getImpl(); + } if (!impl) { safs::log_error_code(SAUNAFS_ERROR_EINVAL, "ChunkTrashManager implementation not initialized"); return; } - isEnabled = cfg_get("CHUNK_TRASH_ENABLED", static_cast(0)); + isEnabled = cfg_get("CHUNK_TRASH_ENABLED", static_cast(1)); safs::log_info("Chunk trash manager is {}", isEnabled ? "enabled" : "disabled"); impl->reloadConfig(); } diff --git a/src/chunkserver/chunkserver-common/chunk_trash_manager_impl.cc b/src/chunkserver/chunkserver-common/chunk_trash_manager_impl.cc index 01da88053..3e02c415c 100644 --- a/src/chunkserver/chunkserver-common/chunk_trash_manager_impl.cc +++ b/src/chunkserver/chunkserver-common/chunk_trash_manager_impl.cc @@ -20,11 +20,13 @@ #include #include +#include #include #include "chunkserver-common/chunk_trash_manager_impl.h" #include "config/cfg.h" #include "errors/saunafs_error_codes.h" +#include "global_shared_resources.h" #include "hdd_stats.h" #include "slogger/slogger.h" @@ -36,6 +38,12 @@ size_t ChunkTrashManagerImpl::trashGarbageCollectorBulkSize = kDefaultTrashGarba size_t ChunkTrashManagerImpl::garbageCollectorSpaceRecoveryStep = kDefaultGarbageCollectorSpaceRecoveryStep; +uint64_t ChunkTrashManagerImpl::maxBytesReadPerDisk = 1024 * 1024; //1MiB +uint64_t ChunkTrashManagerImpl::maxBytesWritePerDisk = 1024 * 1024; //1MiB + +uint64_t ChunkTrashManagerImpl::previousBytesReadPerDisk = 1024 * 1024; //1MiB +uint64_t ChunkTrashManagerImpl::previousBytesWritePerDisk = 1024 * 1024; //1MiB + const std::string ChunkTrashManagerImpl::kTrashGuardString = std::string("/") + ChunkTrashManager::kTrashDirname + "/"; @@ -159,12 +167,19 @@ int ChunkTrashManagerImpl::moveToTrash(const fs::path &filePath, const fs::path void ChunkTrashManagerImpl::removeTrashFiles( const ChunkTrashIndex::TrashIndexDiskEntries &filesToRemove) const { + std::vector parallelRemovers; + for (const auto &[diskPath, fileEntries] : filesToRemove) { - for (const auto &fileEntry : fileEntries) { - if (removeFileFromTrash(fileEntry.second) != SAUNAFS_STATUS_OK) { continue; } + parallelRemovers.emplace_back(removeTrashFilesFromDisk, fileEntries, diskPath); + } +} + +void ChunkTrashManagerImpl::removeTrashFilesFromDisk( + const ChunkTrashIndex::TrashIndexFileEntries &filesToRemove, const std::string &diskPath) { + for (const auto &fileEntry : filesToRemove) { + if (removeFileFromTrash(fileEntry.second) != SAUNAFS_STATUS_OK) { continue; } HddStats::gStatsOperationsGCPurge++; - getTrashIndex().remove(fileEntry.first, fileEntry.second, diskPath); - } + getTrashIndex().remove(fileEntry.first, fileEntry.second, diskPath); } } @@ -256,8 +271,51 @@ void ChunkTrashManagerImpl::collectGarbage() { if (!ChunkTrashManager::isEnabled) { return; } std::time_t const currentTime = std::time(nullptr); std::time_t const expirationTime = currentTime - trashTimeLimitSeconds; - removeExpiredFiles(expirationTime, trashGarbageCollectorBulkSize); + + uint64_t currentBytesWrite = HddStats::gBytesWrittenSinceLastGCSweep.exchange(0); + uint64_t currentBytesRead = HddStats::gBytesReadSinceLastGCSweep.exchange(0); + uint64_t currentDiskCount = 1; + { + std::lock_guard disksLockGuard(gDisksMutex); + currentDiskCount = std::max(currentDiskCount, gDisks.size()); + } + currentBytesRead /= currentDiskCount; + currentBytesWrite /= currentDiskCount; + + // 0.99997 ^ (30 cycles/min * 60 minutes an hour * 72 hours a day) = 0.02 (2%) + maxBytesReadPerDisk = + std::max({uint64_t(maxBytesReadPerDisk * 0.99997), currentBytesRead, uint64_t(1'000'000)}); + maxBytesWritePerDisk = std::max( + {uint64_t(maxBytesWritePerDisk * 0.99997), currentBytesWrite, uint64_t(1'000'000)}); + + double totalIOPercentage = + static_cast(currentBytesRead) * 100.0 / maxBytesReadPerDisk + + static_cast(currentBytesWrite) * 100.0 / maxBytesWritePerDisk; + + auto invertedSigmoid = [](double val) -> double { + const double steepness = 10.0; // steepness + const double center = 0.15; // center in [0,1] + const double valnorm = val / 100.0; // normalize so that 100% total I/O maps to 1.0 + + const double res = 1.0 / (1.0 + std::exp(-steepness * (valnorm - center))); + return 1.0 - res; + }; + + uint64_t bulksizeScaled = trashGarbageCollectorBulkSize * invertedSigmoid(totalIOPercentage); + if ((currentBytesRead + 1) / (previousBytesReadPerDisk + 1) + + (currentBytesWrite + 1) / (previousBytesWritePerDisk + 1) >= + 10) { + bulksizeScaled = 0; + } + + static constexpr uint64_t kMinGCBulkSizeForActivation = 5; + + if (bulksizeScaled >= kMinGCBulkSizeForActivation) { + removeExpiredFiles(expirationTime, bulksizeScaled); + } makeSpace(availableThresholdGB, garbageCollectorSpaceRecoveryStep); + previousBytesReadPerDisk = currentBytesRead; + previousBytesWritePerDisk = currentBytesWrite; } bool ChunkTrashManagerImpl::isTrashPath(const std::string &filePath) { diff --git a/src/chunkserver/chunkserver-common/chunk_trash_manager_impl.h b/src/chunkserver/chunkserver-common/chunk_trash_manager_impl.h index 063a314c5..cef0fb86d 100644 --- a/src/chunkserver/chunkserver-common/chunk_trash_manager_impl.h +++ b/src/chunkserver/chunkserver-common/chunk_trash_manager_impl.h @@ -80,6 +80,14 @@ class ChunkTrashManagerImpl : public IChunkTrashManagerImpl { */ void removeTrashFiles(const ChunkTrashIndex::TrashIndexDiskEntries &filesToRemove) const; + /** + * @brief Removes a set of specified files from the trash for an specific FileEntries. + * @param filesToRemove The list of files to be permanently deleted. + * @param diskPath The path of the disk of the files. + */ + static void removeTrashFilesFromDisk( + const ChunkTrashIndex::TrashIndexFileEntries &filesToRemove, const std::string &diskPath); + /** * @brief Checks if a given timestamp string matches the expected format. * @param timestamp The timestamp string to validate. @@ -137,6 +145,14 @@ class ChunkTrashManagerImpl : public IChunkTrashManagerImpl { void reloadConfig() override; private: + /// Maximum recorded BytesReadPerDisk and BytesWritePerDisk in a GC cycle + static uint64_t maxBytesReadPerDisk; + static uint64_t maxBytesWritePerDisk; + + /// Previous recorded BytesReadPerDisk and BytesWritePerDisk in a GC cycle + static uint64_t previousBytesReadPerDisk; //1MiB + static uint64_t previousBytesWritePerDisk; //1MiB + /// Minimum available space threshold (in GB) before triggering garbage /// collection. static size_t availableThresholdGB; @@ -145,12 +161,12 @@ class ChunkTrashManagerImpl : public IChunkTrashManagerImpl { /// Time limit (in seconds) for files to be considered eligible for /// deletion. static size_t trashTimeLimitSeconds; - static constexpr size_t kDefaultTrashTimeLimitSeconds = 259200; + static constexpr size_t kDefaultTrashTimeLimitSeconds = 0; /// Number of files processed in each bulk operation during garbage /// collection. static size_t trashGarbageCollectorBulkSize; - static constexpr size_t kDefaultTrashGarbageCollectorBulkSize = 1000; + static constexpr size_t kDefaultTrashGarbageCollectorBulkSize = 500; /// Number of files to remove in each step to free up space when required. static size_t garbageCollectorSpaceRecoveryStep; diff --git a/src/chunkserver/chunkserver-common/hdd_stats.cc b/src/chunkserver/chunkserver-common/hdd_stats.cc index 4e862bada..55cfc2e29 100644 --- a/src/chunkserver/chunkserver-common/hdd_stats.cc +++ b/src/chunkserver/chunkserver-common/hdd_stats.cc @@ -42,6 +42,7 @@ static inline void totalRead(IDisk *disk, uint64_t size, MicroSeconds duration) gStatsTotalOperationsRead++; gStatsTotalBytesRead += size; + gBytesReadSinceLastGCSweep += size; gStatsTotalTimeRead += duration; auto &diskStats = disk->getCurrentStats(); @@ -61,6 +62,7 @@ static inline void totalWrite(IDisk *disk, uint64_t size, gStatsTotalOperationsWrite++; gStatsTotalBytesWrite += size; + gBytesWrittenSinceLastGCSweep += size; gStatsTotalTimeWrite += duration; auto &diskStats = disk->getCurrentStats(); diff --git a/src/chunkserver/chunkserver-common/hdd_stats.h b/src/chunkserver/chunkserver-common/hdd_stats.h index 9d91f574a..9871c2d2e 100644 --- a/src/chunkserver/chunkserver-common/hdd_stats.h +++ b/src/chunkserver/chunkserver-common/hdd_stats.h @@ -58,6 +58,12 @@ inline std::atomic gStatsOperationsDupTrunc(0); inline std::atomic gStatsOperationsGCPurge(0); + +// This is for internal use of GarbageCollector +inline std::atomic gBytesWrittenSinceLastGCSweep(0); +inline std::atomic gBytesReadSinceLastGCSweep(0); + + struct statsReport { statsReport(uint64_t *overBytesRead, uint64_t *overBytesWrite, uint32_t *overOpsRead, uint32_t *overOpsWrite, diff --git a/src/chunkserver/hddspacemgr.cc b/src/chunkserver/hddspacemgr.cc index a364dfb74..ef348fa91 100644 --- a/src/chunkserver/hddspacemgr.cc +++ b/src/chunkserver/hddspacemgr.cc @@ -2532,14 +2532,14 @@ void hddFreeResourcesThread() { pthread_setname_np(pthread_self(), "freeResThread"); while (!gTerminate) { + Timeout timeout(std::chrono::microseconds(kDelayedStep * 1'000'000)); gOpenChunks.freeUnused(eventloop_time(), gChunksMapMutex, kMaxFreeUnused); ChunkTrashManager::collectGarbage(); hddReleaseDisksToBeDeleted(); /// Release buffers older than kDelayedStep seconds releaseOldIoBuffers(kOldIoBuffersExpirationTimeMs); - - sleep(kDelayedStep); + usleep(timeout.remaining_us()); } } diff --git a/src/data/sfschunkserver.cfg.in b/src/data/sfschunkserver.cfg.in index 28b690cf1..fda50f0e1 100644 --- a/src/data/sfschunkserver.cfg.in +++ b/src/data/sfschunkserver.cfg.in @@ -253,10 +253,11 @@ ## the capacity of the smaller configured disk. (Default: 0) # CHUNK_TRASH_FREE_SPACE_THRESHOLD_GB = 0 -## Defines the bulk size for the garbage collector when processing chunks in the -## trash. This determines how many files are processed in each garbage -## collection cycle. (Default: 1000) -# CHUNK_TRASH_GC_BATCH_SIZE = 1000 +## Defines the maximum bulk size per disk for the garbage +## collector when processing chunks in the trash. +## This value is used when no I/O pressure is detected and is dynamically +## scaled down based on current disk load. (Default: 1000) +# CHUNK_TRASH_GC_BATCH_SIZE = 500 ## [ADVANCED] The number of files to remove from the trash in a single GC cycle, ## in case the disk is full and space needs to be recovered. (Default: 100) diff --git a/tests/test_suites/RebalancingTests/test_chunk_rebalancing.sh b/tests/test_suites/RebalancingTests/test_chunk_rebalancing.sh index 51c1cd1e0..552918fc5 100644 --- a/tests/test_suites/RebalancingTests/test_chunk_rebalancing.sh +++ b/tests/test_suites/RebalancingTests/test_chunk_rebalancing.sh @@ -4,7 +4,7 @@ rebalancing_timeout=90 CHUNKSERVERS=5 \ USE_LOOP_DISKS=YES \ MOUNT_EXTRA_CONFIG="sfscachemode=NEVER" \ - CHUNKSERVER_EXTRA_CONFIG="HDD_TEST_FREQ = 10000|HDD_LEAVE_SPACE_DEFAULT = 0MiB" \ + CHUNKSERVER_EXTRA_CONFIG="HDD_TEST_FREQ = 10000|HDD_LEAVE_SPACE_DEFAULT = 0MiB|CHUNK_TRASH_ENABLED=0" \ MASTER_EXTRA_CONFIG="ACCEPTABLE_DIFFERENCE = 0.0015` `|CHUNKS_LOOP_MAX_CPS = 2` `|CHUNKS_LOOP_MAX_CPU = 90` diff --git a/tests/test_suites/RebalancingTests/test_custom_goals_generate_no_accidental_rebalancing.sh b/tests/test_suites/RebalancingTests/test_custom_goals_generate_no_accidental_rebalancing.sh index 307f9f5df..23118b0c1 100644 --- a/tests/test_suites/RebalancingTests/test_custom_goals_generate_no_accidental_rebalancing.sh +++ b/tests/test_suites/RebalancingTests/test_custom_goals_generate_no_accidental_rebalancing.sh @@ -19,6 +19,7 @@ CHUNKSERVERS=6 \ MASTER_CUSTOM_GOALS="1 default: eu _" \ CHUNKSERVER_EXTRA_CONFIG="HDD_LEAVE_SPACE_DEFAULT = 0MiB` `|HDD_TEST_FREQ = 10000` + `|CHUNK_TRASH_ENABLED=0` `|MAGIC_DEBUG_LOG = $TEMP_DIR/log" \ MASTER_EXTRA_CONFIG="ACCEPTABLE_DIFFERENCE = 0.03` `|CHUNKS_LOOP_MAX_CPU = 90` diff --git a/tests/test_suites/RebalancingTests/test_custom_goals_rebalancing_case_1.sh b/tests/test_suites/RebalancingTests/test_custom_goals_rebalancing_case_1.sh index 19dca3860..c70e0134c 100644 --- a/tests/test_suites/RebalancingTests/test_custom_goals_rebalancing_case_1.sh +++ b/tests/test_suites/RebalancingTests/test_custom_goals_rebalancing_case_1.sh @@ -4,7 +4,7 @@ CHUNKSERVERS=5 \ USE_LOOP_DISKS=YES \ CHUNKSERVER_LABELS="0,1,2:ssd|3,4:hdd" \ MASTER_CUSTOM_GOALS="1 default: ssd _" \ - CHUNKSERVER_EXTRA_CONFIG="PERFORM_FSYNC = 1|HDD_TEST_FREQ = 10000|HDD_LEAVE_SPACE_DEFAULT = 0MiB" \ + CHUNKSERVER_EXTRA_CONFIG="PERFORM_FSYNC = 1|HDD_TEST_FREQ = 10000|HDD_LEAVE_SPACE_DEFAULT = 0MiB|CHUNK_TRASH_ENABLED=0" \ MASTER_EXTRA_CONFIG="CHUNKS_LOOP_MIN_TIME = 10` `|CHUNKS_LOOP_MAX_CPU = 90` `|CHUNKS_WRITE_REP_LIMIT = 2` diff --git a/tests/test_suites/RebalancingTests/test_custom_goals_rebalancing_case_2.sh b/tests/test_suites/RebalancingTests/test_custom_goals_rebalancing_case_2.sh index a578b9572..e53f9933b 100644 --- a/tests/test_suites/RebalancingTests/test_custom_goals_rebalancing_case_2.sh +++ b/tests/test_suites/RebalancingTests/test_custom_goals_rebalancing_case_2.sh @@ -4,7 +4,7 @@ CHUNKSERVERS=5 \ USE_LOOP_DISKS=YES \ CHUNKSERVER_LABELS="0:A|1:B|2:C|3:D|4:E" \ MASTER_CUSTOM_GOALS="1 default: _ _" \ - CHUNKSERVER_EXTRA_CONFIG="PERFORM_FSYNC = 1|HDD_TEST_FREQ = 10000|HDD_LEAVE_SPACE_DEFAULT = 0MiB" \ + CHUNKSERVER_EXTRA_CONFIG="PERFORM_FSYNC = 1|HDD_TEST_FREQ = 10000|HDD_LEAVE_SPACE_DEFAULT = 0MiB|CHUNK_TRASH_ENABLED=0" \ MASTER_EXTRA_CONFIG="CHUNKS_LOOP_MIN_TIME = 10` `|CHUNKS_LOOP_MAX_CPU = 90` `|CHUNKS_WRITE_REP_LIMIT = 2` diff --git a/tests/test_suites/RebalancingTests/test_custom_goals_rebalancing_case_3.sh b/tests/test_suites/RebalancingTests/test_custom_goals_rebalancing_case_3.sh index ed12c72d1..aa39106c3 100644 --- a/tests/test_suites/RebalancingTests/test_custom_goals_rebalancing_case_3.sh +++ b/tests/test_suites/RebalancingTests/test_custom_goals_rebalancing_case_3.sh @@ -4,7 +4,7 @@ CHUNKSERVERS=6 \ USE_LOOP_DISKS=YES \ CHUNKSERVER_LABELS="0,1,2:eu|3,4,5:us" \ MASTER_CUSTOM_GOALS="5 eu_eu: eu eu" \ - CHUNKSERVER_EXTRA_CONFIG="PERFORM_FSYNC = 1|HDD_TEST_FREQ = 10000|HDD_LEAVE_SPACE_DEFAULT = 0MiB" \ + CHUNKSERVER_EXTRA_CONFIG="PERFORM_FSYNC = 1|HDD_TEST_FREQ = 10000|HDD_LEAVE_SPACE_DEFAULT = 0MiB|CHUNK_TRASH_ENABLED=0" \ MASTER_EXTRA_CONFIG="CHUNKS_LOOP_MIN_TIME = 10` `|CHUNKS_LOOP_MAX_CPU = 90` `|CHUNKS_WRITE_REP_LIMIT = 2` diff --git a/tests/tools/saunafs.sh b/tests/tools/saunafs.sh index 5b92ed143..e51f50fbd 100644 --- a/tests/tools/saunafs.sh +++ b/tests/tools/saunafs.sh @@ -1130,9 +1130,9 @@ sfschunkserver_check_no_buffer_in_use() { saunafs_chunkserver_daemon ${i} reload done - sleep 5 + sleep 10 - # Assert that the last 5 seconds of log contains chunkserver_count lines with: + # Assert that the last 10 seconds of log contains chunkserver_count lines with: # "Current total buffer blocks per operation: read 0, write 0, replicate 0" # Plus, the last chunkserver_count lines should be unique disregarding the timestamp. # The difference should be the CS pid. This means that the buffers were released correctly. From 06a78b62b0c88bac7f1dfadb056b923d240aa541 Mon Sep 17 00:00:00 2001 From: GigaCronos Date: Sat, 28 Feb 2026 00:15:30 -0500 Subject: [PATCH 2/3] fix: Make a threadpool for trash-remover threads. Add a ProducerConsumerQueue and ThreadPool with up to 5 runners, for executing the per disk deletion jobs. Signed-off-by: GigaCronos --- .../chunkserver-common/chunk_trash_manager.cc | 18 +++++ .../chunkserver-common/chunk_trash_manager.h | 7 ++ .../chunk_trash_manager_impl.cc | 77 +++++++++++++++++-- .../chunk_trash_manager_impl.h | 14 +++- .../chunk_trash_manager_unittest.cc | 1 + src/chunkserver/hddspacemgr.cc | 4 + 6 files changed, 111 insertions(+), 10 deletions(-) diff --git a/src/chunkserver/chunkserver-common/chunk_trash_manager.cc b/src/chunkserver/chunkserver-common/chunk_trash_manager.cc index cdcd17080..8f0dbc066 100644 --- a/src/chunkserver/chunkserver-common/chunk_trash_manager.cc +++ b/src/chunkserver/chunkserver-common/chunk_trash_manager.cc @@ -84,6 +84,24 @@ int ChunkTrashManager::init(const std::string &diskPath) { return impl->init(diskPath); } +void ChunkTrashManager::terminate(){ + if (!isEnabled) { return; } + + // Protect against concurrent access + ImplementationPtr impl; + { + std::lock_guard lock(implMutex); + impl = getImpl(); + } + if (!impl) { + safs::log_error_code(SAUNAFS_ERROR_EINVAL, + "ChunkTrashManager implementation not initialized"); + return; + } + impl->terminate(); + +} + void ChunkTrashManager::collectGarbage() { if (!isEnabled) { return; } diff --git a/src/chunkserver/chunkserver-common/chunk_trash_manager.h b/src/chunkserver/chunkserver-common/chunk_trash_manager.h index 5958ee06d..c0a097790 100644 --- a/src/chunkserver/chunkserver-common/chunk_trash_manager.h +++ b/src/chunkserver/chunkserver-common/chunk_trash_manager.h @@ -34,6 +34,8 @@ class IChunkTrashManagerImpl { virtual int init(const std::string &) = 0; + virtual void terminate() = 0; + virtual void collectGarbage() = 0; virtual void reloadConfig() = 0; @@ -73,6 +75,11 @@ class ChunkTrashManager { */ static int init(const std::string &diskPath); + /** + * @brief Join the background threads. + */ + static void terminate(); + /** * @brief Moves a file to the trash directory. * diff --git a/src/chunkserver/chunkserver-common/chunk_trash_manager_impl.cc b/src/chunkserver/chunkserver-common/chunk_trash_manager_impl.cc index 3e02c415c..58964a8c4 100644 --- a/src/chunkserver/chunkserver-common/chunk_trash_manager_impl.cc +++ b/src/chunkserver/chunkserver-common/chunk_trash_manager_impl.cc @@ -20,14 +20,17 @@ #include #include +#include #include #include +#include #include "chunkserver-common/chunk_trash_manager_impl.h" #include "config/cfg.h" #include "errors/saunafs_error_codes.h" #include "global_shared_resources.h" #include "hdd_stats.h" +#include "hdd_utils.h" #include "slogger/slogger.h" namespace fs = std::filesystem; @@ -47,6 +50,12 @@ uint64_t ChunkTrashManagerImpl::previousBytesWritePerDisk = 1024 * 1024; //1MiB const std::string ChunkTrashManagerImpl::kTrashGuardString = std::string("/") + ChunkTrashManager::kTrashDirname + "/"; +std::vector ChunkTrashManagerImpl::removeFromTrashThreads{}; +std::unique_ptr ChunkTrashManagerImpl::removeFromTrashJobQueue = + std::make_unique(); + +std::atomic ChunkTrashManagerImpl::NotIdleThreadCount{0}; + void ChunkTrashManagerImpl::reloadConfig() { availableThresholdGB = cfg_get("CHUNK_TRASH_FREE_SPACE_THRESHOLD_GB", kDefaultAvailableThresholdGB); @@ -167,19 +176,50 @@ int ChunkTrashManagerImpl::moveToTrash(const fs::path &filePath, const fs::path void ChunkTrashManagerImpl::removeTrashFiles( const ChunkTrashIndex::TrashIndexDiskEntries &filesToRemove) const { - std::vector parallelRemovers; - for (const auto &[diskPath, fileEntries] : filesToRemove) { - parallelRemovers.emplace_back(removeTrashFilesFromDisk, fileEntries, diskPath); + removeFromTrashJobQueue->put( + 0, 1, + reinterpret_cast( + new std::pair(fileEntries, + diskPath)), + 1); + } + + while(NotIdleThreadCount && !removeFromTrashJobQueue->isEmpty()){ + } } -void ChunkTrashManagerImpl::removeTrashFilesFromDisk( - const ChunkTrashIndex::TrashIndexFileEntries &filesToRemove, const std::string &diskPath) { - for (const auto &fileEntry : filesToRemove) { - if (removeFileFromTrash(fileEntry.second) != SAUNAFS_STATUS_OK) { continue; } +void ChunkTrashManagerImpl::removeTrashFilesFromDiskThread(uint8_t workerId) { + std::string threadName ="removeTrashFilesFromDisk_worker_" + std::to_string(workerId); + pthread_setname_np(pthread_self(), threadName.c_str()); + + uint32_t jobId; + uint32_t operation; + uint8_t *jobPtrArg; + + while (true) { + removeFromTrashJobQueue->get(&jobId, &operation, &jobPtrArg, nullptr); + + if(operation == 0){ + break; + } + + NotIdleThreadCount ++; + auto tempTuple = reinterpret_cast *>(jobPtrArg); + + ChunkTrashIndex::TrashIndexFileEntries filesToRemove = tempTuple->first; + std::string diskPath = tempTuple->second; + + for (const auto &fileEntry : filesToRemove) { + if (removeFileFromTrash(fileEntry.second) != SAUNAFS_STATUS_OK) { continue; } HddStats::gStatsOperationsGCPurge++; - getTrashIndex().remove(fileEntry.first, fileEntry.second, diskPath); + getTrashIndex().remove(fileEntry.first, fileEntry.second, diskPath); + } + + delete tempTuple; + + NotIdleThreadCount --; } } @@ -225,9 +265,30 @@ int ChunkTrashManagerImpl::init(const std::string &diskPath) { } } + if (ChunkTrashManagerImpl::removeFromTrashThreads.size() < 5) { + ChunkTrashManagerImpl::removeFromTrashThreads.emplace_back( + &ChunkTrashManagerImpl::removeTrashFilesFromDiskThread, + uint8_t(ChunkTrashManagerImpl::removeFromTrashThreads.size())); + } + return SAUNAFS_STATUS_OK; } +void ChunkTrashManagerImpl::terminate() { + for(uint8_t i=0; i < ChunkTrashManagerImpl::removeFromTrashThreads.size(); i++){ + ChunkTrashManagerImpl::removeFromTrashJobQueue->put(0, 0, nullptr, 1); + } + + + for (auto &thread : ChunkTrashManagerImpl::removeFromTrashThreads) { + if (thread.joinable()) { thread.join(); } + } + + ChunkTrashManagerImpl::removeFromTrashThreads.clear(); + +} + + bool ChunkTrashManagerImpl::isValidTimestampFormat(const std::string ×tamp) { return timestamp.size() == kTimeStampLength && std::ranges::all_of(timestamp, ::isdigit); } diff --git a/src/chunkserver/chunkserver-common/chunk_trash_manager_impl.h b/src/chunkserver/chunkserver-common/chunk_trash_manager_impl.h index cef0fb86d..ff0832dbe 100644 --- a/src/chunkserver/chunkserver-common/chunk_trash_manager_impl.h +++ b/src/chunkserver/chunkserver-common/chunk_trash_manager_impl.h @@ -20,10 +20,14 @@ #include "common/platform.h" +#include #include +#include + #include "chunkserver-common/chunk_trash_index.h" #include "chunkserver-common/chunk_trash_manager.h" +#include "common/pcqueue.h" #include "errors/saunafs_error_codes.h" class ChunkTrashManagerImpl : public IChunkTrashManagerImpl { @@ -49,6 +53,9 @@ class ChunkTrashManagerImpl : public IChunkTrashManagerImpl { */ int init(const std::string &diskPath) override; + + void terminate() override; + /** * @brief Moves a specified file to the trash directory. * @param filePath The path of the file to be moved. @@ -85,8 +92,7 @@ class ChunkTrashManagerImpl : public IChunkTrashManagerImpl { * @param filesToRemove The list of files to be permanently deleted. * @param diskPath The path of the disk of the files. */ - static void removeTrashFilesFromDisk( - const ChunkTrashIndex::TrashIndexFileEntries &filesToRemove, const std::string &diskPath); + static void removeTrashFilesFromDiskThread(uint8_t workerId); /** * @brief Checks if a given timestamp string matches the expected format. @@ -172,6 +178,10 @@ class ChunkTrashManagerImpl : public IChunkTrashManagerImpl { static size_t garbageCollectorSpaceRecoveryStep; static constexpr size_t kDefaultGarbageCollectorSpaceRecoveryStep = 100; + static std::vector removeFromTrashThreads; /// Vector of worker threads. + static std::unique_ptr removeFromTrashJobQueue; /// Queue for jobs. + + static std::atomic NotIdleThreadCount; // Use a function to safely get the ChunkTrashIndex instance // This prevents issues during program shutdown /** diff --git a/src/chunkserver/chunkserver-common/chunk_trash_manager_unittest.cc b/src/chunkserver/chunkserver-common/chunk_trash_manager_unittest.cc index ba3715e2f..470e2489d 100644 --- a/src/chunkserver/chunkserver-common/chunk_trash_manager_unittest.cc +++ b/src/chunkserver/chunkserver-common/chunk_trash_manager_unittest.cc @@ -29,6 +29,7 @@ class MockChunkTrashManagerImpl : public IChunkTrashManagerImpl { (const std::filesystem::path &, const std::filesystem::path &, const std::time_t &), ()); MOCK_METHOD(int, init, (const std::string &), ()); + MOCK_METHOD(void, terminate, (), ()); MOCK_METHOD(void, collectGarbage, (), ()); MOCK_METHOD(void, reloadConfig, (), ()); }; diff --git a/src/chunkserver/hddspacemgr.cc b/src/chunkserver/hddspacemgr.cc index ef348fa91..b5bd0b5cb 100644 --- a/src/chunkserver/hddspacemgr.cc +++ b/src/chunkserver/hddspacemgr.cc @@ -2541,6 +2541,10 @@ void hddFreeResourcesThread() { releaseOldIoBuffers(kOldIoBuffersExpirationTimeMs); usleep(timeout.remaining_us()); } + + + ChunkTrashManager::terminate(); + } void hddTerminate(void) { From ef4763001fcfcd001f1c8c1dd83cf54b3098f61e Mon Sep 17 00:00:00 2001 From: GigaCronos Date: Sat, 28 Feb 2026 18:42:46 -0500 Subject: [PATCH 3/3] fix: Fix ChunkTrashManager unittests This commits fix all unittests that were failing due to the recent Changes. Also fix the CHUNK_TRASH_EXPIRATION_SECONDS param to 0, in documentation and dump_config_command.cc. --- doc/sfschunkserver.cfg.5.adoc | 2 +- src/admin/dump_config_command.cc | 2 +- .../chunk_trash_manager_impl.cc | 29 ++++++++++++------- .../chunk_trash_manager_impl.h | 6 +++- .../chunk_trash_manager_impl_unittest.cc | 2 +- .../chunk_trash_manager_unittest.cc | 11 ++++++- src/data/sfschunkserver.cfg.in | 4 +-- 7 files changed, 39 insertions(+), 17 deletions(-) diff --git a/doc/sfschunkserver.cfg.5.adoc b/doc/sfschunkserver.cfg.5.adoc index 4e60e07b5..cf43f2392 100644 --- a/doc/sfschunkserver.cfg.5.adoc +++ b/doc/sfschunkserver.cfg.5.adoc @@ -190,7 +190,7 @@ being immediately removed. (Default: 1) *CHUNK_TRASH_EXPIRATION_SECONDS (EXPERIMENTAL)*:: specifies the timeout in seconds for chunks to remain in the trash before being permanently deleted. -(Default: 259200) +(Default: 0) *CHUNK_TRASH_FREE_SPACE_THRESHOLD_GB (EXPERIMENTAL)*:: sets the available space threshold in gigabytes. If the available space on the disk falls below this diff --git a/src/admin/dump_config_command.cc b/src/admin/dump_config_command.cc index 36dc2b6b7..3a377a3c7 100644 --- a/src/admin/dump_config_command.cc +++ b/src/admin/dump_config_command.cc @@ -184,7 +184,7 @@ const static std::unordered_map defaultOptionsCS = { {"REPLICATION_WAVE_TIMEOUT_MS", "500"}, {"PLUGINS_DIR", ""}, {"CHUNK_TRASH_ENABLED", "1"}, - {"CHUNK_TRASH_EXPIRATION_SECONDS", "259200"}, + {"CHUNK_TRASH_EXPIRATION_SECONDS", "0"}, {"CHUNK_TRASH_FREE_SPACE_THRESHOLD_GB", "0"}, {"CHUNK_TRASH_GC_BATCH_SIZE", "500"}, {"CHUNK_TRASH_GC_SPACE_RECOVERY_BATCH_SIZE", "100"}, diff --git a/src/chunkserver/chunkserver-common/chunk_trash_manager_impl.cc b/src/chunkserver/chunkserver-common/chunk_trash_manager_impl.cc index 58964a8c4..dcbdb03de 100644 --- a/src/chunkserver/chunkserver-common/chunk_trash_manager_impl.cc +++ b/src/chunkserver/chunkserver-common/chunk_trash_manager_impl.cc @@ -21,8 +21,12 @@ #include #include #include +#include #include #include +#include +#include +#include #include #include "chunkserver-common/chunk_trash_manager_impl.h" @@ -54,7 +58,11 @@ std::vector ChunkTrashManagerImpl::removeFromTrashThreads{}; std::unique_ptr ChunkTrashManagerImpl::removeFromTrashJobQueue = std::make_unique(); -std::atomic ChunkTrashManagerImpl::NotIdleThreadCount{0}; +std::atomic ChunkTrashManagerImpl::toDoJobsCount{0}; + +std::mutex ChunkTrashManagerImpl::mutex_{}; + +std::condition_variable ChunkTrashManagerImpl::trashRemoverCV{}; void ChunkTrashManagerImpl::reloadConfig() { availableThresholdGB = @@ -177,6 +185,7 @@ int ChunkTrashManagerImpl::moveToTrash(const fs::path &filePath, const fs::path void ChunkTrashManagerImpl::removeTrashFiles( const ChunkTrashIndex::TrashIndexDiskEntries &filesToRemove) const { for (const auto &[diskPath, fileEntries] : filesToRemove) { + toDoJobsCount++; removeFromTrashJobQueue->put( 0, 1, reinterpret_cast( @@ -184,16 +193,16 @@ void ChunkTrashManagerImpl::removeTrashFiles( diskPath)), 1); } - - while(NotIdleThreadCount && !removeFromTrashJobQueue->isEmpty()){ - - } + std::unique_lock trashRemoverlock(mutex_); + trashRemoverCV.wait(trashRemoverlock,[&] { return toDoJobsCount==0 && removeFromTrashJobQueue->isEmpty(); }); + } void ChunkTrashManagerImpl::removeTrashFilesFromDiskThread(uint8_t workerId) { std::string threadName ="removeTrashFilesFromDisk_worker_" + std::to_string(workerId); pthread_setname_np(pthread_self(), threadName.c_str()); + std::cout<<"GigaCronos: ThreadOpened"< *>(jobPtrArg); ChunkTrashIndex::TrashIndexFileEntries filesToRemove = tempTuple->first; std::string diskPath = tempTuple->second; - for (const auto &fileEntry : filesToRemove) { if (removeFileFromTrash(fileEntry.second) != SAUNAFS_STATUS_OK) { continue; } HddStats::gStatsOperationsGCPurge++; @@ -218,9 +224,12 @@ void ChunkTrashManagerImpl::removeTrashFilesFromDiskThread(uint8_t workerId) { } delete tempTuple; - - NotIdleThreadCount --; + toDoJobsCount --; + std::unique_lock trashRemoverlock(mutex_); + trashRemoverCV.notify_one(); } + + std::cout<<"GigaCronos: ThreadClosed"< #include #include #include @@ -181,7 +182,10 @@ class ChunkTrashManagerImpl : public IChunkTrashManagerImpl { static std::vector removeFromTrashThreads; /// Vector of worker threads. static std::unique_ptr removeFromTrashJobQueue; /// Queue for jobs. - static std::atomic NotIdleThreadCount; + static std::atomic toDoJobsCount; + + static std::mutex mutex_; + static std::condition_variable trashRemoverCV; // Use a function to safely get the ChunkTrashIndex instance // This prevents issues during program shutdown /** diff --git a/src/chunkserver/chunkserver-common/chunk_trash_manager_impl_unittest.cc b/src/chunkserver/chunkserver-common/chunk_trash_manager_impl_unittest.cc index 94a71696c..9cef919f0 100644 --- a/src/chunkserver/chunkserver-common/chunk_trash_manager_impl_unittest.cc +++ b/src/chunkserver/chunkserver-common/chunk_trash_manager_impl_unittest.cc @@ -55,7 +55,7 @@ class ChunkTrashManagerImplTest : public ::testing::Test { chunkTrashManagerImpl.init(testDir.string()); } - void TearDown() override { fs::remove_all(testDir); } + void TearDown() override { fs::remove_all(testDir);chunkTrashManagerImpl.terminate(); } }; TEST_F(ChunkTrashManagerImplTest, MoveToTrashValidFile) { diff --git a/src/chunkserver/chunkserver-common/chunk_trash_manager_unittest.cc b/src/chunkserver/chunkserver-common/chunk_trash_manager_unittest.cc index 470e2489d..258265a48 100644 --- a/src/chunkserver/chunkserver-common/chunk_trash_manager_unittest.cc +++ b/src/chunkserver/chunkserver-common/chunk_trash_manager_unittest.cc @@ -45,7 +45,10 @@ class ChunkTrashManagerTest : public ::testing::Test { ChunkTrashManager::isEnabled = 1; } - void TearDown() override { mockImpl.reset(); } + void TearDown() override { + ChunkTrashManager::terminate(); + mockImpl.reset(); + } }; TEST_F(ChunkTrashManagerTest, MoveToTrashForwardsCall) { @@ -82,6 +85,12 @@ TEST_F(ChunkTrashManagerTest, ReloadConfigForwardsCall) { ChunkTrashManager::reloadConfig(); // Call the method. } +TEST_F(ChunkTrashManagerTest, TerminateForwardsCall) { + EXPECT_CALL(*mockImpl, terminate()).Times(::testing::AnyNumber()); + + ChunkTrashManager::terminate(); +} + TEST_F(ChunkTrashManagerTest, DisabledMoveToTrashDoesNotForwardsCall) { std::filesystem::path filePath = "example.txt"; std::filesystem::path diskPath = "/disk/"; diff --git a/src/data/sfschunkserver.cfg.in b/src/data/sfschunkserver.cfg.in index fda50f0e1..a39d16447 100644 --- a/src/data/sfschunkserver.cfg.in +++ b/src/data/sfschunkserver.cfg.in @@ -244,8 +244,8 @@ # CHUNK_TRASH_ENABLED = 0 ## Specifies the timeout in seconds for chunks to remain in the trash before -## being permanently deleted. (Default: 259200) -# CHUNK_TRASH_EXPIRATION_SECONDS = 259200 +## being permanently deleted. (Default: 0) +# CHUNK_TRASH_EXPIRATION_SECONDS = 0 ## Sets the available space threshold in gigabytes. If the available space on ## the disk falls below this threshold, the system will start deleting older