Skip to content

Commit 3aa891d

Browse files
committed
os/BlueStore: NCB fix for leaked space when bdev_async_discard is enabled
Fix calls bdev->discard_drain() before calling store_allocator() to make sure all freed space is reflected in the allocator before destaging it The fix set a timeout for the drain call (500msec) and if expires will not store the allocator (forcing a recovery on the next startup) Fixes: https://tracker.ceph.com/issues/65298 Signed-off-by: Gabriel BenHanokh <[email protected]>
1 parent 344c1ff commit 3aa891d

File tree

4 files changed

+36
-6
lines changed

4 files changed

+36
-6
lines changed

src/blk/BlockDevice.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,7 @@ class BlockDevice {
285285
int write_hint = WRITE_LIFE_NOT_SET) = 0;
286286
virtual int flush() = 0;
287287
virtual bool try_discard(interval_set<uint64_t> &to_release, bool async=true) { return false; }
288-
virtual void discard_drain() { return; }
288+
virtual int discard_drain(uint32_t timeout_msec = 0) { return 0; }
289289

290290
// for managing buffered readers/writers
291291
virtual int invalidate_cache(uint64_t off, uint64_t len) = 0;

src/blk/kernel/KernelDevice.cc

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -587,13 +587,35 @@ bool KernelDevice::_discard_started()
587587
return !discard_threads.empty();
588588
}
589589

590-
void KernelDevice::discard_drain()
590+
int KernelDevice::discard_drain(uint32_t timeout_msec = 0)
591591
{
592592
dout(10) << __func__ << dendl;
593+
bool check_timeout = false;
594+
utime_t end_time;
595+
if (timeout_msec) {
596+
check_timeout = true;
597+
uint32_t timeout_sec = 0;
598+
if (timeout_msec >= 1000) {
599+
timeout_sec = (timeout_msec / 1000);
600+
timeout_msec = (timeout_msec % 1000);
601+
}
602+
end_time = ceph_clock_now();
603+
// add the timeout after converting from msec to nsec
604+
end_time.tv.tv_nsec += (timeout_msec * (1000*1000));
605+
if (end_time.tv.tv_nsec > (1000*1000*1000)) {
606+
end_time.tv.tv_nsec -= (1000*1000*1000);
607+
end_time.tv.tv_sec += 1;
608+
}
609+
end_time.tv.tv_sec += timeout_sec;
610+
}
593611
std::unique_lock l(discard_lock);
594612
while (!discard_queued.empty() || discard_running) {
595613
discard_cond.wait(l);
614+
if (check_timeout && ceph_clock_now() > end_time) {
615+
return -1;
616+
}
596617
}
618+
return 0;
597619
}
598620

599621
static bool is_expected_ioerr(const int r)

src/blk/kernel/KernelDevice.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ class KernelDevice : public BlockDevice,
123123
~KernelDevice();
124124

125125
void aio_submit(IOContext *ioc) override;
126-
void discard_drain() override;
126+
int discard_drain(uint32_t timeout_msec) override;
127127

128128
int collect_metadata(const std::string& prefix, std::map<std::string,std::string> *pm) const override;
129129
int get_devname(std::string *s) const override {

src/os/bluestore/BlueStore.cc

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7762,9 +7762,17 @@ void BlueStore::_close_db()
77627762
db = nullptr;
77637763

77647764
if (do_destage && fm && fm->is_null_manager()) {
7765-
int ret = store_allocator(alloc);
7766-
if (ret != 0) {
7767-
derr << __func__ << "::NCB::store_allocator() failed (continue with bitmapFreelistManager)" << dendl;
7765+
// force all backgrounds discards to be committed before storing allocator
7766+
// set timeout to 500msec
7767+
int ret = bdev->discard_drain(500);
7768+
if (ret == 0) {
7769+
ret = store_allocator(alloc);
7770+
if (unlikely(ret != 0)) {
7771+
derr << __func__ << "::NCB::store_allocator() failed (we will need to rebuild it on startup)" << dendl;
7772+
}
7773+
}
7774+
else {
7775+
derr << __func__ << "::NCB::discard_drain() exceeded timeout (abort!)" << dendl;
77687776
}
77697777
}
77707778

0 commit comments

Comments
 (0)