Skip to content

Commit f826318

Browse files
authored
Merge pull request ceph#62588 from aclamk/wip-aclamk-bluefs-remove-truncate
os/bluestore: Fix race in BlueFS truncate / remove
2 parents f20e442 + e5f8892 commit f826318

File tree

3 files changed

+69
-17
lines changed

3 files changed

+69
-17
lines changed

src/os/bluestore/BlueFS.cc

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2221,7 +2221,7 @@ std::ostream& operator<<(std::ostream& out, const lock_fnode_print& to_lock) {
22212221
return out;
22222222
}
22232223

2224-
void BlueFS::_drop_link_D(FileRef file)
2224+
void BlueFS::_drop_link_DF(FileRef file)
22252225
{
22262226
dout(20) << __func__ << " had refs " << file->refs
22272227
<< " on " << lock_fnode_print(file) << dendl;
@@ -2237,9 +2237,11 @@ void BlueFS::_drop_link_D(FileRef file)
22372237
log.t.op_file_remove(file->fnode.ino);
22382238
nodes.file_map.erase(file->fnode.ino);
22392239
logger->set(l_bluefs_num_files, nodes.file_map.size());
2240-
file->deleted = true;
2241-
22422240
std::lock_guard dl(dirty.lock);
2241+
std::lock_guard fl(file->lock);
2242+
// setting `deleted` is under log+nodes+dirty+file locks
2243+
// so it is enough to held one of those locks to make sure it does not change
2244+
file->deleted = true;
22432245
for (auto& r : file->fnode.extents) {
22442246
dirty.pending_release[r.bdev].insert(r.offset, r.length);
22452247
}
@@ -3563,10 +3565,6 @@ int BlueFS::_flush_range_F(FileWriter *h, uint64_t offset, uint64_t length)
35633565
<< " to " << h->file->fnode
35643566
<< " hint " << h->file->vselector_hint
35653567
<< dendl;
3566-
if (h->file->deleted) {
3567-
dout(10) << __func__ << " deleted, no-op" << dendl;
3568-
return 0;
3569-
}
35703568

35713569
bool buffered = cct->_conf->bluefs_buffered_io;
35723570

@@ -3580,6 +3578,10 @@ int BlueFS::_flush_range_F(FileWriter *h, uint64_t offset, uint64_t length)
35803578
<< dendl;
35813579
}
35823580
std::lock_guard file_lock(h->file->lock);
3581+
if (h->file->deleted) {
3582+
dout(10) << __func__ << " deleted, no-op" << dendl;
3583+
return 0;
3584+
}
35833585
ceph_assert(offset <= h->file->fnode.size);
35843586

35853587
uint64_t allocated = h->file->fnode.get_allocated();
@@ -3836,16 +3838,13 @@ int BlueFS::truncate(FileWriter *h, uint64_t offset)/*_WF_L*/
38363838
auto t0 = mono_clock::now();
38373839
std::lock_guard hl(h->lock);
38383840
auto& fnode = h->file->fnode;
3839-
dout(10) << __func__ << " 0x" << std::hex << offset << std::dec
3840-
<< " file " << fnode << dendl;
3841-
if (h->file->deleted) {
3842-
dout(10) << __func__ << " deleted, no-op" << dendl;
3843-
return 0;
3844-
}
38453841

38463842
// we never truncate internal log files
38473843
ceph_assert(fnode.ino > 1);
38483844

3845+
dout(10) << __func__ << " 0x" << std::hex << offset << std::dec
3846+
<< " file " << fnode << dendl;
3847+
38493848
// truncate off unflushed data?
38503849
if (h->pos < offset &&
38513850
h->pos + h->get_buffer_length() > offset) {
@@ -3861,11 +3860,15 @@ int BlueFS::truncate(FileWriter *h, uint64_t offset)/*_WF_L*/
38613860
if (offset > fnode.size) {
38623861
ceph_abort_msg("truncate up not supported");
38633862
}
3864-
3863+
unittest_inject_delay();
38653864
_flush_bdev(h);
38663865
{
38673866
std::lock_guard ll(log.lock);
38683867
std::lock_guard dl(dirty.lock);
3868+
if (h->file->deleted) {
3869+
dout(10) << __func__ << " deleted, no-op" << dendl;
3870+
return 0;
3871+
}
38693872
bool changed_extents = false;
38703873
vselector->sub_usage(h->file->vselector_hint, fnode);
38713874
uint64_t x_off = 0;
@@ -4456,7 +4459,7 @@ int BlueFS::rename(
44564459
<< " already exists, unlinking" << dendl;
44574460
ceph_assert(q->second != file);
44584461
log.t.op_dir_unlink(new_dirname, new_filename);
4459-
_drop_link_D(q->second);
4462+
_drop_link_DF(q->second);
44604463
}
44614464

44624465
dout(10) << __func__ << " " << new_dirname << "/" << new_filename << " "
@@ -4651,7 +4654,7 @@ int BlueFS::unlink(std::string_view dirname, std::string_view filename)/*_LND*/
46514654
}
46524655
dir->file_map.erase(q);
46534656
log.t.op_dir_unlink(dirname, filename);
4654-
_drop_link_D(file);
4657+
_drop_link_DF(file);
46554658
logger->tinc(l_bluefs_unlink_lat, mono_clock::now() - t0);
46564659

46574660
return 0;

src/os/bluestore/BlueFS.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,7 @@ class debug_point_t {
237237
: m_func(func) {}
238238
template<typename... Arg>
239239
void operator()(Arg... arg) { if (m_func) m_func(std::forward<Arg...>(arg...)); }
240+
void operator()() { if (m_func) m_func(); }
240241
void operator=(T&& func) { m_func = std::move(func);}
241242
void operator=(T& func) { m_func = func;}
242243
private:
@@ -562,7 +563,7 @@ class BlueFS {
562563
uint64_t _get_minimal_reserved(unsigned id) const;
563564

564565
FileRef _get_file(uint64_t ino);
565-
void _drop_link_D(FileRef f);
566+
void _drop_link_DF(FileRef f);
566567

567568
unsigned _get_slow_device_id() {
568569
return bdev[BDEV_SLOW] ? BDEV_SLOW : BDEV_DB;
@@ -813,6 +814,7 @@ class BlueFS {
813814
bool debug_get_is_dev_dirty(FileWriter *h, uint8_t dev);
814815
debug_point_t<std::function<void(uint32_t)>> tracepoint_async_compact;
815816
void trim_free_space(const std::string& type, std::ostream& outss);
817+
debug_point_t<std::function<void()>> unittest_inject_delay;
816818

817819
private:
818820
// Wrappers for BlockDevice::read(...) and BlockDevice::read_random(...)

src/test/objectstore/store_test.cc

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12006,6 +12006,53 @@ TEST_P(StoreTestSpecificAUSize, BlueFSReservedTest) {
1200612006
g_conf()->bluefs_alloc_size + wal_extra);
1200712007
}
1200812008

12009+
12010+
TEST_P(StoreTest, BlueFS_truncate_remove_race) {
12011+
if (string(GetParam()) != "bluestore")
12012+
GTEST_SKIP();
12013+
12014+
BlueStore* bstore = dynamic_cast<BlueStore*> (store.get());
12015+
ceph_assert(bstore);
12016+
BlueFS& fs = *bstore->get_bluefs();
12017+
fs.unittest_inject_delay = []() { usleep(1000); };
12018+
static constexpr uint32_t batch_size = 20;
12019+
std::binary_semaphore go_remove(0);
12020+
std::binary_semaphore done_remove(0);
12021+
12022+
auto files_remover = [&]() {
12023+
for (uint32_t cnt = 0; cnt < batch_size; cnt++) {
12024+
go_remove.acquire();
12025+
std::string name = "test-file-" + std::to_string(cnt);
12026+
fs.unlink("dir", name);
12027+
fs.sync_metadata(false);
12028+
done_remove.release();
12029+
}
12030+
};
12031+
12032+
ASSERT_EQ(0, fs.mkdir("dir"));
12033+
std::thread remover_thread(files_remover);
12034+
12035+
for (uint32_t cnt = 0; cnt < batch_size; cnt++) {
12036+
std::string name = "test-file-" + std::to_string(cnt);
12037+
BlueFS::FileWriter *f = nullptr;
12038+
ASSERT_EQ(0, fs.open_for_write("dir", name, &f, false));
12039+
fs.preallocate(f->file, 0, 100000);
12040+
for (uint32_t i = 0; i < 10; i++) {
12041+
fs.append_try_flush(f, "x", 1);
12042+
fs.fsync(f);
12043+
}
12044+
go_remove.release();
12045+
fs.truncate(f, 10);
12046+
fs.close_writer(f);
12047+
done_remove.acquire();
12048+
}
12049+
12050+
remover_thread.join();
12051+
fs.unittest_inject_delay = nullptr;
12052+
EXPECT_EQ(store->umount(), 0);
12053+
EXPECT_EQ(store->mount(), 0);
12054+
}
12055+
1200912056
#endif // WITH_BLUESTORE
1201012057

1201112058
int main(int argc, char **argv) {

0 commit comments

Comments
 (0)