diff --git a/conanfile.py b/conanfile.py index b183e8f..9aad725 100644 --- a/conanfile.py +++ b/conanfile.py @@ -9,7 +9,7 @@ class HomeBlocksConan(ConanFile): name = "homeblocks" - version = "1.0.22" + version = "1.0.24" homepage = "https://github.com/eBay/HomeBlocks" description = "Block Store built on HomeStore" topics = ("ebay") diff --git a/src/lib/homeblks_impl.cpp b/src/lib/homeblks_impl.cpp index 30802ce..bb50010 100644 --- a/src/lib/homeblks_impl.cpp +++ b/src/lib/homeblks_impl.cpp @@ -37,6 +37,7 @@ extern std::shared_ptr< HomeBlocks > init_homeblocks(std::weak_ptr< HomeBlocksAp auto inst = std::make_shared< HomeBlocksImpl >(std::move(application)); inst->init_homestore(); inst->init_cp(); + inst->start_reaper_thread(); return inst; } @@ -57,6 +58,13 @@ HomeBlocksStats HomeBlocksImpl::get_stats() const { } HomeBlocksImpl::~HomeBlocksImpl() { + LOGI("Shutting down HomeBlocksImpl"); + + if (vol_gc_timer_hdl_ != iomgr::null_timer_handle) { + iomanager.cancel_timer(vol_gc_timer_hdl_); + vol_gc_timer_hdl_ = iomgr::null_timer_handle; + } + homestore::hs()->shutdown(); homestore::HomeStore::reset_instance(); iomanager.stop(); @@ -299,4 +307,48 @@ void HomeBlocksImpl::on_init_complete() { } void HomeBlocksImpl::init_cp() {} + +uint64_t HomeBlocksImpl::gc_timer_nsecs() const { + if (SISL_OPTIONS.count("gc_timer_nsecs")) { + auto const n = SISL_OPTIONS["gc_timer_nsecs"].as< uint32_t >(); + LOGINFO("Using gc_timer_nsecs option value: {}", n); + return n; + } else { + return HB_DYNAMIC_CONFIG(reaper_thread_timer_secs); + } +} + +void HomeBlocksImpl::start_reaper_thread() { + auto const nsecs = gc_timer_nsecs(); + LOGI("Starting volume garbage collection timer with interval: {} seconds", nsecs); + vol_gc_timer_hdl_ = iomanager.schedule_global_timer( + nsecs * 1000 * 1000 * 1000, true /* recurring */, nullptr /* cookie */, iomgr::reactor_regex::all_user, + [this](void*) { this->vol_gc(); }, true /* wait_to_schedule */); +} + +void HomeBlocksImpl::vol_gc() { + LOGI("Running volume garbage collection"); + // loop through every volume and call remove volume if volume's ref_cnt is zero; + std::vector< VolumePtr > vols_to_remove; + { + auto lg = std::shared_lock(vol_lock_); + for (auto& vol_pair : vol_map_) { + auto& vol = vol_pair.second; + LOGI("Checking volume with id: {}, is_destroying: {}, can_remove: {}, num_outstanding_reqs: {}", + vol->id_str(), vol->is_destroying(), vol->can_remove(), vol->num_outstanding_reqs()); + + if (vol->is_destroying() && vol->can_remove()) { + // 1. volume has been issued with removed command before + // 2. no one has already started removing it + // 3. volume is not in use anymore (ref_cnt == 0) + vols_to_remove.push_back(vol); + } + } + } + + for (auto& vol : vols_to_remove) { + LOGI("Garbage Collecting removed volume with id: {}", vol->id_str()); + remove_volume(vol->id()); + } +} } // namespace homeblocks diff --git a/src/lib/homeblks_impl.hpp b/src/lib/homeblks_impl.hpp index f6d9b75..9ad6bd8 100644 --- a/src/lib/homeblks_impl.hpp +++ b/src/lib/homeblks_impl.hpp @@ -60,7 +60,7 @@ class HomeBlocksImpl : public HomeBlocks, public VolumeManager, public std::enab std::weak_ptr< HomeBlocksApplication > _application; folly::Executor::KeepAlive<> executor_; - /// + /// Volume management mutable std::shared_mutex vol_lock_; std::map< volume_id_t, VolumePtr > vol_map_; @@ -72,6 +72,9 @@ class HomeBlocksImpl : public HomeBlocks, public VolumeManager, public std::enab superblk< homeblks_sb_t > sb_; peer_id_t our_uuid_; + iomgr::io_fiber_t reaper_fiber_; + iomgr::timer_handle_t vol_gc_timer_hdl_{iomgr::null_timer_handle}; + public: explicit HomeBlocksImpl(std::weak_ptr< HomeBlocksApplication >&& application); @@ -127,7 +130,7 @@ class HomeBlocksImpl : public HomeBlocks, public VolumeManager, public std::enab void on_write(int64_t lsn, const sisl::blob& header, const sisl::blob& key, const std::vector< homestore::MultiBlkId >& blkids, cintrusive< homestore::repl_req_ctx >& ctx); - // VolumeManager::Result< folly::Unit > verify_checksum(vol_read_ctx const& read_ctx); + void start_reaper_thread(); private: // Should only be called for first-time-boot @@ -143,6 +146,17 @@ class HomeBlocksImpl : public HomeBlocks, public VolumeManager, public std::enab // recovery apis void on_hb_meta_blk_found(sisl::byte_view const& buf, void* cookie); void on_vol_meta_blk_found(sisl::byte_view const& buf, void* cookie); + + void vol_gc(); + + uint64_t gc_timer_nsecs() const; + +#ifdef _PRERELEASE + // For testing purpose only + // If delay flip is not set, false will be returned; + // If delay flip is set, it will delay the IOs for a given VolumePtr + bool delay_fake_io(VolumePtr vol); +#endif }; class HBIndexSvcCB : public homestore::IndexServiceCallbacks { diff --git a/src/lib/volume/tests/CMakeLists.txt b/src/lib/volume/tests/CMakeLists.txt index 64734d1..db4e873 100644 --- a/src/lib/volume/tests/CMakeLists.txt +++ b/src/lib/volume/tests/CMakeLists.txt @@ -22,5 +22,5 @@ target_link_libraries(test_volume_io -rdynamic ) -add_test(NAME VolumeTest COMMAND test_volume) -add_test(NAME VolumeIOTest COMMAND test_volume_io) \ No newline at end of file +add_test(NAME VolumeTest COMMAND test_volume --gc_timer_nsecs=3) +add_test(NAME VolumeIOTest COMMAND test_volume_io) diff --git a/src/lib/volume/tests/test_common.hpp b/src/lib/volume/tests/test_common.hpp index 6a615d1..266e56a 100644 --- a/src/lib/volume/tests/test_common.hpp +++ b/src/lib/volume/tests/test_common.hpp @@ -213,6 +213,31 @@ class HBTestHelper { } } +#ifdef _PRERELEASE + void set_flip_point(const std::string flip_name) { + flip::FlipCondition null_cond; + flip::FlipFrequency freq; + freq.set_count(2); + freq.set_percent(100); + m_fc.inject_noreturn_flip(flip_name, {null_cond}, freq); + LOGI("Flip {} set", flip_name); + } + + void set_delay_flip(const std::string flip_name, uint64_t delay_usec, uint32_t count = 1, uint32_t percent = 100) { + flip::FlipCondition null_cond; + flip::FlipFrequency freq; + freq.set_count(count); + freq.set_percent(percent); + m_fc.inject_delay_flip(flip_name, {null_cond}, freq, delay_usec); + LOGDEBUG("Flip {} set", flip_name); + } + + void remove_flip(const std::string flip_name) { + m_fc.remove_flip(flip_name); + LOGDEBUG("Flip {} removed", flip_name); + } +#endif + private: void init_devices(bool is_file, uint64_t dev_size = 0) { if (is_file) { @@ -275,6 +300,10 @@ class HBTestHelper { peer_id_t svc_id_; Runner io_runner_; Waiter waiter_; + +#ifdef _PRERELEASE + flip::FlipClient m_fc{iomgr_flip::instance()}; +#endif }; -} // namespace test_common \ No newline at end of file +} // namespace test_common diff --git a/src/lib/volume/tests/test_volume.cpp b/src/lib/volume/tests/test_volume.cpp index eb76254..d63da32 100644 --- a/src/lib/volume/tests/test_volume.cpp +++ b/src/lib/volume/tests/test_volume.cpp @@ -26,8 +26,11 @@ SISL_LOGGING_INIT(HOMEBLOCKS_LOG_MODS) SISL_OPTION_GROUP(test_volume_setup, - (num_vols, "", "num_vols", "number of volumes", ::cxxopts::value< uint32_t >()->default_value("2"), - "number")); + (num_vols, "", "num_vols", "number of volumes", ::cxxopts::value< uint32_t >()->default_value("2"), + "number"), + (gc_timer_nsecs, "", "gc_timer_nsecs", "gc timer in seconds", + ::cxxopts::value< uint32_t >()->default_value("5"), "seconds")); + SISL_OPTIONS_ENABLE(logging, test_common_setup, test_volume_setup, homeblocks) SISL_LOGGING_DECL(test_volume) @@ -47,23 +50,63 @@ class VolumeTest : public ::testing::Test { vol_info.id = hb_utils::gen_random_uuid(); return vol_info; } +}; #ifdef _PRERELEASE - void set_flip_point(const std::string flip_name) { - flip::FlipCondition null_cond; - flip::FlipFrequency freq; - freq.set_count(2); - freq.set_percent(100); - m_fc.inject_noreturn_flip(flip_name, {null_cond}, freq); - LOGI("Flip {} set", flip_name); +TEST_F(VolumeTest, CreateDestroyVolumeWithOutstandingIO) { + std::vector< volume_id_t > vol_ids; + { + auto hb = g_helper->inst(); + auto vol_mgr = hb->volume_manager(); + + uint32_t delay_sec = 6; + g_helper->set_delay_flip("vol_fake_io_delay_simulation", delay_sec * 1000 * 1000 /*delay_usec*/, 2, 100); + + auto num_vols = 1ul; + + for (uint32_t i = 0; i < num_vols; ++i) { + auto vinfo = gen_vol_info(i); + auto id = vinfo.id; + vol_ids.emplace_back(id); + auto ret = vol_mgr->create_volume(std::move(vinfo)).get(); + ASSERT_TRUE(ret); + + auto vol_ptr = vol_mgr->lookup_volume(id); + // verify the volume is there + ASSERT_TRUE(vol_ptr != nullptr); + + // fake a write that will be delayed; + vol_mgr->write(vol_ptr, nullptr); + + // fake a read that will be delayed; + vol_mgr->read(vol_ptr, nullptr); + } + + auto const s = hb->get_stats(); + auto const dtype = hb->data_drive_type(); + LOGINFO("Stats: {}, drive_type: {}", s.to_string(), dtype); + + for (uint32_t i = 0; i < num_vols; ++i) { + auto id = vol_ids[i]; + auto ret = vol_mgr->remove_volume(id).get(); + ASSERT_TRUE(ret); + while (true) { + auto delay_secs = 1; + LOGINFO("Remove Volume {} triggered, waiting for {} seconds for IO to complete", + boost::uuids::to_string(id), delay_secs); + // sleep for a while + std::this_thread::sleep_for(std::chrono::milliseconds(delay_secs * 1000)); + auto vol_ptr = vol_mgr->lookup_volume(id); + if (!vol_ptr) { break; } + } + } } -#endif -private: -#ifdef _PRERELEASE - flip::FlipClient m_fc{iomgr_flip::instance()}; + g_helper->remove_flip("vol_fake_io_delay_simulation"); + + g_helper->restart(2); +} #endif -}; TEST_F(VolumeTest, CreateDestroyVolume) { std::vector< volume_id_t > vol_ids; @@ -101,7 +144,7 @@ TEST_F(VolumeTest, CreateDestroyVolume) { } } - g_helper->restart(5); + g_helper->restart(2); } TEST_F(VolumeTest, CreateVolumeThenRecover) { @@ -125,7 +168,7 @@ TEST_F(VolumeTest, CreateVolumeThenRecover) { } } - g_helper->restart(5); + g_helper->restart(2); // verify the volumes are still there { @@ -141,9 +184,8 @@ TEST_F(VolumeTest, CreateVolumeThenRecover) { } TEST_F(VolumeTest, DestroyVolumeCrashRecovery) { - #ifdef _PRERELEASE - set_flip_point("vol_destroy_crash_simulation"); + g_helper->set_flip_point("vol_destroy_crash_simulation"); #endif std::vector< volume_id_t > vol_ids; { @@ -171,7 +213,7 @@ TEST_F(VolumeTest, DestroyVolumeCrashRecovery) { } } - g_helper->restart(5); + g_helper->restart(2); } int main(int argc, char* argv[]) { @@ -193,4 +235,4 @@ int main(int argc, char* argv[]) { g_helper->teardown(); return ret; -} \ No newline at end of file +} diff --git a/src/lib/volume/volume.cpp b/src/lib/volume/volume.cpp index 5fdc800..7f295fc 100644 --- a/src/lib/volume/volume.cpp +++ b/src/lib/volume/volume.cpp @@ -101,12 +101,12 @@ bool Volume::init(bool is_recovery) { } void Volume::destroy() { - // 0. Set destroying state in superblock; - state_change(vol_state::DESTROYING); + LOGI("Start destroying volume: {}, uuid: {}", vol_info_->name, boost::uuids::to_string(id())); + destroy_started_ = true; // 1. destroy the repl dev; if (rd_) { - LOGI("Destroying repl dev for volume: {}, uuid: {}", vol_info_->name, boost::uuids::to_string(id())); + LOGI("Destroying repl dev for volume: {}", vol_info_->name); homestore::hs()->repl_service().remove_repl_dev(id()).get(); rd_ = nullptr; } @@ -232,7 +232,6 @@ VolumeManager::NullAsyncResult Volume::write(const vol_interface_req_ptr& vol_re VolumeManager::Result< folly::Unit > Volume::write_to_index(lba_t start_lba, lba_t end_lba, std::unordered_map< lba_t, BlockInfo >& blocks_info) { - // Use filter callback to get the old blkid. homestore::put_filter_cb_t filter_cb = [&blocks_info](BtreeKey const& key, BtreeValue const& existing_value, BtreeValue const& value) { diff --git a/src/lib/volume/volume.hpp b/src/lib/volume/volume.hpp index 08216f7..06d91d0 100644 --- a/src/lib/volume/volume.hpp +++ b/src/lib/volume/volume.hpp @@ -159,6 +159,12 @@ class Volume : public std::enable_shared_from_this< Volume > { VolumeManager::NullAsyncResult read(const vol_interface_req_ptr& req); + bool can_remove() { return !destroy_started_ && outstanding_reqs_.test_eq(0); } + + void inc_ref(uint64_t n = 1) { outstanding_reqs_.increment(n); } + void dec_ref(uint64_t n = 1) { outstanding_reqs_.decrement(n); } + uint64_t num_outstanding_reqs() const { return outstanding_reqs_.get(); } + private: // // this API will be called to initialize volume in both volume creation and volume recovery; @@ -178,10 +184,14 @@ class Volume : public std::enable_shared_from_this< Volume > { VolumeManager::Result< folly::Unit > read_from_index(const vol_interface_req_ptr& req, index_kv_list_t& index_kvs); private: - VolumeInfoPtr vol_info_; - ReplDevPtr rd_; - VolIdxTablePtr indx_tbl_; - superblk< vol_sb_t > sb_; + VolumeInfoPtr vol_info_; // volume info + ReplDevPtr rd_; // replication device for this volume, which provides read/write APIs to the volume; + VolIdxTablePtr indx_tbl_; // index table for this volume + superblk< vol_sb_t > sb_; // meta data of the volume + + sisl::atomic_counter< uint64_t > outstanding_reqs_{0}; // number of outstanding requests + bool destroy_started_{ + false}; // indicates if volume destroy has started, avoid destroy to be executed more than once. }; struct vol_repl_ctx : public homestore::repl_req_ctx { diff --git a/src/lib/volume_mgr.cpp b/src/lib/volume_mgr.cpp index 518ac8a..3aea49a 100644 --- a/src/lib/volume_mgr.cpp +++ b/src/lib/volume_mgr.cpp @@ -97,10 +97,19 @@ VolumeManager::NullAsyncResult HomeBlocksImpl::remove_volume(const volume_id_t& VolumePtr vol_ptr = nullptr; { auto lg = std::scoped_lock(vol_lock_); - if (auto it = vol_map_.find(id); it != vol_map_.end()) { vol_ptr = it->second; } + if (auto it = vol_map_.find(id); it != vol_map_.end()) { + vol_ptr = it->second; + } else { + LOGWARN("Volume with id {} not found, cannot remove", boost::uuids::to_string(id)); + return folly::Unit(); + } } - if (vol_ptr) { + vol_ptr->state_change(vol_state::DESTROYING); + + // if vol is already started with destroy or there is any outstanding reqs on the vol, we will not do anything + // on this vol and let reaper thread to handle it + if (vol_ptr->can_remove()) { // 2. do volume destroy; vol_ptr->destroy(); #ifdef _PRERELEASE @@ -114,7 +123,13 @@ VolumeManager::NullAsyncResult HomeBlocksImpl::remove_volume(const volume_id_t& LOGINFO("Volume {} removed successfully", vol_ptr->id_str()); } else { - LOGWARN("remove_volume with input id: {} not found", boost::uuids::to_string(id)); + if (vol_ptr) { + LOGD("Volume {} is in destroying state or has outstanding requests: {}, backing off and wait for GC to " + "cleanup.", + vol_ptr->id_str(), vol_ptr->num_outstanding_reqs()); + } else { + LOGWARN("Volume with id {} not found, cannot remove", boost::uuids::to_string(id)); + } } // Volume Destructor will be called after vol_ptr goes out of scope; return folly::Unit(); @@ -133,19 +148,63 @@ bool HomeBlocksImpl::get_stats(volume_id_t id, VolumeStats& stats) const { retur void HomeBlocksImpl::get_volume_ids(std::vector< volume_id_t >& vol_ids) const {} -VolumeManager::NullAsyncResult HomeBlocksImpl::write(const VolumePtr& vol_ptr, const vol_interface_req_ptr& vol_req) { - return vol_ptr->write(vol_req); +VolumeManager::NullAsyncResult HomeBlocksImpl::write(const VolumePtr& vol, const vol_interface_req_ptr& vol_req) { + if (vol->is_destroying()) { + LOGE("Volume {} is in destroying state, cannot write", vol->id_str()); + return folly::makeUnexpected(VolumeError::UNSUPPORTED_OP); + } + + vol->inc_ref(); + +#ifdef _PRERELEASE + if (delay_fake_io(vol)) { + // If we are delaying IO, we return immediately without calling vol->write + // and let the delay flip handle the completion later. + return folly::Unit(); + } +#endif + + auto ret = vol->write(vol_req); + vol->dec_ref(); + + return ret; } VolumeManager::NullAsyncResult HomeBlocksImpl::read(const VolumePtr& vol, const vol_interface_req_ptr& req) { - return vol->read(req); + if (vol->is_destroying()) { + LOGE("Volume {} is in destroying state, cannot read", vol->id_str()); + return folly::makeUnexpected(VolumeError::UNSUPPORTED_OP); + } + + vol->inc_ref(); + +#ifdef _PRERELEASE + if (delay_fake_io(vol)) { + // If we are delaying IO, we return immediately without calling vol->read + // and let the delay flip handle the completion later. + return folly::Unit(); + } +#endif + + auto ret = vol->read(req); + vol->dec_ref(); + return ret; } VolumeManager::NullAsyncResult HomeBlocksImpl::unmap(const VolumePtr& vol, const vol_interface_req_ptr& req) { - RELEASE_ASSERT(false, "Unmap Not implemented"); + LOGWARN("Unmap to vol: {} not implemented", vol->id_str()); + if (vol->is_destroying()) { + LOGE("Volume {} is in destroying state, cannot unmap", vol->id_str()); + return folly::makeUnexpected(VolumeError::UNSUPPORTED_OP); + } + return folly::Unit(); } +// +// we have to allow submit_io_batch even though a volume is in destroying state, because destroy relies on outstanding +// IOs to decrease to zero to proceed, e.g. submit_io_batch will allow outstanding io to complete; +// void HomeBlocksImpl::submit_io_batch() { homestore::data_service().submit_io_batch(); } void HomeBlocksImpl::on_write(int64_t lsn, const sisl::blob& header, const sisl::blob& key, @@ -205,4 +264,17 @@ void HomeBlocksImpl::on_write(int64_t lsn, const sisl::blob& header, const sisl: if (repl_ctx) { repl_ctx->promise_.setValue(folly::Unit()); } } +#ifdef _PRERELEASE +bool HomeBlocksImpl::delay_fake_io(VolumePtr v) { + if (iomgr_flip::instance()->delay_flip("vol_fake_io_delay_simulation", [this, v]() mutable { + LOGI("Resuming fake IO delay flip is done. Do nothing "); + v->dec_ref(); + })) { + LOGI("Slow down vol fake IO flip is enabled, scheduling to call later."); + return true; + } + return false; +} +#endif + } // namespace homeblocks