Skip to content

Commit a7166eb

Browse files
samarahupritha-srivastava
authored andcommitted
rgw/d4n: making LFUDAPolicy destructor lock free to prevent
deadlocks during rgw shutdown. Signed-off-by: Pritha Srivastava <[email protected]> Signed-off-by: Samarah <[email protected]>
1 parent 142ffde commit a7166eb

File tree

2 files changed

+57
-58
lines changed

2 files changed

+57
-58
lines changed

src/rgw/driver/d4n/d4n_policy.cc

Lines changed: 55 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,8 @@ int LFUDAPolicy::init(CephContext* cct, const DoutPrefixProvider* dpp, asio::io_
6767

6868
driver = _driver;
6969
if (dpp->get_cct()->_conf->d4n_writecache_enabled) {
70+
quit = false;
7071
tc = std::thread(&CachePolicy::cleaning, this, dpp);
71-
tc.detach();
7272
}
7373

7474
try {
@@ -624,44 +624,44 @@ void LFUDAPolicy::cleaning(const DoutPrefixProvider* dpp)
624624
ldpp_dout(dpp, 10) << __func__ << "(): State is INVALID; deleting object." << dendl;
625625
int ret = -1;
626626
//check if key exists and get the refcount of block, if greater than zero then modify the creationTime of dirty object to attempt to delete later
627-
std::unique_lock<std::mutex> ll(lfuda_lock);
628-
auto it = entries_map.find(e->key);
629-
if (it != entries_map.end()) {
630-
if (it->second->refcount > 0) {
631-
l.lock();
632-
//deferring the deletion of the invalid object
633-
(*e->handle)->creationTime = (*e->handle)->creationTime + interval/2;
634-
ldpp_dout(dpp, 20) << "LFUDAPolicy::" << __func__ << "(): updated creation time is: " << (*e->handle)->creationTime << dendl;
635-
object_heap.update(e->handle);
636-
l.unlock();
637-
continue;
638-
}
639-
ll.unlock();
640-
if ((ret = cacheDriver->delete_data(dpp, e->key, y)) == 0) {
641-
if (!(ret = erase(dpp, e->key, y))) {
642-
ldpp_dout(dpp, 0) << "Failed to delete head policy entry for: " << e->key << ", ret=" << ret << dendl; // TODO: what must occur during failure?
643-
}
644-
} else {
645-
ldpp_dout(dpp, 0) << "Failed to delete head object for: " << e->key << ", ret=" << ret << dendl;
646-
}
647-
} else {
648-
//ignore if block not found, as it could have been deleted earlier when refcount for it was 0
649-
ll.unlock();
650-
}
627+
std::unique_lock<std::mutex> ll(lfuda_lock);
628+
auto it = entries_map.find(e->key);
629+
if (it != entries_map.end()) {
630+
if (it->second->refcount > 0) {
631+
l.lock();
632+
//deferring the deletion of the invalid object
633+
(*e->handle)->creationTime = (*e->handle)->creationTime + interval/2;
634+
ldpp_dout(dpp, 20) << "LFUDAPolicy::" << __func__ << "(): updated creation time is: " << (*e->handle)->creationTime << dendl;
635+
object_heap.update(e->handle);
636+
l.unlock();
637+
continue;
638+
}
639+
ll.unlock();
640+
if ((ret = cacheDriver->delete_data(dpp, e->key, y)) == 0) {
641+
if (!(ret = erase(dpp, e->key, y))) {
642+
ldpp_dout(dpp, 0) << "Failed to delete head policy entry for: " << e->key << ", ret=" << ret << dendl; // TODO: what must occur during failure?
643+
}
644+
} else {
645+
ldpp_dout(dpp, 0) << "Failed to delete head object for: " << e->key << ", ret=" << ret << dendl;
646+
}
647+
} else {
648+
//ignore if block not found, as it could have been deleted earlier when refcount for it was 0
649+
ll.unlock();
650+
}
651651

652652
if (!e->delete_marker) {
653653
ret = delete_data_blocks(dpp, e, y);
654654
if (ret == 0) {
655655
erase_dirty_object(dpp, e->key, null_yield);
656656
} else if (ret == -EBUSY) {
657-
l.lock();
658-
//deferring the deletion of the invalid object
659-
(*e->handle)->creationTime = (*e->handle)->creationTime + interval/2;
660-
ldpp_dout(dpp, 20) << "LFUDAPolicy::" << __func__ << "(): updated creation time is: " << (*e->handle)->creationTime << dendl;
661-
object_heap.update(e->handle);
662-
l.unlock();
663-
continue;
664-
} else {
657+
l.lock();
658+
//deferring the deletion of the invalid object
659+
(*e->handle)->creationTime = (*e->handle)->creationTime + interval/2;
660+
ldpp_dout(dpp, 20) << "LFUDAPolicy::" << __func__ << "(): updated creation time is: " << (*e->handle)->creationTime << dendl;
661+
object_heap.update(e->handle);
662+
l.unlock();
663+
continue;
664+
} else {
665665
ldpp_dout(dpp, 0) << "Failed to delete blocks for: " << e->key << ", ret=" << ret << dendl;
666666
}
667667
}
@@ -736,7 +736,7 @@ void LFUDAPolicy::cleaning(const DoutPrefixProvider* dpp)
736736

737737
op_ret = processor->prepare(null_yield);
738738
if (op_ret < 0) {
739-
ldpp_dout(dpp, 20) << __func__ << "processor->prepare() returned ret=" << op_ret << dendl;
739+
ldpp_dout(dpp, 20) << __func__ << "processor->prepare() returned ret=" << op_ret << dendl;
740740
erase_dirty_object(dpp, e->key, null_yield);
741741
continue;
742742
}
@@ -765,7 +765,7 @@ void LFUDAPolicy::cleaning(const DoutPrefixProvider* dpp)
765765
do {
766766
ceph::bufferlist data;
767767
if (fst >= lst){
768-
break;
768+
break;
769769
}
770770
off_t cur_size = std::min<off_t>(fst + dpp->get_cct()->_conf->rgw_max_chunk_size, lst);
771771
off_t cur_len = cur_size - fst;
@@ -788,8 +788,7 @@ void LFUDAPolicy::cleaning(const DoutPrefixProvider* dpp)
788788

789789
op_ret = filter->process(std::move(data), ofs);
790790
if (op_ret < 0) {
791-
ldpp_dout(dpp, 20) << __func__ << "processor->process() returned ret="
792-
<< op_ret << dendl;
791+
ldpp_dout(dpp, 20) << __func__ << "processor->process() returned ret=" << op_ret << dendl;
793792
erase_dirty_object(dpp, e->key, null_yield);
794793
continue;
795794
}
@@ -833,13 +832,13 @@ void LFUDAPolicy::cleaning(const DoutPrefixProvider* dpp)
833832
block.size = cur_len;
834833
block.blockID = fst;
835834
if ((op_ret = cacheDriver->set_attr(dpp, oid_in_cache, RGW_CACHE_ATTR_DIRTY, "0", y)) == 0) {
836-
std::string dirty = "false";
837-
op_ret = blockDir->update_field(dpp, &block, "dirty", dirty, null_yield);
838-
if (op_ret < 0) {
839-
ldpp_dout(dpp, 0) << __func__ << "updating dirty flag in block directory failed, ret=" << op_ret << dendl;
840-
}
835+
std::string dirty = "false";
836+
op_ret = blockDir->update_field(dpp, &block, "dirty", dirty, null_yield);
837+
if (op_ret < 0) {
838+
ldpp_dout(dpp, 0) << __func__ << "updating dirty flag in block directory failed, ret=" << op_ret << dendl;
839+
}
841840
} else {
842-
ldpp_dout(dpp, 0) << __func__ << "(): Failed to update dirty xattr in cache, ret=" << op_ret << dendl;
841+
ldpp_dout(dpp, 0) << __func__ << "(): Failed to update dirty xattr in cache, ret=" << op_ret << dendl;
843842
}
844843

845844
fst += cur_len;
@@ -878,7 +877,7 @@ void LFUDAPolicy::cleaning(const DoutPrefixProvider* dpp)
878877
//hash entry for null block
879878
op_ret = blockDir->get(dpp, &null_block, y);
880879
if (op_ret < 0) {
881-
ldpp_dout(dpp, 0) << __func__ << "(): Failed to get latest entry in block directory for: " << null_block.cacheObj.objName << ", ret=" << ret << dendl;
880+
ldpp_dout(dpp, 0) << __func__ << "(): Failed to get latest entry in block directory for: " << null_block.cacheObj.objName << ", ret=" << ret << dendl;
882881
} else {
883882
if (null_block.version == e->version) {
884883
block.cacheObj.dirty = false;
@@ -917,7 +916,7 @@ void LFUDAPolicy::cleaning(const DoutPrefixProvider* dpp)
917916
std::string dirty = "false";
918917
op_ret = blockDir->update_field(dpp, &instance_block, "dirty", dirty, null_yield);
919918
if (op_ret < 0) {
920-
ldpp_dout(dpp, 20) << __func__ << "updating dirty flag in block directory for instance block failed!" << dendl;
919+
ldpp_dout(dpp, 20) << __func__ << "updating dirty flag in block directory for instance block failed!" << dendl;
921920
}
922921
//the next steps remove the entry from the ordered set and if needed the latest hash entry also in case of versioned buckets
923922
rgw::d4n::CacheBlock latest_block = block;
@@ -933,19 +932,19 @@ void LFUDAPolicy::cleaning(const DoutPrefixProvider* dpp)
933932
if (latest_block.version == e->version) {
934933
//remove object entry from ordered set of versions
935934
if (c_obj->have_instance()) {
936-
blockDir->del(dpp, &latest_block, y);
937-
if (ret < 0) {
938-
ldpp_dout(dpp, 0) << __func__ << "(): Failed to queue del for latest hash entry: " << latest_block.cacheObj.objName << ", ret=" << ret << dendl;
939-
continue;
935+
blockDir->del(dpp, &latest_block, y);
936+
if (ret < 0) {
937+
ldpp_dout(dpp, 0) << __func__ << "(): Failed to queue del for latest hash entry: " << latest_block.cacheObj.objName << ", ret=" << ret << dendl;
938+
continue;
940939
}
941940
}
942-
//delete entry from ordered set of objects, as older versions would have been written to the backend store
943-
ret = bucketDir->zrem(dpp, e->bucket_id, c_obj->get_name(), y, true);
944-
if (ret < 0) {
945-
blockDir->discard(dpp, y);
946-
ldpp_dout(dpp, 0) << __func__ << "(): Failed to queue zrem for object entry: " << c_obj->get_name() << ", ret=" << ret << dendl;
947-
continue;
948-
}
941+
//delete entry from ordered set of objects, as older versions would have been written to the backend store
942+
ret = bucketDir->zrem(dpp, e->bucket_id, c_obj->get_name(), y, true);
943+
if (ret < 0) {
944+
blockDir->discard(dpp, y);
945+
ldpp_dout(dpp, 0) << __func__ << "(): Failed to queue zrem for object entry: " << c_obj->get_name() << ", ret=" << ret << dendl;
946+
continue;
947+
}
949948
}
950949
ldpp_dout(dpp, 10) << "D4NFilterObject::" << __func__ << "(): Removing object name: "<< c_obj->get_name() << " score: " << std::setprecision(std::numeric_limits<double>::max_digits10) << e->creationTime << " from ordered set" << dendl;
951950
rgw::d4n::CacheObj dir_obj = rgw::d4n::CacheObj{

src/rgw/driver/d4n/d4n_policy.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ class LFUDAPolicy : public CachePolicy {
149149
std::mutex lfuda_cleaning_lock;
150150
std::condition_variable cond;
151151
std::condition_variable state_cond;
152-
bool quit{false};
152+
inline static std::atomic<bool> quit{false};
153153

154154
int age = 1, weightSum = 0, postedSum = 0;
155155
optional_yield y = null_yield;
@@ -195,9 +195,9 @@ class LFUDAPolicy : public CachePolicy {
195195
delete bucketDir;
196196
delete blockDir;
197197
delete objDir;
198-
std::lock_guard l(lfuda_cleaning_lock);
199198
quit = true;
200199
cond.notify_all();
200+
if (tc.joinable()) { tc.join(); }
201201
}
202202

203203
virtual int init(CephContext *cct, const DoutPrefixProvider* dpp, asio::io_context& io_context, rgw::sal::Driver *_driver);

0 commit comments

Comments
 (0)