Skip to content

Commit 11d962d

Browse files
authored
Merge pull request ceph#57697 from petrutlucian94/sv_wait
rbd-wnbd: wait for the disk cleanup to complete Reviewed-by: Ilya Dryomov <[email protected]>
2 parents 131effd + 9136cbd commit 11d962d

File tree

5 files changed

+149
-78
lines changed

5 files changed

+149
-78
lines changed

src/common/win32/service.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ void ServiceBase::shutdown(bool ignore_errors)
8686
DWORD original_state = status.dwCurrentState;
8787
set_status(SERVICE_STOP_PENDING);
8888

89+
dout(0) << "Shutdown requested." << dendl;
90+
8991
int err = shutdown_hook();
9092
if (err) {
9193
derr << "Shutdown service hook failed. Error code: " << err << dendl;
@@ -108,6 +110,8 @@ void ServiceBase::stop()
108110
DWORD original_state = status.dwCurrentState;
109111
set_status(SERVICE_STOP_PENDING);
110112

113+
dout(0) << "Service stop requested." << dendl;
114+
111115
int err = stop_hook();
112116
if (err) {
113117
derr << "Service stop hook failed. Error code: " << err << dendl;

src/tools/rbd_wnbd/rbd_mapping.cc

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,12 @@ int RbdMappingDispatcher::create(Config& cfg)
232232
return -EEXIST;
233233
}
234234

235+
if (stop_requested) {
236+
derr << "service stop requested, refusing to create new mapping."
237+
<< dendl;
238+
return -ESHUTDOWN;
239+
}
240+
235241
auto rbd_mapping = std::make_shared<RbdMapping>(
236242
cfg, client_cache,
237243
std::bind(
@@ -282,3 +288,129 @@ void RbdMappingDispatcher::disconnect_cbk(std::string devpath, int ret)
282288
mappings.erase(devpath);
283289
}
284290
}
291+
292+
int RbdMappingDispatcher::stop(
293+
bool hard_disconnect,
294+
int soft_disconnect_timeout,
295+
int worker_count)
296+
{
297+
stop_requested = true;
298+
299+
// Although not generally recommended, soft_disconnect_timeout can be 0,
300+
// which means infinite timeout.
301+
ceph_assert(soft_disconnect_timeout >= 0);
302+
ceph_assert(worker_count > 0);
303+
304+
std::atomic<int> err = 0;
305+
306+
boost::asio::thread_pool pool(worker_count);
307+
LARGE_INTEGER start_t, counter_freq;
308+
QueryPerformanceFrequency(&counter_freq);
309+
QueryPerformanceCounter(&start_t);
310+
311+
// We're initiating the disk removal through libwnbd, which uses PNP
312+
// to notify the storage stack that the disk is going to be removed
313+
// and waits for pending operations to complete.
314+
//
315+
// Once ready, the WNBD driver notifies rbd-wnbd that the disk has been
316+
// disconnected.
317+
auto mapped_devpaths = get_mapped_devpaths();
318+
for (const auto& devpath: mapped_devpaths) {
319+
boost::asio::post(pool,
320+
[devpath, start_t, counter_freq, soft_disconnect_timeout,
321+
hard_disconnect, &err]()
322+
{
323+
LARGE_INTEGER curr_t, elapsed_ms;
324+
QueryPerformanceCounter(&curr_t);
325+
elapsed_ms.QuadPart = curr_t.QuadPart - start_t.QuadPart;
326+
elapsed_ms.QuadPart *= 1000;
327+
elapsed_ms.QuadPart /= counter_freq.QuadPart;
328+
329+
int64_t time_left_ms = std::max(
330+
(int64_t)0,
331+
soft_disconnect_timeout * 1000 - elapsed_ms.QuadPart);
332+
333+
dout(1) << "Removing mapping: " << devpath
334+
<< ". Timeout: " << time_left_ms
335+
<< "ms. Hard disconnect: " << hard_disconnect
336+
<< dendl;
337+
338+
WNBD_REMOVE_OPTIONS remove_options = {0};
339+
remove_options.Flags.HardRemove = hard_disconnect || !time_left_ms;
340+
remove_options.Flags.HardRemoveFallback = true;
341+
remove_options.SoftRemoveTimeoutMs = time_left_ms;
342+
remove_options.SoftRemoveRetryIntervalMs = SOFT_REMOVE_RETRY_INTERVAL * 1000;
343+
344+
// This is asynchronous, it may take a few seconds for the disk to be
345+
// removed. We'll perform the wait outside the loop to speed up the
346+
// process.
347+
int r = WnbdRemoveEx(devpath.c_str(), &remove_options);
348+
if (r && r != ERROR_FILE_NOT_FOUND) {
349+
err = -EINVAL;
350+
derr << "Could not initiate mapping removal: " << devpath
351+
<< ". Error: " << win32_strerror(r) << dendl;
352+
} else {
353+
dout(1) << "Successfully initiated mapping removal: " << devpath << dendl;
354+
}
355+
});
356+
}
357+
pool.join();
358+
359+
if (err) {
360+
derr << "Could not initiate removal of all mappings. Error: " << err << dendl;
361+
return err;
362+
}
363+
364+
// Wait for the cleanup to complete on the rbd-wnbd service side.
365+
return wait_for_mappings_removal(10000);
366+
}
367+
368+
std::vector<std::string> RbdMappingDispatcher::get_mapped_devpaths() {
369+
std::vector<std::string> out;
370+
std::unique_lock l{map_mutex};
371+
372+
for (auto it = mappings.begin(); it != mappings.end(); it++) {
373+
out.push_back(it->first);
374+
}
375+
376+
return out;
377+
}
378+
379+
int RbdMappingDispatcher::get_mappings_count() {
380+
std::unique_lock l{map_mutex};
381+
return mappings.size();
382+
}
383+
384+
int RbdMappingDispatcher::wait_for_mappings_removal(int timeout_ms) {
385+
LARGE_INTEGER start_t, counter_freq;
386+
QueryPerformanceFrequency(&counter_freq);
387+
QueryPerformanceCounter(&start_t);
388+
389+
int time_left_ms = timeout_ms;
390+
int mappings_count = get_mappings_count();
391+
392+
while (mappings_count && time_left_ms > 0) {
393+
dout(1) << "Waiting for " << mappings_count << " mapping(s) to be removed. "
394+
<< "Time left: " << time_left_ms << "ms." << dendl;
395+
Sleep(2000);
396+
397+
LARGE_INTEGER curr_t, elapsed_ms;
398+
QueryPerformanceCounter(&curr_t);
399+
elapsed_ms.QuadPart = curr_t.QuadPart - start_t.QuadPart;
400+
elapsed_ms.QuadPart *= 1000;
401+
elapsed_ms.QuadPart /= counter_freq.QuadPart;
402+
403+
time_left_ms = timeout_ms - elapsed_ms.QuadPart;
404+
mappings_count = get_mappings_count();
405+
}
406+
407+
if (mappings_count) {
408+
derr << "Timed out waiting for disk mappings to be removed. "
409+
<< "Remaining mapping(s): " << mappings_count << dendl;
410+
return -ETIMEDOUT;
411+
} else {
412+
dout(1) << "Successfully removed all mappings." << dendl;
413+
}
414+
415+
return 0;
416+
}

src/tools/rbd_wnbd/rbd_mapping.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,12 @@ class RbdMappingDispatcher
107107
std::map<std::string, std::shared_ptr<RbdMapping>> mappings;
108108
ceph::mutex map_mutex = ceph::make_mutex("RbdMappingDispatcher::MapMutex");
109109

110+
std::atomic<bool> stop_requested = false;
111+
110112
void disconnect_cbk(std::string devpath, int ret);
113+
int wait_for_mappings_removal(int timeout_ms);
114+
std::vector<std::string> get_mapped_devpaths();
115+
int get_mappings_count();
111116

112117
public:
113118
RbdMappingDispatcher(RadosClientCache& _client_cache)
@@ -116,4 +121,7 @@ class RbdMappingDispatcher
116121

117122
int create(Config& cfg);
118123
std::shared_ptr<RbdMapping> get_mapping(std::string& devpath);
124+
int stop(bool hard_disconnect,
125+
int soft_disconnect_timeout,
126+
int worker_count);
119127
};

src/tools/rbd_wnbd/rbd_wnbd.cc

Lines changed: 5 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -482,74 +482,6 @@ int restart_registered_mappings(
482482
return err;
483483
}
484484

485-
int disconnect_all_mappings(
486-
bool unregister,
487-
bool hard_disconnect,
488-
int soft_disconnect_timeout,
489-
int worker_count)
490-
{
491-
// Although not generally recommended, soft_disconnect_timeout can be 0,
492-
// which means infinite timeout.
493-
ceph_assert(soft_disconnect_timeout >= 0);
494-
ceph_assert(worker_count > 0);
495-
int64_t timeout_ms = soft_disconnect_timeout * 1000;
496-
497-
Config cfg;
498-
WNBDActiveDiskIterator iterator;
499-
int r;
500-
std::atomic<int> err = 0;
501-
502-
boost::asio::thread_pool pool(worker_count);
503-
LARGE_INTEGER start_t, counter_freq;
504-
QueryPerformanceFrequency(&counter_freq);
505-
QueryPerformanceCounter(&start_t);
506-
while (iterator.get(&cfg)) {
507-
boost::asio::post(pool,
508-
[cfg, start_t, counter_freq, timeout_ms,
509-
hard_disconnect, unregister, &err]() mutable
510-
{
511-
LARGE_INTEGER curr_t, elapsed_ms;
512-
QueryPerformanceCounter(&curr_t);
513-
elapsed_ms.QuadPart = curr_t.QuadPart - start_t.QuadPart;
514-
elapsed_ms.QuadPart *= 1000;
515-
elapsed_ms.QuadPart /= counter_freq.QuadPart;
516-
517-
int64_t time_left_ms = max((int64_t)0, timeout_ms - elapsed_ms.QuadPart);
518-
519-
cfg.hard_disconnect = hard_disconnect || !time_left_ms;
520-
cfg.hard_disconnect_fallback = true;
521-
cfg.soft_disconnect_timeout = time_left_ms / 1000;
522-
523-
dout(1) << "Removing mapping: " << cfg.devpath
524-
<< ". Timeout: " << cfg.soft_disconnect_timeout
525-
<< "s. Hard disconnect: " << cfg.hard_disconnect
526-
<< dendl;
527-
528-
int r = do_unmap(&cfg, unregister);
529-
if (r) {
530-
err = r;
531-
derr << "Could not remove mapping: " << cfg.devpath
532-
<< ". Error: " << r << dendl;
533-
} else {
534-
dout(1) << "Successfully removed mapping: " << cfg.devpath << dendl;
535-
}
536-
});
537-
}
538-
pool.join();
539-
540-
r = iterator.get_error();
541-
if (r == -ENOENT) {
542-
dout(0) << __func__ << ": wnbd adapter unavailable, "
543-
<< "assuming that no wnbd mappings exist." << dendl;
544-
err = 0;
545-
} else if (r) {
546-
derr << "Could not fetch all mappings. Error: " << r << dendl;
547-
err = r;
548-
}
549-
550-
return err;
551-
}
552-
553485
class RBDService : public ServiceBase {
554486
private:
555487
bool hard_disconnect;
@@ -565,7 +497,7 @@ class RBDService : public ServiceBase {
565497
ceph::mutex start_hook_lock = ceph::make_mutex("RBDService::StartLocker");
566498
ceph::mutex stop_hook_lock = ceph::make_mutex("RBDService::ShutdownLocker");
567499
bool started = false;
568-
std::atomic<bool> stop_requsted = false;
500+
std::atomic<bool> stop_requested = false;
569501

570502
public:
571503
RBDService(bool _hard_disconnect,
@@ -743,7 +675,7 @@ class RBDService : public ServiceBase {
743675
dout(0) << "monitoring wnbd adapter state changes" << dendl;
744676
// The event watcher will wait at most WMI_EVENT_TIMEOUT (2s)
745677
// and exit the loop if the service is being stopped.
746-
while (!stop_requsted) {
678+
while (!stop_requested) {
747679
IWbemClassObject* object;
748680
ULONG returned = 0;
749681

@@ -826,10 +758,10 @@ class RBDService : public ServiceBase {
826758
int stop_hook() override {
827759
std::unique_lock l{stop_hook_lock};
828760

829-
stop_requsted = true;
761+
stop_requested = true;
830762

831-
int r = disconnect_all_mappings(
832-
false, hard_disconnect, soft_disconnect_timeout, thread_count);
763+
int r = mapping_dispatcher.stop(
764+
hard_disconnect, soft_disconnect_timeout, thread_count);
833765

834766
if (adapter_monitor_thread.joinable()) {
835767
dout(10) << "waiting for wnbd event monitor thread" << dendl;

src/tools/rbd_wnbd/rbd_wnbd.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,6 @@ typedef struct {
5555
bool is_process_running(DWORD pid);
5656
void unmap_at_exit();
5757

58-
int disconnect_all_mappings(
59-
bool unregister,
60-
bool hard_disconnect,
61-
int soft_disconnect_timeout,
62-
int worker_count);
6358
int restart_registered_mappings(
6459
int worker_count, int total_timeout, int image_map_timeout);
6560
int map_device_using_same_process(std::string command_line);

0 commit comments

Comments
 (0)