@@ -171,6 +171,8 @@ int NVMeofGwMap::cfg_delete_gw(
171171 << state.availability << " Resulting GW availability: "
172172 << state.availability << dendl;
173173 state.subsystems .clear ();// ignore subsystems of this GW
174+ utime_t now = ceph_clock_now ();
175+ mon->nvmegwmon ()->gws_deleting_time [group_key][gw_id] = now;
174176 return 0 ;
175177 }
176178 }
@@ -895,10 +897,12 @@ struct CMonRequestProposal : public Context {
895897 }
896898};
897899
898- void NVMeofGwMap::get_health_checks (health_check_map_t *checks) const
900+ void NVMeofGwMap::get_health_checks (health_check_map_t *checks)
899901{
900902 list<string> singleGatewayDetail;
901903 list<string> gatewayDownDetail;
904+ list<string> gatewayInDeletingDetail;
905+ int deleting_gateways = 0 ;
902906 for (const auto & created_map_pair: created_gws) {
903907 const auto & group_key = created_map_pair.first ;
904908 auto & group = group_key.second ;
@@ -915,9 +919,37 @@ void NVMeofGwMap::get_health_checks(health_check_map_t *checks) const
915919 ostringstream ss;
916920 ss << " NVMeoF Gateway '" << gw_id << " ' is unavailable." ;
917921 gatewayDownDetail.push_back (ss.str ());
922+ } else if (gw_created.availability == gw_availability_t ::GW_DELETING) {
923+ deleting_gateways++;
924+ utime_t now = ceph_clock_now ();
925+ bool found_deleting_time = false ;
926+ auto gws_deleting_time = mon->nvmegwmon ()->gws_deleting_time ;
927+ auto group_it = gws_deleting_time.find (group_key);
928+ if (group_it != gws_deleting_time.end ()) {
929+ auto & gw_map = group_it->second ;
930+ auto gw_it = gw_map.find (gw_id);
931+ if (gw_it != gw_map.end ()) {
932+ found_deleting_time = true ;
933+ utime_t delete_time = gw_it->second ;
934+ if ((now - delete_time) > g_conf ().get_val <std::chrono::seconds>(" mon_nvmeofgw_delete_grace" ).count ()) {
935+ ostringstream ss;
936+ ss << " NVMeoF Gateway '" << gw_id << " ' is in deleting state." ;
937+ gatewayInDeletingDetail.push_back (ss.str ());
938+ }
939+ }
940+ }
941+ if (!found_deleting_time) {
942+ // DELETING gateway not found in gws_deleting_time, set timeout now
943+ mon->nvmegwmon ()->gws_deleting_time [group_key][gw_id] = now;
944+ }
918945 }
919946 }
920947 }
948+ if (deleting_gateways == 0 ) {
949+ // no gateway in GW_DELETING state currently, flush old gws_deleting_time
950+ mon->nvmegwmon ()->gws_deleting_time .clear ();
951+ }
952+
921953 if (!singleGatewayDetail.empty ()) {
922954 ostringstream ss;
923955 ss << singleGatewayDetail.size () << " group(s) have only 1 nvmeof gateway"
@@ -934,6 +966,15 @@ void NVMeofGwMap::get_health_checks(health_check_map_t *checks) const
934966 ss.str (), gatewayDownDetail.size ());
935967 d.detail .swap (gatewayDownDetail);
936968 }
969+ if (!gatewayInDeletingDetail.empty ()) {
970+ ostringstream ss;
971+ ss << gatewayInDeletingDetail.size () << " gateway(s) are in deleting state"
972+ << " ; namespaces are automatically balanced across remaining gateways, "
973+ << " this should take a few minutes." ;
974+ auto & d = checks->add (" NVMEOF_GATEWAY_DELETING" , HEALTH_WARN,
975+ ss.str (), gatewayInDeletingDetail.size ());
976+ d.detail .swap (gatewayInDeletingDetail);
977+ }
937978}
938979
939980int NVMeofGwMap::blocklist_gw (
0 commit comments