@@ -37,9 +37,15 @@ void NVMeofGwMap::to_gmap(
3737 for (const auto & gw_created_pair: gw_created_map) {
3838 const auto & gw_id = gw_created_pair.first ;
3939 const auto & gw_created = gw_created_pair.second ;
40+ gw_availability_t availability = gw_created.availability ;
41+ // Gateways expect to see UNAVAILABLE, not DELETING
42+ // for entries in DELETING state
43+ if (gw_created.availability == gw_availability_t ::GW_DELETING) {
44+ availability = gw_availability_t ::GW_UNAVAILABLE;
45+ }
4046
4147 auto gw_state = NvmeGwClientState (
42- gw_created.ana_grp_id , epoch, gw_created. availability );
48+ gw_created.ana_grp_id , epoch, availability);
4349 for (const auto & sub: gw_created.subsystems ) {
4450 gw_state.subsystems .insert ({
4551 sub.nqn ,
@@ -78,10 +84,36 @@ int NVMeofGwMap::cfg_add_gw(
7884 for (auto & itr: created_gws[group_key]) {
7985 allocated.insert (itr.second .ana_grp_id );
8086 if (itr.first == gw_id) {
81- dout (1 ) << __func__ << " ERROR create GW: already exists in map "
82- << gw_id << dendl;
83- return -EEXIST ;
87+ if (itr.second .availability != gw_availability_t ::GW_DELETING) {
88+ dout (1 ) << __func__ << " ERROR create GW: already exists in map "
89+ << gw_id << dendl;
90+ return -EEXIST;
91+ } else {
92+ // this GW exists in the map in "Deleting" state
93+ // but user again creates it - need just set attribute values
94+ created_gws[group_key][gw_id].performed_full_startup = true ;
95+ created_gws[group_key][gw_id].availability
96+ = gw_availability_t ::GW_CREATED;
97+ dout (4 ) << " GW in Deleting state " << gw_id
98+ << " was created again" << dendl;
99+ return 0 ;
100+ }
84101 }
102+ if (itr.second .availability == gw_availability_t ::GW_DELETING) {
103+ // Was found some GW in "Deleting" state. Just to inherit its ANA group
104+ NvmeGwMonState & gw_created = created_gws[group_key][itr.first ];
105+ created_gws[group_key][gw_id] = gw_created;
106+ // Deep copy of all data of "Deleting" GW
107+ created_gws[group_key][gw_id].performed_full_startup = true ;
108+ created_gws[group_key][gw_id].availability
109+ = gw_availability_t ::GW_CREATED;
110+ dout (4 ) << " Created GW inherits ANA group of deleting GW-id :"
111+ << itr.first << " group " << itr.second .ana_grp_id << dendl;
112+ do_erase_gw_id (itr.first , group_key);
113+ dout (4 ) << " Created GWS after create/delete: "
114+ << created_gws << dendl;
115+ return 0 ;
116+ }
85117 }
86118 if (allocated.size () == MAX_SUPPORTED_ANA_GROUPS) {
87119 dout (4 ) << " Warning: cannot add GW " << gw_id
@@ -125,7 +157,40 @@ int NVMeofGwMap::cfg_add_gw(
125157int NVMeofGwMap::cfg_delete_gw (
126158 const NvmeGwId &gw_id, const NvmeGroupKey& group_key)
127159{
128- int rc = 0 ;
160+ if (HAVE_FEATURE (mon->get_quorum_con_features (), NVMEOFHA)) {
161+ dout (10 ) << " has NVMEOFHA: 1" << dendl;
162+ for (auto & gws_states: created_gws[group_key]) {
163+ if (gws_states.first == gw_id) {
164+ auto & state = gws_states.second ;
165+ state.availability = gw_availability_t ::GW_DELETING;
166+ dout (4 ) << " Deleting GW :" << gw_id << " in state "
167+ << state.availability << " Resulting GW availability: "
168+ << state.availability << dendl;
169+ return 0 ;
170+ }
171+ }
172+ } else {
173+ return do_delete_gw (gw_id, group_key);
174+ }
175+ return -EINVAL;
176+ }
177+
178+ int NVMeofGwMap::do_erase_gw_id (const NvmeGwId &gw_id,
179+ const NvmeGroupKey& group_key) {
180+
181+ fsm_timers[group_key].erase (gw_id);
182+ if (fsm_timers[group_key].size () == 0 )
183+ fsm_timers.erase (group_key);
184+
185+ created_gws[group_key].erase (gw_id);
186+ if (created_gws[group_key].size () == 0 )
187+ created_gws.erase (group_key);
188+ return 0 ;
189+ }
190+
191+ int NVMeofGwMap::do_delete_gw (
192+ const NvmeGwId &gw_id, const NvmeGroupKey& group_key)
193+ {
129194 for (auto & gws_states: created_gws[group_key]) {
130195
131196 if (gws_states.first == gw_id) {
@@ -136,26 +201,52 @@ int NVMeofGwMap::cfg_delete_gw(
136201 gw_id, group_key,state_itr.second , state_itr.first , modified);
137202 }
138203 dout (10 ) << " Delete GW :" << gw_id << " ANA grpid: "
139- << state.ana_grp_id << dendl;
204+ << state.ana_grp_id << dendl;
140205 for (auto & itr: created_gws[group_key]) {
141206 // Update state map and other maps
142207 remove_grp_id (itr.first , group_key, state.ana_grp_id );
143208 // of all created gateways. Removed key = anagrp
144209 }
145- fsm_timers[group_key].erase (gw_id);
146- if (fsm_timers[group_key].size () == 0 )
147- fsm_timers.erase (group_key);
148-
149- created_gws[group_key].erase (gw_id);
150- if (created_gws[group_key].size () == 0 )
151- created_gws.erase (group_key);
152- return rc;
210+ return do_erase_gw_id (gw_id, group_key);
153211 }
154212 }
155213
156214 return -EINVAL;
157215}
158216
217+ int NVMeofGwMap::get_num_namespaces (const NvmeGwId &gw_id,
218+ const NvmeGroupKey& group_key, const BeaconSubsystems& subs)
219+ {
220+ auto grpid = created_gws[group_key][gw_id].ana_grp_id ;
221+ int num_ns = 0 ;
222+ for (auto & subs_it:subs) {
223+ for (auto & ns :subs_it.namespaces ) {
224+ if (ns.anagrpid == (grpid+1 )) {
225+ num_ns++;
226+ }
227+ }
228+ }
229+ return num_ns;
230+ }
231+
232+ void NVMeofGwMap::track_deleting_gws (const NvmeGroupKey& group_key,
233+ const BeaconSubsystems& subs, bool &propose_pending)
234+ {
235+ propose_pending = false ;
236+ for (auto & itr: created_gws[group_key]) {
237+ auto &gw_id = itr.first ;
238+ if (itr.second .availability == gw_availability_t ::GW_DELETING) {
239+ int num_ns = 0 ;
240+ if ( (num_ns = get_num_namespaces (gw_id, group_key, subs)) == 0 ) {
241+ do_delete_gw (gw_id, group_key);
242+ propose_pending = true ;
243+ }
244+ dout (4 ) << " to delete ? " << gw_id << " num_ns " << num_ns << dendl;
245+ break ; // handle just one GW in "Deleting" state in time.
246+ }
247+ }
248+ }
249+
159250int NVMeofGwMap::process_gw_map_gw_down (
160251 const NvmeGwId &gw_id, const NvmeGroupKey& group_key, bool &propose_pending)
161252{
@@ -192,17 +283,8 @@ void NVMeofGwMap::process_gw_map_ka(
192283 dout (20 ) << " KA beacon from the GW " << gw_id
193284 << " in state " << (int )st.availability << dendl;
194285
195- if (st.availability == gw_availability_t ::GW_CREATED) {
196- // first time appears - allow IO traffic for this GW
197- st.availability = gw_availability_t ::GW_AVAILABLE;
198- for (auto & state_itr: created_gws[group_key][gw_id].sm_state ) {
199- state_itr.second = gw_states_per_group_t ::GW_STANDBY_STATE;
200- }
201- if (st.ana_grp_id != REDUNDANT_GW_ANA_GROUP_ID) { // not a redundand GW
202- st.active_state (st.ana_grp_id );
203- }
204- propose_pending = true ;
205- } else if (st.availability == gw_availability_t ::GW_UNAVAILABLE) {
286+ if (st.availability == gw_availability_t ::GW_CREATED ||
287+ st.availability == gw_availability_t ::GW_UNAVAILABLE) {
206288 st.availability = gw_availability_t ::GW_AVAILABLE;
207289 if (st.ana_grp_id == REDUNDANT_GW_ANA_GROUP_ID) {
208290 for (auto & state_itr: created_gws[group_key][gw_id].sm_state ) {
@@ -237,7 +319,9 @@ void NVMeofGwMap::handle_abandoned_ana_groups(bool& propose)
237319
238320 // 1. Failover missed : is there is a GW in unavailable state?
239321 // if yes, is its ANA group handled by some other GW?
240- if (state.availability == gw_availability_t ::GW_UNAVAILABLE &&
322+ if ((state.availability == gw_availability_t ::GW_UNAVAILABLE ||
323+ state.availability == gw_availability_t ::GW_DELETING ||
324+ state.availability == gw_availability_t ::GW_CREATED) &&
241325 state.ana_grp_id != REDUNDANT_GW_ANA_GROUP_ID) {
242326 auto found_gw_for_ana_group = false ;
243327 for (auto & gw_state2 : gws_states) {
@@ -251,7 +335,7 @@ void NVMeofGwMap::handle_abandoned_ana_groups(bool& propose)
251335 }
252336 // choose the GW for handle ana group
253337 if (found_gw_for_ana_group == false ) {
254- dout (10 ) << " Was not found the GW " << " that handles ANA grp "
338+ dout (20 ) << " Was not found the GW " << " that handles ANA grp "
255339 << (int )state.ana_grp_id << " find candidate " << dendl;
256340 for (auto & state_itr: created_gws[group_key][gw_id].sm_state ) {
257341 find_failover_candidate (gw_id, group_key, state_itr.first , propose);
@@ -277,14 +361,23 @@ void NVMeofGwMap::set_failover_gw_for_ANA_group(
277361 const NvmeGwId &gw_id, NvmeAnaGrpId ANA_groupid)
278362{
279363 NvmeGwMonState& gw_state = created_gws[group_key][gw_id];
364+ NvmeGwMonState& failed_gw_state = created_gws[group_key][failed_gw_id];
280365 epoch_t epoch;
281366 dout (10 ) << " Found failover GW " << gw_id
282367 << " for ANA group " << (int )ANA_groupid << dendl;
368+ if (failed_gw_state.availability == gw_availability_t ::GW_CREATED) {
369+ dout (10 ) << " Failover GW " << gw_id <<
370+ " takes over the group of GW in Created state " <<
371+ failed_gw_id << dendl;
372+ // just take over on the group of created GW
373+ gw_state.active_state (ANA_groupid);
374+ return ;
375+ }
283376 int rc = blocklist_gw (failed_gw_id, group_key, ANA_groupid, epoch, true );
284377 if (rc) {
285378 // start failover even when nonces are empty !
286379 gw_state.active_state (ANA_groupid);
287- } else {
380+ } else {
288381 gw_state.sm_state [ANA_groupid] =
289382 gw_states_per_group_t ::GW_WAIT_BLOCKLIST_CMPL;
290383 gw_state.blocklist_data [ANA_groupid].osd_epoch = epoch;
@@ -507,7 +600,7 @@ void NVMeofGwMap::fsm_handle_gw_alive(
507600 // ana group wouldnt be taken back during blocklist wait period
508601 cancel_timer (gw_id, group_key, grpid);
509602 map_modified = true ;
510- } else {
603+ } else {
511604 dout (20 ) << " osd epoch not changed from "
512605 << gw_map.blocklist_data [grpid].osd_epoch
513606 << " to " << last_osd_epoch
@@ -576,15 +669,17 @@ void NVMeofGwMap::fsm_handle_gw_down(
576669void NVMeofGwMap::fsm_handle_gw_delete (
577670 const NvmeGwId &gw_id, const NvmeGroupKey& group_key,
578671 gw_states_per_group_t state , NvmeAnaGrpId grpid, bool &map_modified) {
672+ // This function is called when GW already passed Failover and its native
673+ // Ana group has no volumes, so some states are not relevant
579674 switch (state) {
580675 case gw_states_per_group_t ::GW_STANDBY_STATE:
581676 case gw_states_per_group_t ::GW_IDLE_STATE:
582677 case gw_states_per_group_t ::GW_OWNER_WAIT_FAILBACK_PREPARED:
583678 {
584679 NvmeGwMonState& gw_state = created_gws[group_key][gw_id];
585680
586- // Try to find GW that temporary owns my group - if found,
587- // this GW should pass to standby for this group
681+ // Try to find GW that temporary owns gw-id group that is about to disappear!
682+ // - if found, this GW should pass to standby for this group
588683 if (grpid == gw_state.ana_grp_id ) {
589684 auto & gateway_states = created_gws[group_key];
590685 for (auto & gs: gateway_states) {
@@ -605,43 +700,6 @@ void NVMeofGwMap::fsm_handle_gw_delete(
605700 }
606701 break ;
607702
608- case gw_states_per_group_t ::GW_WAIT_BLOCKLIST_CMPL:
609- {
610- NvmeGwMonState& gw_state = created_gws[group_key][gw_id];
611- cancel_timer (gw_id, group_key, grpid);
612- map_modified = true ;
613- gw_state.standby_state (grpid);
614- }
615- break ;
616-
617- case gw_states_per_group_t ::GW_WAIT_FAILBACK_PREPARED:
618- {
619- cancel_timer (gw_id, group_key, grpid);
620- map_modified = true ;
621- for (auto & nqn_gws_state: created_gws[group_key]) {
622- auto & st = nqn_gws_state.second ;
623-
624- // found GW that was intended for Failback for this ana grp
625- if (st.sm_state [grpid] ==
626- gw_states_per_group_t ::GW_OWNER_WAIT_FAILBACK_PREPARED) {
627- dout (4 ) << " Warning: Outgoing Failback when GW is deleted "
628- << " - to rollback it GW " << gw_id << " for ANA Group "
629- << grpid << dendl;
630- st.standby_state (grpid);
631- break ;
632- }
633- }
634- }
635- break ;
636-
637- case gw_states_per_group_t ::GW_ACTIVE_STATE:
638- {
639- NvmeGwMonState& gw_state = created_gws[group_key][gw_id];
640- map_modified = true ;
641- gw_state.standby_state (grpid);
642- }
643- break ;
644-
645703 default : {
646704 dout (4 ) << " Error : Invalid state " << state
647705 << " for GW " << gw_id << dendl;
@@ -781,14 +839,14 @@ int NVMeofGwMap::blocklist_gw(
781839 new CMonRequestProposal (this , addr_vect, expires)
782840 );
783841 // return false;
784- } else {
842+ } else {
785843 mon->nvmegwmon ()->request_proposal (mon->osdmon ());
786844 }
787845 }
788846 dout (10 ) << str << " mon->osdmon()->blocklist: epoch : " << epoch
789847 << " address vector: " << addr_vect << " "
790848 << addr_vect.size () << dendl;
791- } else {
849+ } else {
792850 dout (4 ) << " Error: No nonces context present for gw: "
793851 << gw_id << " ANA group: " << grpid << dendl;
794852 return 1 ;
0 commit comments