Skip to content

Commit e541e99

Browse files
authored
Merge pull request ceph#59579 from leonidc/wip-leonidc0309-gw-deleting-state
gw deleting state
2 parents 29650c2 + 037537e commit e541e99

File tree

6 files changed

+219
-106
lines changed

6 files changed

+219
-106
lines changed

src/mon/NVMeofGwMap.cc

Lines changed: 128 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,15 @@ void NVMeofGwMap::to_gmap(
3737
for (const auto& gw_created_pair: gw_created_map) {
3838
const auto& gw_id = gw_created_pair.first;
3939
const auto& gw_created = gw_created_pair.second;
40+
gw_availability_t availability = gw_created.availability;
41+
// Gateways expect to see UNAVAILABLE, not DELETING
42+
// for entries in DELETING state
43+
if (gw_created.availability == gw_availability_t::GW_DELETING) {
44+
availability = gw_availability_t::GW_UNAVAILABLE;
45+
}
4046

4147
auto gw_state = NvmeGwClientState(
42-
gw_created.ana_grp_id, epoch, gw_created.availability);
48+
gw_created.ana_grp_id, epoch, availability);
4349
for (const auto& sub: gw_created.subsystems) {
4450
gw_state.subsystems.insert({
4551
sub.nqn,
@@ -78,10 +84,36 @@ int NVMeofGwMap::cfg_add_gw(
7884
for (auto& itr: created_gws[group_key]) {
7985
allocated.insert(itr.second.ana_grp_id);
8086
if (itr.first == gw_id) {
81-
dout(1) << __func__ << " ERROR create GW: already exists in map "
82-
<< gw_id << dendl;
83-
return -EEXIST ;
87+
if (itr.second.availability != gw_availability_t::GW_DELETING) {
88+
dout(1) << __func__ << " ERROR create GW: already exists in map "
89+
<< gw_id << dendl;
90+
return -EEXIST;
91+
} else {
92+
//this GW exists in the map in "Deleting" state
93+
// but user again creates it - need just set attribute values
94+
created_gws[group_key][gw_id].performed_full_startup = true;
95+
created_gws[group_key][gw_id].availability
96+
= gw_availability_t::GW_CREATED;
97+
dout(4) << "GW in Deleting state " << gw_id
98+
<< " was created again" << dendl;
99+
return 0;
100+
}
84101
}
102+
if (itr.second.availability == gw_availability_t::GW_DELETING) {
103+
//Was found some GW in "Deleting" state. Just to inherit its ANA group
104+
NvmeGwMonState & gw_created = created_gws[group_key][itr.first];
105+
created_gws[group_key][gw_id] = gw_created;
106+
// Deep copy of all data of "Deleting" GW
107+
created_gws[group_key][gw_id].performed_full_startup = true;
108+
created_gws[group_key][gw_id].availability
109+
= gw_availability_t::GW_CREATED;
110+
dout(4) << "Created GW inherits ANA group of deleting GW-id :"
111+
<< itr.first << " group " << itr.second.ana_grp_id << dendl;
112+
do_erase_gw_id(itr.first, group_key);
113+
dout(4) << "Created GWS after create/delete: "
114+
<< created_gws << dendl;
115+
return 0;
116+
}
85117
}
86118
if (allocated.size() == MAX_SUPPORTED_ANA_GROUPS) {
87119
dout(4) << "Warning: cannot add GW " << gw_id
@@ -125,7 +157,40 @@ int NVMeofGwMap::cfg_add_gw(
125157
int NVMeofGwMap::cfg_delete_gw(
126158
const NvmeGwId &gw_id, const NvmeGroupKey& group_key)
127159
{
128-
int rc = 0;
160+
if (HAVE_FEATURE(mon->get_quorum_con_features(), NVMEOFHA)) {
161+
dout(10) << " has NVMEOFHA: 1" << dendl;
162+
for (auto& gws_states: created_gws[group_key]) {
163+
if (gws_states.first == gw_id) {
164+
auto& state = gws_states.second;
165+
state.availability = gw_availability_t::GW_DELETING;
166+
dout(4) << " Deleting GW :"<< gw_id << " in state "
167+
<< state.availability << " Resulting GW availability: "
168+
<< state.availability << dendl;
169+
return 0;
170+
}
171+
}
172+
} else {
173+
return do_delete_gw(gw_id, group_key);
174+
}
175+
return -EINVAL;
176+
}
177+
178+
int NVMeofGwMap::do_erase_gw_id(const NvmeGwId &gw_id,
179+
const NvmeGroupKey& group_key) {
180+
181+
fsm_timers[group_key].erase(gw_id);
182+
if (fsm_timers[group_key].size() == 0)
183+
fsm_timers.erase(group_key);
184+
185+
created_gws[group_key].erase(gw_id);
186+
if (created_gws[group_key].size() == 0)
187+
created_gws.erase(group_key);
188+
return 0;
189+
}
190+
191+
int NVMeofGwMap::do_delete_gw(
192+
const NvmeGwId &gw_id, const NvmeGroupKey& group_key)
193+
{
129194
for (auto& gws_states: created_gws[group_key]) {
130195

131196
if (gws_states.first == gw_id) {
@@ -136,26 +201,52 @@ int NVMeofGwMap::cfg_delete_gw(
136201
gw_id, group_key,state_itr.second , state_itr.first, modified);
137202
}
138203
dout(10) << " Delete GW :"<< gw_id << " ANA grpid: "
139-
<< state.ana_grp_id << dendl;
204+
<< state.ana_grp_id << dendl;
140205
for (auto& itr: created_gws[group_key]) {
141206
// Update state map and other maps
142207
remove_grp_id(itr.first, group_key, state.ana_grp_id);
143208
// of all created gateways. Removed key = anagrp
144209
}
145-
fsm_timers[group_key].erase(gw_id);
146-
if (fsm_timers[group_key].size() == 0)
147-
fsm_timers.erase(group_key);
148-
149-
created_gws[group_key].erase(gw_id);
150-
if (created_gws[group_key].size() == 0)
151-
created_gws.erase(group_key);
152-
return rc;
210+
return do_erase_gw_id(gw_id, group_key);
153211
}
154212
}
155213

156214
return -EINVAL;
157215
}
158216

217+
int NVMeofGwMap::get_num_namespaces(const NvmeGwId &gw_id,
218+
const NvmeGroupKey& group_key, const BeaconSubsystems& subs)
219+
{
220+
auto grpid = created_gws[group_key][gw_id].ana_grp_id ;
221+
int num_ns = 0;
222+
for (auto & subs_it:subs) {
223+
for (auto & ns :subs_it.namespaces) {
224+
if (ns.anagrpid == (grpid+1)) {
225+
num_ns++;
226+
}
227+
}
228+
}
229+
return num_ns;
230+
}
231+
232+
void NVMeofGwMap::track_deleting_gws(const NvmeGroupKey& group_key,
233+
const BeaconSubsystems& subs, bool &propose_pending)
234+
{
235+
propose_pending = false;
236+
for (auto& itr: created_gws[group_key]) {
237+
auto &gw_id = itr.first;
238+
if (itr.second.availability == gw_availability_t::GW_DELETING) {
239+
int num_ns = 0;
240+
if ( (num_ns = get_num_namespaces(gw_id, group_key, subs)) == 0) {
241+
do_delete_gw(gw_id, group_key);
242+
propose_pending = true;
243+
}
244+
dout(4) << " to delete ? " << gw_id << " num_ns " << num_ns << dendl;
245+
break; // handle just one GW in "Deleting" state in time.
246+
}
247+
}
248+
}
249+
159250
int NVMeofGwMap::process_gw_map_gw_down(
160251
const NvmeGwId &gw_id, const NvmeGroupKey& group_key, bool &propose_pending)
161252
{
@@ -192,17 +283,8 @@ void NVMeofGwMap::process_gw_map_ka(
192283
dout(20) << "KA beacon from the GW " << gw_id
193284
<< " in state " << (int)st.availability << dendl;
194285

195-
if (st.availability == gw_availability_t::GW_CREATED) {
196-
// first time appears - allow IO traffic for this GW
197-
st.availability = gw_availability_t::GW_AVAILABLE;
198-
for (auto& state_itr: created_gws[group_key][gw_id].sm_state) {
199-
state_itr.second = gw_states_per_group_t::GW_STANDBY_STATE;
200-
}
201-
if (st.ana_grp_id != REDUNDANT_GW_ANA_GROUP_ID) { // not a redundand GW
202-
st.active_state(st.ana_grp_id);
203-
}
204-
propose_pending = true;
205-
} else if (st.availability == gw_availability_t::GW_UNAVAILABLE) {
286+
if (st.availability == gw_availability_t::GW_CREATED ||
287+
st.availability == gw_availability_t::GW_UNAVAILABLE) {
206288
st.availability = gw_availability_t::GW_AVAILABLE;
207289
if (st.ana_grp_id == REDUNDANT_GW_ANA_GROUP_ID) {
208290
for (auto& state_itr: created_gws[group_key][gw_id].sm_state) {
@@ -237,7 +319,9 @@ void NVMeofGwMap::handle_abandoned_ana_groups(bool& propose)
237319

238320
// 1. Failover missed : is there is a GW in unavailable state?
239321
// if yes, is its ANA group handled by some other GW?
240-
if (state.availability == gw_availability_t::GW_UNAVAILABLE &&
322+
if ((state.availability == gw_availability_t::GW_UNAVAILABLE ||
323+
state.availability == gw_availability_t::GW_DELETING ||
324+
state.availability == gw_availability_t::GW_CREATED) &&
241325
state.ana_grp_id != REDUNDANT_GW_ANA_GROUP_ID) {
242326
auto found_gw_for_ana_group = false;
243327
for (auto& gw_state2 : gws_states) {
@@ -251,7 +335,7 @@ void NVMeofGwMap::handle_abandoned_ana_groups(bool& propose)
251335
}
252336
// choose the GW for handle ana group
253337
if (found_gw_for_ana_group == false) {
254-
dout(10) << "Was not found the GW " << " that handles ANA grp "
338+
dout(20) << "Was not found the GW " << " that handles ANA grp "
255339
<< (int)state.ana_grp_id << " find candidate "<< dendl;
256340
for (auto& state_itr: created_gws[group_key][gw_id].sm_state) {
257341
find_failover_candidate(gw_id, group_key, state_itr.first, propose);
@@ -277,14 +361,23 @@ void NVMeofGwMap::set_failover_gw_for_ANA_group(
277361
const NvmeGwId &gw_id, NvmeAnaGrpId ANA_groupid)
278362
{
279363
NvmeGwMonState& gw_state = created_gws[group_key][gw_id];
364+
NvmeGwMonState& failed_gw_state = created_gws[group_key][failed_gw_id];
280365
epoch_t epoch;
281366
dout(10) << "Found failover GW " << gw_id
282367
<< " for ANA group " << (int)ANA_groupid << dendl;
368+
if (failed_gw_state.availability == gw_availability_t::GW_CREATED) {
369+
dout(10) << "Failover GW " << gw_id <<
370+
" takes over the group of GW in Created state " <<
371+
failed_gw_id << dendl;
372+
// just take over on the group of created GW
373+
gw_state.active_state(ANA_groupid);
374+
return;
375+
}
283376
int rc = blocklist_gw(failed_gw_id, group_key, ANA_groupid, epoch, true);
284377
if (rc) {
285378
//start failover even when nonces are empty !
286379
gw_state.active_state(ANA_groupid);
287-
} else{
380+
} else {
288381
gw_state.sm_state[ANA_groupid] =
289382
gw_states_per_group_t::GW_WAIT_BLOCKLIST_CMPL;
290383
gw_state.blocklist_data[ANA_groupid].osd_epoch = epoch;
@@ -507,7 +600,7 @@ void NVMeofGwMap::fsm_handle_gw_alive(
507600
// ana group wouldnt be taken back during blocklist wait period
508601
cancel_timer(gw_id, group_key, grpid);
509602
map_modified = true;
510-
} else{
603+
} else {
511604
dout(20) << "osd epoch not changed from "
512605
<< gw_map.blocklist_data[grpid].osd_epoch
513606
<< " to "<< last_osd_epoch
@@ -576,15 +669,17 @@ void NVMeofGwMap::fsm_handle_gw_down(
576669
void NVMeofGwMap::fsm_handle_gw_delete(
577670
const NvmeGwId &gw_id, const NvmeGroupKey& group_key,
578671
gw_states_per_group_t state , NvmeAnaGrpId grpid, bool &map_modified) {
672+
//This function is called when GW already passed Failover and its native
673+
//Ana group has no volumes, so some states are not relevant
579674
switch (state) {
580675
case gw_states_per_group_t::GW_STANDBY_STATE:
581676
case gw_states_per_group_t::GW_IDLE_STATE:
582677
case gw_states_per_group_t::GW_OWNER_WAIT_FAILBACK_PREPARED:
583678
{
584679
NvmeGwMonState& gw_state = created_gws[group_key][gw_id];
585680

586-
// Try to find GW that temporary owns my group - if found,
587-
// this GW should pass to standby for this group
681+
// Try to find GW that temporary owns gw-id group that is about to disappear!
682+
// - if found, this GW should pass to standby for this group
588683
if (grpid == gw_state.ana_grp_id) {
589684
auto& gateway_states = created_gws[group_key];
590685
for (auto& gs: gateway_states) {
@@ -605,43 +700,6 @@ void NVMeofGwMap::fsm_handle_gw_delete(
605700
}
606701
break;
607702

608-
case gw_states_per_group_t::GW_WAIT_BLOCKLIST_CMPL:
609-
{
610-
NvmeGwMonState& gw_state = created_gws[group_key][gw_id];
611-
cancel_timer(gw_id, group_key, grpid);
612-
map_modified = true;
613-
gw_state.standby_state(grpid);
614-
}
615-
break;
616-
617-
case gw_states_per_group_t::GW_WAIT_FAILBACK_PREPARED:
618-
{
619-
cancel_timer(gw_id, group_key, grpid);
620-
map_modified = true;
621-
for (auto& nqn_gws_state: created_gws[group_key]) {
622-
auto& st = nqn_gws_state.second;
623-
624-
// found GW that was intended for Failback for this ana grp
625-
if (st.sm_state[grpid] ==
626-
gw_states_per_group_t::GW_OWNER_WAIT_FAILBACK_PREPARED) {
627-
dout(4) << "Warning: Outgoing Failback when GW is deleted "
628-
<< "- to rollback it GW " << gw_id << "for ANA Group "
629-
<< grpid << dendl;
630-
st.standby_state(grpid);
631-
break;
632-
}
633-
}
634-
}
635-
break;
636-
637-
case gw_states_per_group_t::GW_ACTIVE_STATE:
638-
{
639-
NvmeGwMonState& gw_state = created_gws[group_key][gw_id];
640-
map_modified = true;
641-
gw_state.standby_state(grpid);
642-
}
643-
break;
644-
645703
default: {
646704
dout(4) << "Error : Invalid state " << state
647705
<< "for GW " << gw_id << dendl;
@@ -781,14 +839,14 @@ int NVMeofGwMap::blocklist_gw(
781839
new CMonRequestProposal(this, addr_vect, expires)
782840
);
783841
// return false;
784-
} else{
842+
} else {
785843
mon->nvmegwmon()->request_proposal(mon->osdmon());
786844
}
787845
}
788846
dout(10) << str << " mon->osdmon()->blocklist: epoch : " << epoch
789847
<< " address vector: " << addr_vect << " "
790848
<< addr_vect.size() << dendl;
791-
} else{
849+
} else {
792850
dout(4) << "Error: No nonces context present for gw: "
793851
<< gw_id << " ANA group: " << grpid << dendl;
794852
return 1;

src/mon/NVMeofGwMap.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@ class NVMeofGwMap
4444
std::map<NvmeGroupKey, NvmeGwTimers> fsm_timers;
4545

4646
void to_gmap(std::map<NvmeGroupKey, NvmeGwMonClientStates>& Gmap) const;
47-
47+
void track_deleting_gws(const NvmeGroupKey& group_key,
48+
const BeaconSubsystems& subs, bool &propose_pending);
4849
int cfg_add_gw(const NvmeGwId &gw_id, const NvmeGroupKey& group_key);
4950
int cfg_delete_gw(const NvmeGwId &gw_id, const NvmeGroupKey& group_key);
5051
void process_gw_map_ka(
@@ -64,6 +65,9 @@ class NVMeofGwMap
6465
void handle_gw_performing_fast_reboot(const NvmeGwId &gw_id,
6566
const NvmeGroupKey& group_key, bool &map_modified);
6667
private:
68+
int do_delete_gw(const NvmeGwId &gw_id, const NvmeGroupKey& group_key);
69+
int do_erase_gw_id(const NvmeGwId &gw_id,
70+
const NvmeGroupKey& group_key);
6771
void add_grp_id(
6872
const NvmeGwId &gw_id, const NvmeGroupKey& group_key,
6973
const NvmeAnaGrpId grpid);
@@ -95,7 +99,8 @@ class NVMeofGwMap
9599
void set_failover_gw_for_ANA_group(
96100
const NvmeGwId &failed_gw_id, const NvmeGroupKey& group_key,
97101
const NvmeGwId &gw_id, NvmeAnaGrpId groupid);
98-
102+
int get_num_namespaces(const NvmeGwId &gw_id,
103+
const NvmeGroupKey& group_key, const BeaconSubsystems& subs );
99104
int get_timer(
100105
const NvmeGwId &gw_id, const NvmeGroupKey& group_key,
101106
NvmeAnaGrpId anagrpid);

0 commit comments

Comments
 (0)