Skip to content

Commit e55730c

Browse files
committed
mon: fix duplicated entity addr in the map during reboot of several nvmeof GWs
Signed-off-by: Leonid Chernin <[email protected]>
1 parent eb3d949 commit e55730c

File tree

6 files changed

+50
-6
lines changed

6 files changed

+50
-6
lines changed

src/common/options/mon.yaml.in

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,13 @@ options:
110110
default: 15_min
111111
services:
112112
- mon
113+
- name: mon_nvmeofgw_wrong_map_ignore_sec
114+
type: uint
115+
level: advanced
116+
desc: Period in seconds from MonClient startup to ignore wrong maps from Monitor
117+
default: 15
118+
services:
119+
- mon
113120
- name: mon_mgr_inactive_grace
114121
type: int
115122
level: advanced

src/mon/NVMeofGwMap.cc

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,25 @@ void NVMeofGwMap::gw_performed_startup(const NvmeGwId &gw_id,
239239
increment_gw_epoch( group_key);
240240
}
241241

242+
// Record `addr` as the address vector of gateway `gw_id` in group `group_key`.
// Before storing it, scan the gateways listed under the same group and reset
// the address vector of any OTHER gateway that currently holds the same value,
// so the same address is not kept for two gateways of the group.
//   gw_id     - gateway whose address vector is being set
//   group_key - group whose created_gws entries are scanned and updated
//   addr      - address that is wrapped into an entity_addrvec_t and stored
void NVMeofGwMap::set_addr_vect(const NvmeGwId &gw_id,
243+
const NvmeGroupKey& group_key, const entity_addr_t &addr) {
244+
entity_addrvec_t addrvec(addr);
245+
// Walk every gateway entry of this group looking for a duplicate address.
for (auto& gws_states: created_gws[group_key]) {
246+
auto &state = gws_states.second;
247+
auto &gw_found = gws_states.first;
248+
// Same address recorded under a different gateway id: treat as a duplicate.
if (state.addr_vect == addrvec && gw_found != gw_id) {
249+
/* This can happen when several GWs restart simultaneously and
250+
* each gets an entity_addr that differs from its previous one
251+
*/
252+
entity_addr_t a;
253+
state.addr_vect = entity_addrvec_t(a);// replace the duplicate with a vector built from a default-constructed address
254+
dout(4) << "found duplicated addr vect in gw " << gw_found << dendl;
255+
}
256+
}
257+
// Only after duplicates are cleared, store the address for the requested gateway.
created_gws[group_key][gw_id].addr_vect = addrvec;
258+
dout(10) << "Set addr vect " << addrvec << " for gw " << gw_id << dendl;
259+
}
260+
242261
void NVMeofGwMap::increment_gw_epoch(const NvmeGroupKey& group_key)
243262
{
244263
if (HAVE_FEATURE(mon->get_quorum_con_features(), NVMEOFHAMAP)) {

src/mon/NVMeofGwMap.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@ class NVMeofGwMap
8787
const NvmeGroupKey& group_key, bool &map_modified);
8888
void gw_performed_startup(const NvmeGwId &gw_id,
8989
const NvmeGroupKey& group_key, bool &propose_pending);
90+
void set_addr_vect(const NvmeGwId &gw_id,
91+
const NvmeGroupKey& group_key, const entity_addr_t &addr_vect);
9092
void skip_failovers_for_group(const NvmeGroupKey& group_key);
9193
private:
9294
int do_delete_gw(const NvmeGwId &gw_id, const NvmeGroupKey& group_key);

src/mon/NVMeofGwMon.cc

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -678,8 +678,7 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op)
678678
false) {
679679
pending_map.created_gws[group_key][gw_id].performed_full_startup = true;
680680
pending_map.gw_performed_startup(gw_id, group_key, gw_propose);
681-
pending_map.created_gws[group_key][gw_id].addr_vect =
682-
entity_addrvec_t(con->get_peer_addr());
681+
pending_map.set_addr_vect(gw_id, group_key, con->get_peer_addr());
683682
}
684683
LastBeacon lb = {gw_id, group_key};
685684
last_beacon[lb] = now; //Update last beacon
@@ -728,8 +727,7 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op)
728727
dout(4) << "Warning: entity addr need to set for GW client " << gw_id
729728
<< " was " << pending_map.created_gws[group_key][gw_id].addr_vect
730729
<< " now " << entity_addrvec_t(con->get_peer_addr()) << dendl;
731-
pending_map.created_gws[group_key][gw_id].addr_vect =
732-
entity_addrvec_t(con->get_peer_addr());
730+
pending_map.set_addr_vect(gw_id, group_key, con->get_peer_addr());
733731
gw_propose = true;
734732
}
735733
// deep copy the whole nonce map of this GW

src/nvmeof/NVMeofGwMonitorClient.cc

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ NVMeofGwMonitorClient::NVMeofGwMonitorClient(int argc, const char **argv) :
3939
osdmap_epoch(0),
4040
gwmap_epoch(0),
4141
last_map_time(std::chrono::steady_clock::now()),
42+
reset_timestamp(std::chrono::steady_clock::now()),
4243
monc{g_ceph_context, poolctx},
4344
client_messenger(Messenger::create(g_ceph_context, "async", entity_name_t::CLIENT(-1), "client", getpid())),
4445
objecter{g_ceph_context, client_messenger.get(), &monc, poolctx},
@@ -305,18 +306,32 @@ void NVMeofGwMonitorClient::shutdown()
305306

306307
void NVMeofGwMonitorClient::handle_nvmeof_gw_map(ceph::ref_t<MNVMeofGwMap> nmap)
307308
{
308-
last_map_time = std::chrono::steady_clock::now(); // record time of last monitor message
309+
auto now = std::chrono::steady_clock::now();
310+
last_map_time = now; // record time of last monitor message
309311

310312
auto &new_map = nmap->get_map();
311313
gwmap_epoch = nmap->get_gwmap_epoch();
312314
auto group_key = std::make_pair(pool, group);
313315
dout(10) << "handle nvmeof gw map: " << new_map << dendl;
314-
316+
uint64_t reset_elapsed_seconds =
317+
std::chrono::duration_cast<std::chrono::seconds>(now - reset_timestamp).count();
315318
NvmeGwClientState old_gw_state;
319+
uint64_t ignore_wrong_map_interval_sec =
320+
g_conf().get_val<uint64_t>("mon_nvmeofgw_wrong_map_ignore_sec");
316321
auto got_old_gw_state = get_gw_state("old map", map, group_key, name, old_gw_state);
317322
NvmeGwClientState new_gw_state;
318323
auto got_new_gw_state = get_gw_state("new map", new_map, group_key, name, new_gw_state);
319324

325+
/* It is possible that a wrong second map is sent by the monitor in rare cases when several GWs reboot
326+
* and entity_address of the monitor client changes. So Monitor may send the unicast map to the wrong destination
327+
* since this "old" address still appears in its map. It is asynchronous process in the monitor, better to protect
328+
* from this scenario by silently ignoring the wrong map. This can happen just in the first several seconds after restart
329+
*/
330+
if ( (reset_elapsed_seconds < ignore_wrong_map_interval_sec) &&
331+
!got_new_gw_state && got_old_gw_state) {
332+
dout(4) << "Wrong map received, Ignore it" << dendl;
333+
return;
334+
}
320335
// ensure that the gateway state has not vanished
321336
ceph_assert(got_new_gw_state || !got_old_gw_state);
322337

src/nvmeof/NVMeofGwMonitorClient.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ class NVMeofGwMonitorClient: public Dispatcher,
4545
epoch_t gwmap_epoch; // last received gw map epoch
4646
std::chrono::time_point<std::chrono::steady_clock>
4747
last_map_time; // used to panic on disconnect
48+
std::chrono::time_point<std::chrono::steady_clock>
49+
reset_timestamp; // used to bypass some validations
50+
4851
bool first_beacon = true;
4952
// init gw ssl opts
5053
void init_gw_ssl_opts();

0 commit comments

Comments
 (0)