Skip to content

Commit 8fb15d1

Browse files
authored
Merge pull request ceph#63003 from leonidc/fix_duplicate_entity_addr
fix duplicated entity addr in the map during reboot of several GWs Reviewed-by: Samuel Just <[email protected]>
2 parents e4af373 + e55730c commit 8fb15d1

File tree

6 files changed

+50
-6
lines changed

6 files changed

+50
-6
lines changed

src/common/options/mon.yaml.in

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,13 @@ options:
111111
default: 15_min
112112
services:
113113
- mon
114+
- name: mon_nvmeofgw_wrong_map_ignore_sec
115+
type: uint
116+
level: advanced
117+
desc: Period in seconds from MonClient startup to ignore wrong maps from Monitor
118+
default: 15
119+
services:
120+
- mon
114121
- name: mon_mgr_inactive_grace
115122
type: int
116123
level: advanced

src/mon/NVMeofGwMap.cc

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,25 @@ void NVMeofGwMap::gw_performed_startup(const NvmeGwId &gw_id,
264264
}
265265
}
266266

267+
void NVMeofGwMap::set_addr_vect(const NvmeGwId &gw_id,
268+
const NvmeGroupKey& group_key, const entity_addr_t &addr) {
269+
entity_addrvec_t addrvec(addr);
270+
for (auto& gws_states: created_gws[group_key]) {
271+
auto &state = gws_states.second;
272+
auto &gw_found = gws_states.first;
273+
if (state.addr_vect == addrvec && gw_found != gw_id) {
274+
/* This can happen when several GWs restart simultaneously and
275+
* they get an entity_addr that differs from their previous one
276+
*/
277+
entity_addr_t a;
278+
state.addr_vect = entity_addrvec_t(a);// cleanup duplicated address
279+
dout(4) << "found duplicated addr vect in gw " << gw_found << dendl;
280+
}
281+
}
282+
created_gws[group_key][gw_id].addr_vect = addrvec;
283+
dout(10) << "Set addr vect " << addrvec << " for gw " << gw_id << dendl;
284+
}
285+
267286
void NVMeofGwMap::increment_gw_epoch(const NvmeGroupKey& group_key)
268287
{
269288
if (HAVE_FEATURE(mon->get_quorum_con_features(), NVMEOFHAMAP)) {

src/mon/NVMeofGwMap.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@ class NVMeofGwMap
8787
const NvmeGroupKey& group_key, bool &map_modified);
8888
void gw_performed_startup(const NvmeGwId &gw_id,
8989
const NvmeGroupKey& group_key, bool &propose_pending);
90+
void set_addr_vect(const NvmeGwId &gw_id,
91+
const NvmeGroupKey& group_key, const entity_addr_t &addr_vect);
9092
void skip_failovers_for_group(const NvmeGroupKey& group_key);
9193
private:
9294
int do_delete_gw(const NvmeGwId &gw_id, const NvmeGroupKey& group_key);

src/mon/NVMeofGwMon.cc

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -680,8 +680,7 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op)
680680
false) {
681681
pending_map.created_gws[group_key][gw_id].performed_full_startup = true;
682682
pending_map.gw_performed_startup(gw_id, group_key, gw_propose);
683-
pending_map.created_gws[group_key][gw_id].addr_vect =
684-
entity_addrvec_t(con->get_peer_addr());
683+
pending_map.set_addr_vect(gw_id, group_key, con->get_peer_addr());
685684
}
686685
LastBeacon lb = {gw_id, group_key};
687686
last_beacon[lb] = now; //Update last beacon
@@ -730,8 +729,7 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op)
730729
dout(4) << "Warning: entity addr need to set for GW client " << gw_id
731730
<< " was " << pending_map.created_gws[group_key][gw_id].addr_vect
732731
<< " now " << entity_addrvec_t(con->get_peer_addr()) << dendl;
733-
pending_map.created_gws[group_key][gw_id].addr_vect =
734-
entity_addrvec_t(con->get_peer_addr());
732+
pending_map.set_addr_vect(gw_id, group_key, con->get_peer_addr());
735733
gw_propose = true;
736734
}
737735
// deep copy the whole nonce map of this GW

src/nvmeof/NVMeofGwMonitorClient.cc

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ NVMeofGwMonitorClient::NVMeofGwMonitorClient(int argc, const char **argv) :
3939
osdmap_epoch(0),
4040
gwmap_epoch(0),
4141
last_map_time(std::chrono::steady_clock::now()),
42+
reset_timestamp(std::chrono::steady_clock::now()),
4243
monc{g_ceph_context, poolctx},
4344
client_messenger(Messenger::create(g_ceph_context, "async", entity_name_t::CLIENT(-1), "client", getpid())),
4445
objecter{g_ceph_context, client_messenger.get(), &monc, poolctx},
@@ -304,18 +305,32 @@ void NVMeofGwMonitorClient::shutdown()
304305

305306
void NVMeofGwMonitorClient::handle_nvmeof_gw_map(ceph::ref_t<MNVMeofGwMap> nmap)
306307
{
307-
last_map_time = std::chrono::steady_clock::now(); // record time of last monitor message
308+
auto now = std::chrono::steady_clock::now();
309+
last_map_time = now; // record time of last monitor message
308310

309311
auto &new_map = nmap->get_map();
310312
gwmap_epoch = nmap->get_gwmap_epoch();
311313
auto group_key = std::make_pair(pool, group);
312314
dout(10) << "handle nvmeof gw map: " << new_map << dendl;
313-
315+
uint64_t reset_elapsed_seconds =
316+
std::chrono::duration_cast<std::chrono::seconds>(now - reset_timestamp).count();
314317
NvmeGwClientState old_gw_state;
318+
uint64_t ignore_wrong_map_interval_sec =
319+
g_conf().get_val<uint64_t>("mon_nvmeofgw_wrong_map_ignore_sec");
315320
auto got_old_gw_state = get_gw_state("old map", map, group_key, name, old_gw_state);
316321
NvmeGwClientState new_gw_state;
317322
auto got_new_gw_state = get_gw_state("new map", new_map, group_key, name, new_gw_state);
318323

324+
/* In rare cases, when several GWs reboot at the same time, the monitor may send a wrong second map:
325+
* the entity_address of the monitor client changes, so the Monitor may send the unicast map to the wrong destination
326+
* because the "old" address still appears in its map. This is an asynchronous process in the monitor, so it is better to protect
327+
* against this scenario by silently ignoring the wrong map. This can happen only in the first several seconds after a restart.
328+
*/
329+
if ( (reset_elapsed_seconds < ignore_wrong_map_interval_sec) &&
330+
!got_new_gw_state && got_old_gw_state) {
331+
dout(4) << "Wrong map received, Ignore it" << dendl;
332+
return;
333+
}
319334
// ensure that the gateway state has not vanished
320335
ceph_assert(got_new_gw_state || !got_old_gw_state);
321336

src/nvmeof/NVMeofGwMonitorClient.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ class NVMeofGwMonitorClient: public Dispatcher,
4545
epoch_t gwmap_epoch; // last received gw map epoch
4646
std::chrono::time_point<std::chrono::steady_clock>
4747
last_map_time; // used to panic on disconnect
48+
std::chrono::time_point<std::chrono::steady_clock>
49+
reset_timestamp; // used to bypass some validations
50+
4851
bool first_beacon = true;
4952
// init gw ssl opts
5053
void init_gw_ssl_opts();

0 commit comments

Comments
 (0)