Skip to content

Commit 3cdf529

Browse files
authored
Merge pull request ceph#60871 from leonidc/leonidc-epoch-filter
Epoch filtering Reviewed-by: Samuel Just <[email protected]> Reviewed-by: Aviv Caro <[email protected]> Reviewed-by: Ronen Friedman <[email protected]>
2 parents eecb479 + 9f14dab commit 3cdf529

File tree

11 files changed

+336
-66
lines changed

11 files changed

+336
-66
lines changed

src/common/options/mon.yaml.in

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,11 @@ options:
8989
receives a monitor group ID assignment before the gateway is fully up during
9090
initialization, a retry is required.
9191
default: 1000
92+
- name: mon_nvmeofgw_beacons_till_ack
93+
type: uint
94+
level: advanced
95+
default: 15
96+
desc: Number of beacons from MonClient before NVMeofGwMon sends ack-map to it
9297
services:
9398
- mon
9499
- name: mon_nvmeofgw_delete_grace

src/include/ceph_features.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ DEFINE_CEPH_FEATURE(42, 1, MSGR_KEEPALIVE2) // 4.3 (for consistency)
148148
DEFINE_CEPH_FEATURE(43, 1, OSD_POOLRESEND) // 4.13
149149
DEFINE_CEPH_FEATURE(44, 2, NVMEOFHA)
150150
DEFINE_CEPH_FEATURE_RETIRED(45, 1, OSD_SET_ALLOC_HINT, JEWEL, LUMINOUS)
151+
DEFINE_CEPH_FEATURE(45, 2, NVMEOFHAMAP)
151152
// available
152153
DEFINE_CEPH_FEATURE(46, 1, OSD_FADVISE_FLAGS)
153154
DEFINE_CEPH_FEATURE_RETIRED(46, 1, OSD_REPOP, JEWEL, LUMINOUS) // overlap
@@ -226,6 +227,7 @@ DEFINE_CEPH_FEATURE_RETIRED(63, 1, RESERVED_BROKEN, LUMINOUS, QUINCY) // client-
226227
CEPH_FEATURE_MSGR_KEEPALIVE2 | \
227228
CEPH_FEATURE_OSD_POOLRESEND | \
228229
CEPH_FEATUREMASK_NVMEOFHA | \
230+
CEPH_FEATUREMASK_NVMEOFHAMAP | \
229231
CEPH_FEATURE_OSD_FADVISE_FLAGS | \
230232
CEPH_FEATURE_MDS_QUOTA | \
231233
CEPH_FEATURE_CRUSH_V4 | \

src/mon/NVMeofGwMap.cc

Lines changed: 49 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,14 @@ int NVMeofGwMap::cfg_add_gw(
8383
const NvmeGwId &gw_id, const NvmeGroupKey& group_key)
8484
{
8585
std::set<NvmeAnaGrpId> allocated;
86+
if (HAVE_FEATURE(mon->get_quorum_con_features(), NVMEOFHAMAP)) {
87+
auto gw_epoch_it = gw_epoch.find(group_key);
88+
if (gw_epoch_it == gw_epoch.end()) {
89+
gw_epoch[group_key] = epoch;
90+
dout(10) << "Allocated first gw_epoch : group_key "
91+
<< group_key << " epoch " << gw_epoch[group_key] << dendl;
92+
}
93+
}
8694
for (auto& itr: created_gws[group_key]) {
8795
allocated.insert(itr.second.ana_grp_id);
8896
if (itr.first == gw_id) {
@@ -190,8 +198,10 @@ int NVMeofGwMap::do_erase_gw_id(const NvmeGwId &gw_id,
190198
fsm_timers.erase(group_key);
191199

192200
created_gws[group_key].erase(gw_id);
193-
if (created_gws[group_key].size() == 0)
201+
if (created_gws[group_key].size() == 0) {
194202
created_gws.erase(group_key);
203+
gw_epoch.erase(group_key);
204+
}
195205
return 0;
196206
}
197207

@@ -221,6 +231,23 @@ int NVMeofGwMap::do_delete_gw(
221231
return -EINVAL;
222232
}
223233

234+
void NVMeofGwMap::gw_performed_startup(const NvmeGwId &gw_id,
235+
const NvmeGroupKey& group_key, bool &propose_pending)
236+
{
237+
dout(4) << "GW performed the full startup " << gw_id << dendl;
238+
propose_pending = true;
239+
increment_gw_epoch( group_key);
240+
}
241+
242+
void NVMeofGwMap::increment_gw_epoch(const NvmeGroupKey& group_key)
243+
{
244+
if (HAVE_FEATURE(mon->get_quorum_con_features(), NVMEOFHAMAP)) {
245+
gw_epoch[group_key] ++;
246+
dout(4) << "incremented epoch of " << group_key
247+
<< " " << gw_epoch[group_key] << dendl;
248+
}
249+
}
250+
224251
int NVMeofGwMap::get_num_namespaces(const NvmeGwId &gw_id,
225252
const NvmeGroupKey& group_key, const BeaconSubsystems& subs)
226253
{
@@ -273,7 +300,10 @@ int NVMeofGwMap::process_gw_map_gw_no_subsys_no_listeners(
273300
gw_id, group_key, state_itr.second,state_itr.first, propose_pending);
274301
}
275302
propose_pending = true; // map should reflect that gw becomes Created
276-
if (propose_pending) validate_gw_map(group_key);
303+
if (propose_pending) {
304+
validate_gw_map(group_key);
305+
increment_gw_epoch(group_key);
306+
}
277307
} else {
278308
dout(1) << __FUNCTION__ << "ERROR GW-id was not found in the map "
279309
<< gw_id << dendl;
@@ -299,7 +329,10 @@ int NVMeofGwMap::process_gw_map_gw_down(
299329
state_itr.second = gw_states_per_group_t::GW_STANDBY_STATE;
300330
}
301331
propose_pending = true; // map should reflect that gw becomes Unavailable
302-
if (propose_pending) validate_gw_map(group_key);
332+
if (propose_pending) {
333+
validate_gw_map(group_key);
334+
increment_gw_epoch(group_key);
335+
}
303336
} else {
304337
dout(1) << __FUNCTION__ << "ERROR GW-id was not found in the map "
305338
<< gw_id << dendl;
@@ -338,7 +371,10 @@ void NVMeofGwMap::process_gw_map_ka(
338371
state_itr.first, last_osd_epoch, propose_pending);
339372
}
340373
}
341-
if (propose_pending) validate_gw_map(group_key);
374+
if (propose_pending) {
375+
validate_gw_map(group_key);
376+
increment_gw_epoch(group_key);
377+
}
342378
}
343379

344380
void NVMeofGwMap::handle_abandoned_ana_groups(bool& propose)
@@ -387,6 +423,7 @@ void NVMeofGwMap::handle_abandoned_ana_groups(bool& propose)
387423
}
388424
if (propose) {
389425
validate_gw_map(group_key);
426+
increment_gw_epoch(group_key);
390427
}
391428
}
392429
}
@@ -751,7 +788,10 @@ void NVMeofGwMap::fsm_handle_gw_delete(
751788
<< "for GW " << gw_id << dendl;
752789
}
753790
}
754-
if (map_modified) validate_gw_map(group_key);
791+
if (map_modified) {
792+
validate_gw_map(group_key);
793+
increment_gw_epoch(group_key);
794+
}
755795
}
756796

757797
void NVMeofGwMap::fsm_handle_to_expired(
@@ -817,7 +857,10 @@ void NVMeofGwMap::fsm_handle_to_expired(
817857
//another Trigger for GW down (failover)
818858
process_gw_map_gw_down(gw_id, group_key, map_modified);
819859
}
820-
if (map_modified) validate_gw_map(group_key);
860+
if (map_modified) {
861+
validate_gw_map(group_key);
862+
increment_gw_epoch(group_key);
863+
}
821864
}
822865

823866
struct CMonRequestProposal : public Context {

src/mon/NVMeofGwMap.h

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,21 @@ class NVMeofGwMap
4545

4646
// map that handles timers started by all Gateway FSMs
4747
std::map<NvmeGroupKey, NvmeGwTimers> fsm_timers;
48+
/**
49+
* gw_epoch
50+
*
51+
* Mapping from NvmeGroupKey -> epoch_t e such that e is the most recent
52+
* map epoch which affects NvmeGroupKey.
53+
*
54+
* The purpose of this map is to allow us to determine whether a particular
55+
* gw needs to be sent the current map. If a gw with NvmeGroupKey key already
56+
* has map epoch e, we only need to send a new map if gw_epoch[key] > e. See
57+
* check_sub for this logic.
58+
*
59+
* Map mutators generally need to invoke increment_gw_epoch(group_key) when
60+
* updating the map with a change affecting gws in group_key.
61+
*/
62+
std::map<NvmeGroupKey, epoch_t> gw_epoch;
4863

4964
void to_gmap(std::map<NvmeGroupKey, NvmeGwMonClientStates>& Gmap) const;
5065
void track_deleting_gws(const NvmeGroupKey& group_key,
@@ -70,6 +85,8 @@ class NVMeofGwMap
7085
NvmeAnaGrpId anagrpid, uint8_t value);
7186
void handle_gw_performing_fast_reboot(const NvmeGwId &gw_id,
7287
const NvmeGroupKey& group_key, bool &map_modified);
88+
void gw_performed_startup(const NvmeGwId &gw_id,
89+
const NvmeGroupKey& group_key, bool &propose_pending);
7390
private:
7491
int do_delete_gw(const NvmeGwId &gw_id, const NvmeGroupKey& group_key);
7592
int do_erase_gw_id(const NvmeGwId &gw_id,
@@ -115,6 +132,7 @@ class NVMeofGwMap
115132
NvmeAnaGrpId anagrpid);
116133
void validate_gw_map(
117134
const NvmeGroupKey& group_key);
135+
void increment_gw_epoch(const NvmeGroupKey& group_key);
118136

119137
public:
120138
int blocklist_gw(
@@ -123,21 +141,31 @@ class NVMeofGwMap
123141

124142
/**
 * Serialize the map into bl.
 *
 * The struct version (also used as the compat version) is 2 when the
 * features mask contains NVMEOFHAMAP and 1 otherwise. The payload is
 * epoch, created_gws and fsm_timers; gw_epoch is appended only for
 * version 2.
 *
 * @param bl        destination buffer list
 * @param features  feature mask that selects the encoding version and is
 *                  forwarded to the created_gws / fsm_timers encoders
 */
void encode(ceph::buffer::list &bl, uint64_t features) const {
  using ceph::encode;
  const uint8_t version = HAVE_FEATURE(features, NVMEOFHAMAP) ? 2 : 1;
  ENCODE_START(version, version, bl);
  encode(epoch, bl);                  // global map epoch
  encode(created_gws, bl, features);  // created GWs
  encode(fsm_timers, bl, features);
  if (version > 1) {
    encode(gw_epoch, bl);
  }
  ENCODE_FINISH(bl);
}
133158

134159
/**
 * Deserialize the map from bl.
 *
 * Reads epoch, created_gws and fsm_timers in that order. gw_epoch is
 * read only when the encoded struct version is 2 or newer; for older
 * encodings the member is left untouched.
 *
 * @param bl  iterator positioned at the start of an encoded map
 */
void decode(ceph::buffer::list::const_iterator &bl) {
  using ceph::decode;
  DECODE_START(2, bl);
  decode(epoch, bl);        // global map epoch
  decode(created_gws, bl);
  decode(fsm_timers, bl);
  if (struct_v > 1) {
    decode(gw_epoch, bl);
  }
  DECODE_FINISH(bl);
}
143171

0 commit comments

Comments
 (0)