Skip to content

Commit d517b39

Browse files
lxbszidryomov
authored and committed
ceph: reconnect to the export targets on new mdsmaps
In the case where the export MDS has crashed just after the EImportStart journal is flushed, a standby MDS takes over for it, and when replaying the EImportStart journal the MDS will wait for the client to reconnect. That may never happen, because the client may not have registered or opened the sessions yet. When receiving a new map, ensure we reconnect to valid export targets as well if their sessions don't exist yet. Signed-off-by: Xiubo Li <[email protected]> Reviewed-by: Jeff Layton <[email protected]> Signed-off-by: Ilya Dryomov <[email protected]>
1 parent 692e171 commit d517b39

File tree

2 files changed

+65
-4
lines changed

2 files changed

+65
-4
lines changed

fs/ceph/mds_client.c

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <linux/ratelimit.h>
1212
#include <linux/bits.h>
1313
#include <linux/ktime.h>
14+
#include <linux/bitmap.h>
1415

1516
#include "super.h"
1617
#include "mds_client.h"
@@ -4171,13 +4172,21 @@ static void check_new_map(struct ceph_mds_client *mdsc,
41714172
struct ceph_mdsmap *newmap,
41724173
struct ceph_mdsmap *oldmap)
41734174
{
4174-
int i;
4175+
int i, j, err;
41754176
int oldstate, newstate;
41764177
struct ceph_mds_session *s;
4178+
unsigned long targets[DIV_ROUND_UP(CEPH_MAX_MDS, sizeof(unsigned long))] = {0};
41774179

41784180
dout("check_new_map new %u old %u\n",
41794181
newmap->m_epoch, oldmap->m_epoch);
41804182

4183+
if (newmap->m_info) {
4184+
for (i = 0; i < newmap->possible_max_rank; i++) {
4185+
for (j = 0; j < newmap->m_info[i].num_export_targets; j++)
4186+
set_bit(newmap->m_info[i].export_targets[j], targets);
4187+
}
4188+
}
4189+
41814190
for (i = 0; i < oldmap->possible_max_rank && i < mdsc->max_sessions; i++) {
41824191
if (!mdsc->sessions[i])
41834192
continue;
@@ -4231,6 +4240,7 @@ static void check_new_map(struct ceph_mds_client *mdsc,
42314240
if (s->s_state == CEPH_MDS_SESSION_RESTARTING &&
42324241
newstate >= CEPH_MDS_STATE_RECONNECT) {
42334242
mutex_unlock(&mdsc->mutex);
4243+
clear_bit(i, targets);
42344244
send_mds_reconnect(mdsc, s);
42354245
mutex_lock(&mdsc->mutex);
42364246
}
@@ -4253,6 +4263,51 @@ static void check_new_map(struct ceph_mds_client *mdsc,
42534263
}
42544264
}
42554265

4266+
/*
4267+
* Only open and reconnect sessions that don't exist yet.
4268+
*/
4269+
for (i = 0; i < newmap->possible_max_rank; i++) {
4270+
/*
4271+
* In case the import MDS is crashed just after
4272+
* the EImportStart journal is flushed, so when
4273+
* a standby MDS takes over it and is replaying
4274+
* the EImportStart journal the new MDS daemon
4275+
* will wait for the client to reconnect to it, but the
4276+
* client may never register/open the session yet.
4277+
*
4278+
* Will try to reconnect that MDS daemon if the
4279+
* rank number is in the export targets array and
4280+
* is the up:reconnect state.
4281+
*/
4282+
newstate = ceph_mdsmap_get_state(newmap, i);
4283+
if (!test_bit(i, targets) || newstate != CEPH_MDS_STATE_RECONNECT)
4284+
continue;
4285+
4286+
/*
4287+
* The session may be registered and opened by some
4288+
* requests which were choosing random MDSes during
4289+
* the mdsc->mutex's unlock/lock gap below in rare
4290+
* case. But the related MDS daemon will just queue
4291+
* that requests and be still waiting for the client's
4292+
* reconnection request in up:reconnect state.
4293+
*/
4294+
s = __ceph_lookup_mds_session(mdsc, i);
4295+
if (likely(!s)) {
4296+
s = __open_export_target_session(mdsc, i);
4297+
if (IS_ERR(s)) {
4298+
err = PTR_ERR(s);
4299+
pr_err("failed to open export target session, err %d\n",
4300+
err);
4301+
continue;
4302+
}
4303+
}
4304+
dout("send reconnect to export target mds.%d\n", i);
4305+
mutex_unlock(&mdsc->mutex);
4306+
send_mds_reconnect(mdsc, s);
4307+
ceph_put_mds_session(s);
4308+
mutex_lock(&mdsc->mutex);
4309+
}
4310+
42564311
for (i = 0; i < newmap->possible_max_rank && i < mdsc->max_sessions; i++) {
42574312
s = mdsc->sessions[i];
42584313
if (!s)

fs/ceph/mdsmap.c

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
122122
int err;
123123
u8 mdsmap_v;
124124
u16 mdsmap_ev;
125+
u32 target;
125126

126127
m = kzalloc(sizeof(*m), GFP_NOFS);
127128
if (!m)
@@ -260,9 +261,14 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
260261
sizeof(u32), GFP_NOFS);
261262
if (!info->export_targets)
262263
goto nomem;
263-
for (j = 0; j < num_export_targets; j++)
264-
info->export_targets[j] =
265-
ceph_decode_32(&pexport_targets);
264+
for (j = 0; j < num_export_targets; j++) {
265+
target = ceph_decode_32(&pexport_targets);
266+
if (target >= m->possible_max_rank) {
267+
err = -EIO;
268+
goto corrupt;
269+
}
270+
info->export_targets[j] = target;
271+
}
266272
} else {
267273
info->export_targets = NULL;
268274
}

0 commit comments

Comments
 (0)