Skip to content

Commit 69c7b2f

Browse files
committed
libceph: fix race between delayed_work() and ceph_monc_stop()
The way the delayed work is handled in ceph_monc_stop() is prone to races with mon_fault() and possibly also finish_hunting(). Both of these can requeue the delayed work, which wouldn't be canceled by any of the following code in case that happens after cancel_delayed_work_sync() runs -- __close_session() doesn't mess with the delayed work in order to avoid interfering with the hunting interval logic. This part was missed in commit b5d9170 ("libceph: behave in mon_fault() if cur_mon < 0") and use-after-free can still ensue on monc and objects that hang off of it, with monc->auth and monc->monmap being particularly susceptible to quickly being reused.

To fix this:

- clear monc->cur_mon and monc->hunting as part of closing the session in ceph_monc_stop()
- bail from delayed_work() if monc->cur_mon is cleared, similar to how it's done in mon_fault() and finish_hunting() (based on monc->hunting)
- call cancel_delayed_work_sync() after the session is closed

Cc: [email protected]
Link: https://tracker.ceph.com/issues/66857
Signed-off-by: Ilya Dryomov <[email protected]>
Reviewed-by: Xiubo Li <[email protected]>
1 parent 256abd8 commit 69c7b2f

File tree

1 file changed

+12
-2
lines changed

1 file changed

+12
-2
lines changed

net/ceph/mon_client.c

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1085,13 +1085,19 @@ static void delayed_work(struct work_struct *work)
10851085
struct ceph_mon_client *monc =
10861086
container_of(work, struct ceph_mon_client, delayed_work.work);
10871087

1088-
dout("monc delayed_work\n");
10891088
mutex_lock(&monc->mutex);
1089+
dout("%s mon%d\n", __func__, monc->cur_mon);
1090+
if (monc->cur_mon < 0) {
1091+
goto out;
1092+
}
1093+
10901094
if (monc->hunting) {
10911095
dout("%s continuing hunt\n", __func__);
10921096
reopen_session(monc);
10931097
} else {
10941098
int is_auth = ceph_auth_is_authenticated(monc->auth);
1099+
1100+
dout("%s is_authed %d\n", __func__, is_auth);
10951101
if (ceph_con_keepalive_expired(&monc->con,
10961102
CEPH_MONC_PING_TIMEOUT)) {
10971103
dout("monc keepalive timeout\n");
@@ -1116,6 +1122,8 @@ static void delayed_work(struct work_struct *work)
11161122
}
11171123
}
11181124
__schedule_delayed(monc);
1125+
1126+
out:
11191127
mutex_unlock(&monc->mutex);
11201128
}
11211129

@@ -1232,13 +1240,15 @@ EXPORT_SYMBOL(ceph_monc_init);
12321240
void ceph_monc_stop(struct ceph_mon_client *monc)
12331241
{
12341242
dout("stop\n");
1235-
cancel_delayed_work_sync(&monc->delayed_work);
12361243

12371244
mutex_lock(&monc->mutex);
12381245
__close_session(monc);
1246+
monc->hunting = false;
12391247
monc->cur_mon = -1;
12401248
mutex_unlock(&monc->mutex);
12411249

1250+
cancel_delayed_work_sync(&monc->delayed_work);
1251+
12421252
/*
12431253
* flush msgr queue before we destroy ourselves to ensure that:
12441254
* - any work that references our embedded con is finished.

0 commit comments

Comments
 (0)