Skip to content

Commit e3dfcab

Browse files
lxbsz authored and idryomov committed
ceph: drop messages from MDS when unmounting
When unmounting, all the dirty buffers will be flushed, and after the last osd request has finished the last i_count reference will be released. Then the dirty caps/snaps are flushed to the MDSs, but the unmount won't wait for the possible acks. Handling those acks will ihold() the inodes while updating the metadata locally, which makes no sense any more at this point, and it will make evict_inodes() skip these inodes. If encryption is enabled, the kernel generates a warning when removing the encryption keys while the skipped inodes still hold the keyring: WARNING: CPU: 4 PID: 168846 at fs/crypto/keyring.c:242 fscrypt_destroy_keyring+0x7e/0xd0 CPU: 4 PID: 168846 Comm: umount Tainted: G S 6.1.0-rc5-ceph-g72ead199864c #1 Hardware name: Supermicro SYS-5018R-WR/X10SRW-F, BIOS 2.0 12/17/2015 RIP: 0010:fscrypt_destroy_keyring+0x7e/0xd0 RSP: 0018:ffffc9000b277e28 EFLAGS: 00010202 RAX: 0000000000000002 RBX: ffff88810d52ac00 RCX: ffff88810b56aa00 RDX: 0000000080000000 RSI: ffffffff822f3a09 RDI: ffff888108f59000 RBP: ffff8881d394fb88 R08: 0000000000000028 R09: 0000000000000000 R10: 0000000000000001 R11: 11ff4fe6834fcd91 R12: ffff8881d394fc40 R13: ffff888108f59000 R14: ffff8881d394f800 R15: 0000000000000000 FS: 00007fd83f6f1080(0000) GS:ffff88885fd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f918d417000 CR3: 000000017f89a005 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: <TASK> generic_shutdown_super+0x47/0x120 kill_anon_super+0x14/0x30 ceph_kill_sb+0x36/0x90 [ceph] deactivate_locked_super+0x29/0x60 cleanup_mnt+0xb8/0x140 task_work_run+0x67/0xb0 exit_to_user_mode_prepare+0x23d/0x240 syscall_exit_to_user_mode+0x25/0x60 do_syscall_64+0x40/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7fd83dc39e9b Later the kernel will crash when iput() is called on those inodes and dereferences "sb->s_master_keys", which has been released by the 
generic_shutdown_super(). Link: https://tracker.ceph.com/issues/59162 Signed-off-by: Xiubo Li <[email protected]> Reviewed-and-tested-by: Luís Henriques <[email protected]> Reviewed-by: Milind Changire <[email protected]> Signed-off-by: Ilya Dryomov <[email protected]>
1 parent 230bd8b commit e3dfcab

File tree

7 files changed

+109
-22
lines changed

7 files changed

+109
-22
lines changed

fs/ceph/caps.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4247,6 +4247,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
42474247

42484248
dout("handle_caps from mds%d\n", session->s_mds);
42494249

4250+
if (!ceph_inc_mds_stopping_blocker(mdsc, session))
4251+
return;
4252+
42504253
/* decode */
42514254
end = msg->front.iov_base + msg->front.iov_len;
42524255
if (msg->front.iov_len < sizeof(*h))
@@ -4348,7 +4351,6 @@ void ceph_handle_caps(struct ceph_mds_session *session,
43484351
vino.snap, inode);
43494352

43504353
mutex_lock(&session->s_mutex);
4351-
inc_session_sequence(session);
43524354
dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq,
43534355
(unsigned)seq);
43544356

@@ -4457,6 +4459,8 @@ void ceph_handle_caps(struct ceph_mds_session *session,
44574459
done_unlocked:
44584460
iput(inode);
44594461
out:
4462+
ceph_dec_mds_stopping_blocker(mdsc);
4463+
44604464
ceph_put_string(extra_info.pool_ns);
44614465

44624466
/* Defer closing the sessions after s_mutex lock being released */

fs/ceph/mds_client.c

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4889,6 +4889,9 @@ static void handle_lease(struct ceph_mds_client *mdsc,
48894889

48904890
dout("handle_lease from mds%d\n", mds);
48914891

4892+
if (!ceph_inc_mds_stopping_blocker(mdsc, session))
4893+
return;
4894+
48924895
/* decode */
48934896
if (msg->front.iov_len < sizeof(*h) + sizeof(u32))
48944897
goto bad;
@@ -4907,8 +4910,6 @@ static void handle_lease(struct ceph_mds_client *mdsc,
49074910
dname.len, dname.name);
49084911

49094912
mutex_lock(&session->s_mutex);
4910-
inc_session_sequence(session);
4911-
49124913
if (!inode) {
49134914
dout("handle_lease no inode %llx\n", vino.ino);
49144915
goto release;
@@ -4970,9 +4971,13 @@ static void handle_lease(struct ceph_mds_client *mdsc,
49704971
out:
49714972
mutex_unlock(&session->s_mutex);
49724973
iput(inode);
4974+
4975+
ceph_dec_mds_stopping_blocker(mdsc);
49734976
return;
49744977

49754978
bad:
4979+
ceph_dec_mds_stopping_blocker(mdsc);
4980+
49764981
pr_err("corrupt lease message\n");
49774982
ceph_msg_dump(msg);
49784983
}
@@ -5168,6 +5173,9 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
51685173
}
51695174

51705175
init_completion(&mdsc->safe_umount_waiters);
5176+
spin_lock_init(&mdsc->stopping_lock);
5177+
atomic_set(&mdsc->stopping_blockers, 0);
5178+
init_completion(&mdsc->stopping_waiter);
51715179
init_waitqueue_head(&mdsc->session_close_wq);
51725180
INIT_LIST_HEAD(&mdsc->waiting_for_map);
51735181
mdsc->quotarealms_inodes = RB_ROOT;

fs/ceph/mds_client.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -399,8 +399,9 @@ struct cap_wait {
399399
};
400400

401401
enum {
402-
CEPH_MDSC_STOPPING_BEGIN = 1,
403-
CEPH_MDSC_STOPPING_FLUSHED = 2,
402+
CEPH_MDSC_STOPPING_BEGIN = 1,
403+
CEPH_MDSC_STOPPING_FLUSHING = 2,
404+
CEPH_MDSC_STOPPING_FLUSHED = 3,
404405
};
405406

406407
/*
@@ -419,7 +420,11 @@ struct ceph_mds_client {
419420
struct ceph_mds_session **sessions; /* NULL for mds if no session */
420421
atomic_t num_sessions;
421422
int max_sessions; /* len of sessions array */
422-
int stopping; /* true if shutting down */
423+
424+
spinlock_t stopping_lock; /* serializes the stopping stage check with stopping_blockers updates */
425+
int stopping; /* the stage of shutting down */
426+
atomic_t stopping_blockers;
427+
struct completion stopping_waiter;
423428

424429
atomic64_t quotarealms_count; /* # realms with quota */
425430
/*

fs/ceph/quota.c

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -47,25 +47,23 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc,
4747
struct inode *inode;
4848
struct ceph_inode_info *ci;
4949

50+
if (!ceph_inc_mds_stopping_blocker(mdsc, session))
51+
return;
52+
5053
if (msg->front.iov_len < sizeof(*h)) {
5154
pr_err("%s corrupt message mds%d len %d\n", __func__,
5255
session->s_mds, (int)msg->front.iov_len);
5356
ceph_msg_dump(msg);
54-
return;
57+
goto out;
5558
}
5659

57-
/* increment msg sequence number */
58-
mutex_lock(&session->s_mutex);
59-
inc_session_sequence(session);
60-
mutex_unlock(&session->s_mutex);
61-
6260
/* lookup inode */
6361
vino.ino = le64_to_cpu(h->ino);
6462
vino.snap = CEPH_NOSNAP;
6563
inode = ceph_find_inode(sb, vino);
6664
if (!inode) {
6765
pr_warn("Failed to find inode %llu\n", vino.ino);
68-
return;
66+
goto out;
6967
}
7068
ci = ceph_inode(inode);
7169

@@ -78,6 +76,8 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc,
7876
spin_unlock(&ci->i_ceph_lock);
7977

8078
iput(inode);
79+
out:
80+
ceph_dec_mds_stopping_blocker(mdsc);
8181
}
8282

8383
static struct ceph_quotarealm_inode *

fs/ceph/snap.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1015,6 +1015,9 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
10151015
int locked_rwsem = 0;
10161016
bool close_sessions = false;
10171017

1018+
if (!ceph_inc_mds_stopping_blocker(mdsc, session))
1019+
return;
1020+
10181021
/* decode */
10191022
if (msg->front.iov_len < sizeof(*h))
10201023
goto bad;
@@ -1030,10 +1033,6 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
10301033
dout("%s from mds%d op %s split %llx tracelen %d\n", __func__,
10311034
mds, ceph_snap_op_name(op), split, trace_len);
10321035

1033-
mutex_lock(&session->s_mutex);
1034-
inc_session_sequence(session);
1035-
mutex_unlock(&session->s_mutex);
1036-
10371036
down_write(&mdsc->snap_rwsem);
10381037
locked_rwsem = 1;
10391038

@@ -1151,6 +1150,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
11511150
up_write(&mdsc->snap_rwsem);
11521151

11531152
flush_snaps(mdsc);
1153+
ceph_dec_mds_stopping_blocker(mdsc);
11541154
return;
11551155

11561156
bad:
@@ -1160,6 +1160,8 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
11601160
if (locked_rwsem)
11611161
up_write(&mdsc->snap_rwsem);
11621162

1163+
ceph_dec_mds_stopping_blocker(mdsc);
1164+
11631165
if (close_sessions)
11641166
ceph_mdsc_close_sessions(mdsc);
11651167
return;

fs/ceph/super.c

Lines changed: 70 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1462,25 +1462,90 @@ static int ceph_init_fs_context(struct fs_context *fc)
14621462
return -ENOMEM;
14631463
}
14641464

1465+
/*
1466+
* Return true if it successfully increases the blocker counter,
1467+
* or false if the mdsc has already entered the flushing (or a later) stopping stage.
1468+
*/
1469+
static bool __inc_stopping_blocker(struct ceph_mds_client *mdsc)
1470+
{
1471+
spin_lock(&mdsc->stopping_lock);
1472+
if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHING) {
1473+
spin_unlock(&mdsc->stopping_lock);
1474+
return false;
1475+
}
1476+
atomic_inc(&mdsc->stopping_blockers);
1477+
spin_unlock(&mdsc->stopping_lock);
1478+
return true;
1479+
}
1480+
1481+
static void __dec_stopping_blocker(struct ceph_mds_client *mdsc)
1482+
{
1483+
spin_lock(&mdsc->stopping_lock);
1484+
if (!atomic_dec_return(&mdsc->stopping_blockers) &&
1485+
mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHING)
1486+
complete_all(&mdsc->stopping_waiter);
1487+
spin_unlock(&mdsc->stopping_lock);
1488+
}
1489+
1490+
/* For metadata IO requests */
1491+
bool ceph_inc_mds_stopping_blocker(struct ceph_mds_client *mdsc,
1492+
struct ceph_mds_session *session)
1493+
{
1494+
mutex_lock(&session->s_mutex);
1495+
inc_session_sequence(session);
1496+
mutex_unlock(&session->s_mutex);
1497+
1498+
return __inc_stopping_blocker(mdsc);
1499+
}
1500+
1501+
void ceph_dec_mds_stopping_blocker(struct ceph_mds_client *mdsc)
1502+
{
1503+
__dec_stopping_blocker(mdsc);
1504+
}
1505+
14651506
static void ceph_kill_sb(struct super_block *s)
14661507
{
14671508
struct ceph_fs_client *fsc = ceph_sb_to_client(s);
1509+
struct ceph_mds_client *mdsc = fsc->mdsc;
1510+
bool wait;
14681511

14691512
dout("kill_sb %p\n", s);
14701513

1471-
ceph_mdsc_pre_umount(fsc->mdsc);
1514+
ceph_mdsc_pre_umount(mdsc);
14721515
flush_fs_workqueues(fsc);
14731516

14741517
/*
14751518
* Though the kill_anon_super() will finally trigger the
1476-
* sync_filesystem() anyway, we still need to do it here
1477-
* and then bump the stage of shutdown to stop the work
1478-
* queue as earlier as possible.
1519+
* sync_filesystem() anyway, we still need to do it here and
1520+
* then bump the stage of shutdown. This will allow us to
1521+
* drop any further messages from MDSs, since handling them
1522+
* would only increase the inodes' i_count reference counters,
1523+
* which makes no sense any more.
1524+
*
1525+
* Without this when evicting the inodes it may fail in the
1526+
* kill_anon_super(), which will trigger a warning when
1527+
* destroying the fscrypt keyring and then possibly trigger
1528+
* a further crash in ceph module when the iput() tries to
1529+
* evict the inodes later.
14791530
*/
14801531
sync_filesystem(s);
14811532

1482-
fsc->mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
1533+
spin_lock(&mdsc->stopping_lock);
1534+
mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHING;
1535+
wait = !!atomic_read(&mdsc->stopping_blockers);
1536+
spin_unlock(&mdsc->stopping_lock);
1537+
1538+
if (wait && atomic_read(&mdsc->stopping_blockers)) {
1539+
long timeleft = wait_for_completion_killable_timeout(
1540+
&mdsc->stopping_waiter,
1541+
fsc->client->options->mount_timeout);
1542+
if (!timeleft) /* timed out */
1543+
pr_warn("umount timed out, %ld\n", timeleft);
1544+
else if (timeleft < 0) /* killed */
1545+
pr_warn("umount was killed, %ld\n", timeleft);
1546+
}
14831547

1548+
mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
14841549
kill_anon_super(s);
14851550

14861551
fsc->client->extra_mon_dispatch = NULL;

fs/ceph/super.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1413,4 +1413,7 @@ extern bool ceph_quota_update_statfs(struct ceph_fs_client *fsc,
14131413
struct kstatfs *buf);
14141414
extern void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc);
14151415

1416+
bool ceph_inc_mds_stopping_blocker(struct ceph_mds_client *mdsc,
1417+
struct ceph_mds_session *session);
1418+
void ceph_dec_mds_stopping_blocker(struct ceph_mds_client *mdsc);
14161419
#endif /* _FS_CEPH_SUPER_H */

0 commit comments

Comments
 (0)