Skip to content

Commit e7e607b

Browse files
lxbsz authored and idryomov committed
ceph: defer stopping mdsc delayed_work
Flushing the dirty buffer may take a long time if the cluster is overloaded or if there is a network issue. So we should ping the MDSs periodically to keep alive, else the MDS will blocklist the kclient.
Cc: [email protected]
Link: https://tracker.ceph.com/issues/61843
Signed-off-by: Xiubo Li <[email protected]>
Reviewed-by: Milind Changire <[email protected]>
Signed-off-by: Ilya Dryomov <[email protected]>
1 parent 5d0c230 commit e7e607b

File tree

3 files changed

+17
-2
lines changed

3 files changed

+17
-2
lines changed

fs/ceph/mds_client.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4764,7 +4764,7 @@ static void delayed_work(struct work_struct *work)
47644764

47654765
dout("mdsc delayed_work\n");
47664766

4767-
if (mdsc->stopping)
4767+
if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHED)
47684768
return;
47694769

47704770
mutex_lock(&mdsc->mutex);
@@ -4943,7 +4943,7 @@ void send_flush_mdlog(struct ceph_mds_session *s)
49434943
void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
49444944
{
49454945
dout("pre_umount\n");
4946-
mdsc->stopping = 1;
4946+
mdsc->stopping = CEPH_MDSC_STOPPING_BEGIN;
49474947

49484948
ceph_mdsc_iterate_sessions(mdsc, send_flush_mdlog, true);
49494949
ceph_mdsc_iterate_sessions(mdsc, lock_unlock_session, false);

fs/ceph/mds_client.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,11 @@ struct cap_wait {
380380
int want;
381381
};
382382

383+
/* Shutdown stages stored in mdsc->stopping; compared with >=, so order matters. */
enum {
384+
CEPH_MDSC_STOPPING_BEGIN = 1, /* set by ceph_mdsc_pre_umount() */
385+
CEPH_MDSC_STOPPING_FLUSHED = 2, /* set by ceph_kill_sb() after sync_filesystem(); delayed_work() returns early from this stage on */
386+
};
387+
383388
/*
384389
* mds client state
385390
*/

fs/ceph/super.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1374,6 +1374,16 @@ static void ceph_kill_sb(struct super_block *s)
13741374
ceph_mdsc_pre_umount(fsc->mdsc);
13751375
flush_fs_workqueues(fsc);
13761376

1377+
/*
1378+
* Though the kill_anon_super() will finally trigger the
1379+
* sync_filesystem() anyway, we still need to do it here
1380+
* and then bump the stage of shutdown to stop the work
1381+
* queue as early as possible.
1382+
*/
1383+
sync_filesystem(s);
1384+
1385+
fsc->mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
1386+
13771387
kill_anon_super(s);
13781388

13791389
fsc->client->extra_mon_dispatch = NULL;

0 commit comments

Comments
 (0)