Skip to content

Commit b6fc548

Browse files
committed
qa: add filesystem sync stuck test support
This tests syncing the filesystem, which may get stuck for up to 5 seconds. This happened because the sync code waits for all the unsafe requests to get a safe reply from the MDSes, but the MDSes consider it unnecessary to flush the mdlog immediately after an early reply, so the mdlog is only flushed every 5 seconds by the tick thread. This should have been fixed in kclient and libcephfs by triggering an mdlog flush before waiting for the requests' safe replies. Fixes: https://tracker.ceph.com/issues/55283 Signed-off-by: Xiubo Li <[email protected]>
1 parent e43ba4c commit b6fc548

File tree

1 file changed

+31
-0
lines changed

1 file changed

+31
-0
lines changed

qa/tasks/cephfs/test_misc.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import time
99
import json
1010
import logging
11+
import os
1112

1213
log = logging.getLogger(__name__)
1314

@@ -235,6 +236,36 @@ def test_fs_lsflags(self):
235236
self.assertEqual(lsflags["allow_multimds_snaps"], True)
236237
self.assertEqual(lsflags["allow_standby_replay"], True)
237238

239+
def test_filesystem_sync_stuck_for_around_5s(self):
    """
    Check that a filesystem sync does not get stuck waiting for the
    mdlog to be flushed.

    Repeatedly create and then remove a small batch of directories on
    mount_a, running ``sync`` after each batch, and assert that every
    sync returns in under 4 seconds.
    """

    dir_path = "fsync_do_not_wait_mdlog_testdir"
    rounds = 300
    dirs_per_round = 5
    max_sync_seconds = 4

    def timed_sync(phase, i):
        # Use the monotonic clock: wall-clock time may jump (e.g. NTP
        # adjustments), which would skew the measured duration.
        start = time.monotonic()
        self.mount_a.run_shell(["sync"])
        duration = time.monotonic() - start
        log.info(f"{phase} i = {i}, duration = {duration}")
        self.assertLess(duration, max_sync_seconds)

    self.mount_a.run_shell(["mkdir", dir_path])

    # run create/delete directories and test the sync time duration
    for i in range(rounds):
        subdirs = [os.path.join(dir_path, f"{i}_{j}")
                   for j in range(dirs_per_round)]

        for subdir in subdirs:
            self.mount_a.run_shell(["mkdir", subdir])
        timed_sync("mkdir", i)

        for subdir in subdirs:
            self.mount_a.run_shell(["rm", "-rf", subdir])
        timed_sync("rmdir", i)

    self.mount_a.run_shell(["rm", "-rf", dir_path])
267+
268+
238269
class TestCacheDrop(CephFSTestCase):
239270
CLIENTS_REQUIRED = 1
240271

0 commit comments

Comments
 (0)