Skip to content

Commit 3db3b4e

Browse files
committed
qa: add file sync stuck test support
This tests the file sync (fsync) of a directory, which may be stuck for up to 5 seconds. This happened because the related code waits for all the unsafe requests to get safe replies from the MDSes, but the MDSes consider it unnecessary to flush the mdlog immediately after an early reply, so the mdlog is only flushed every 5 seconds by the tick thread. This should already be fixed in kclient and libcephfs by triggering an mdlog flush before waiting for the requests' safe replies. Fixes: https://tracker.ceph.com/issues/55283 Signed-off-by: Xiubo Li <[email protected]>
1 parent b6fc548 commit 3db3b4e

File tree

1 file changed

+39
-11
lines changed

1 file changed

+39
-11
lines changed

qa/tasks/cephfs/test_misc.py

Lines changed: 39 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from tasks.cephfs.fuse_mount import FuseMount
44
from tasks.cephfs.cephfs_test_case import CephFSTestCase
55
from teuthology.exceptions import CommandFailedError
6+
from textwrap import dedent
67
import errno
78
import platform
89
import time
@@ -236,35 +237,62 @@ def test_fs_lsflags(self):
236237
self.assertEqual(lsflags["allow_multimds_snaps"], True)
237238
self.assertEqual(lsflags["allow_standby_replay"], True)
238239

239-
def test_filesystem_sync_stuck_for_around_5s(self):
240-
"""
241-
To check whether the fsync will be stuck to wait for the mdlog to be
242-
flushed for at most 5 seconds.
243-
"""
244-
245-
dir_path = "fsync_do_not_wait_mdlog_testdir"
240+
def _test_sync_stuck_for_around_5s(self, dir_path, file_sync=False):
246241
self.mount_a.run_shell(["mkdir", dir_path])
247242

243+
sync_dir_pyscript = dedent("""
244+
import os
245+
246+
path = "{path}"
247+
dfd = os.open(path, os.O_DIRECTORY)
248+
os.fsync(dfd)
249+
os.close(dfd)
250+
""".format(path=dir_path))
251+
248252
# run create/delete directories and test the sync time duration
249253
for i in range(300):
250254
for j in range(5):
251255
self.mount_a.run_shell(["mkdir", os.path.join(dir_path, f"{i}_{j}")])
252256
start = time.time()
253-
self.mount_a.run_shell(["sync"])
257+
if file_sync:
258+
self.mount_a.run_shell(['python3', '-c', sync_dir_pyscript])
259+
else:
260+
self.mount_a.run_shell(["sync"])
254261
duration = time.time() - start
255-
log.info(f"mkdir i = {i}, duration = {duration}")
262+
log.info(f"sync mkdir i = {i}, duration = {duration}")
256263
self.assertLess(duration, 4)
257264

258265
for j in range(5):
259266
self.mount_a.run_shell(["rm", "-rf", os.path.join(dir_path, f"{i}_{j}")])
260267
start = time.time()
261-
self.mount_a.run_shell(["sync"])
268+
if file_sync:
269+
self.mount_a.run_shell(['python3', '-c', sync_dir_pyscript])
270+
else:
271+
self.mount_a.run_shell(["sync"])
262272
duration = time.time() - start
263-
log.info(f"rmdir i = {i}, duration = {duration}")
273+
log.info(f"sync rmdir i = {i}, duration = {duration}")
264274
self.assertLess(duration, 4)
265275

266276
self.mount_a.run_shell(["rm", "-rf", dir_path])
267277

278+
def test_filesystem_sync_stuck_for_around_5s(self):
279+
"""
280+
To check whether the filesystem sync will be stuck to wait for the
281+
mdlog to be flushed for at most 5 seconds.
282+
"""
283+
284+
dir_path = "filesystem_sync_do_not_wait_mdlog_testdir"
285+
self._test_sync_stuck_for_around_5s(dir_path)
286+
287+
def test_file_sync_stuck_for_around_5s(self):
288+
"""
289+
To check whether the fsync of a directory will be stuck to wait for the
290+
mdlog to be flushed for at most 5 seconds.
291+
"""
292+
293+
dir_path = "file_sync_do_not_wait_mdlog_testdir"
294+
self._test_sync_stuck_for_around_5s(dir_path, True)
295+
268296

269297
class TestCacheDrop(CephFSTestCase):
270298
CLIENTS_REQUIRED = 1

0 commit comments

Comments
 (0)