Skip to content

Commit e32fb12

Browse files
committed
qa/cephfs: check that a completed quiesce doesn't hold remote auth pins
Signed-off-by: Leonid Usov <[email protected]>
1 parent c395c78 commit e32fb12

File tree

1 file changed

+60
-2
lines changed

1 file changed

+60
-2
lines changed

qa/tasks/cephfs/test_quiesce.py

Lines changed: 60 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -697,8 +697,66 @@ def test_quiesce_path_splitauth(self):
697697
op = self.fs.rank_tell(["quiesce", "path", self.subvolume, '--await'], rank=0, check_status=False)['op']
698698
self.assertEqual(op['result'], -1) # EPERM
699699

700-
@unittest.skip("https://tracker.ceph.com/issues/66152")
701-
def test_quiesce_drops_remote_authpins_on_failure(self):
700+
def test_quiesce_drops_remote_authpins_when_done(self):
    """
    That a quiesce operation drops remote authpins after marking the node as quiesced

    It's important that a remote quiesce doesn't stall freezing ops on the auth.

    Scenario:
      1. Two directories are pinned to ranks 0 and 1 and a file is created
         under the rank 0 directory.
      2. An exclusive policy lock on that file is taken at rank 0.
      3. A quiesce of the same file is requested at rank 1 and is expected
         to stay pending while the policy lock is held.
      4. The lock-holding op is killed and the quiesce is expected to
         report a result of 0.
      5. A `lock path ... --ap-freeze` with a short lifetime is issued at
         rank 1 while the quiesce op is still active.
    """
    self._configure_subvolume()

    # create two dirs for pinning
    self.mount_a.run_shell_payload("mkdir -p pin0 pin1")
    # enable export by populating the directories
    self.mount_a.run_shell_payload("touch pin0/export_dummy pin1/export_dummy")
    # pin the directories to different ranks
    self.mount_a.setfattr("pin0", "ceph.dir.pin", "0")
    self.mount_a.setfattr("pin1", "ceph.dir.pin", "1")

    # prepare the patient at rank 0
    self.mount_a.write_file("pin0/thefile", "I'm ready, doc")

    # wait for the export to settle
    self._wait_subtrees([(f"{self.mntpnt}/pin0", 0), (f"{self.mntpnt}/pin1", 1)])

    # the file every MDS command below operates on
    thefile = f"{self.mntpnt}/pin0/thefile"

    def reqid(cmd):
        """Extract the request id string from the JSON output of an MDS command."""
        op = json.loads(cmd.stdout.getvalue())
        # Different commands nest the op differently; unwrap whichever
        # layers are present and fall through when they are not.
        op = op.get('type_data', op)  # for op get
        op = op.get('op', op)  # for quiesce path
        # lock path returns the op directly
        return self._reqid_tostr(op['reqid'])

    def assertQuiesceOpDone(expected_done, quiesce_op, rank):
        """Check whether the op's reported result is 0 (done) or None (pending)."""
        cmd = self.fs.run_ceph_cmd(f"tell mds.{self.fs.name}:{rank} op get {quiesce_op}", stdout=StringIO())
        op_dump = json.loads(cmd.stdout.getvalue())
        expected_result = 0 if expected_done else None
        self.assertEqual(op_dump['type_data']['result'], expected_result)

    # Take the policy lock on the auth to cause a quiesce operation to request the remote authpin
    # This is needed to cause the next command to block
    cmd = self.fs.run_ceph_cmd(f"tell mds.{self.fs.name}:0 lock path {thefile} policy:x --await", stdout=StringIO())
    policy_block_op = reqid(cmd)

    # Try quiescing on the replica. This should block for the policy lock
    # As a side effect, it should take the remote authpin
    cmd = self.fs.run_ceph_cmd(f"tell mds.{self.fs.name}:1 quiesce path {thefile}", stdout=StringIO())
    quiesce_op = reqid(cmd)

    # verify the quiesce is pending
    assertQuiesceOpDone(False, quiesce_op, rank=1)

    # kill the op that holds the policy lock exclusively and verify the quiesce succeeds
    self.fs.kill_op(policy_block_op, rank=0)
    assertQuiesceOpDone(True, quiesce_op, rank=1)

    # If all is good, the ap-freeze operation below should succeed
    # despite the quiesce_op that's still active.
    # We payload this with some lock that we know shouldn't block
    # The call below will block on freezing if the quiesce failed to release
    # remote authpins, and after the lifetime elapses will return ECANCELED
    # NOTE(review): the return value is intentionally not inspected; presumably
    # run_ceph_cmd raises on a non-zero exit status, which is what fails the
    # test in the ECANCELED case -- confirm against the helper's defaults.
    self.fs.run_ceph_cmd(f"tell mds.{self.fs.name}:1 lock path {thefile} policy:r --ap-freeze --await --lifetime 5")
758+
759+
def test_request_drops_remote_authpins_when_waiting_for_quiescelock(self):
702760
"""
703761
That remote authpins are dropped when the request fails to acquire the quiesce lock
704762

0 commit comments

Comments
 (0)