@@ -461,18 +461,28 @@ def resumed():
461461
462462 def test_quiesce_path_link_terminal (self ):
463463 """
464- That quiesce on path with an terminal link fails with ENOTDIR even
465- pointing to a valid subvolume.
464+ That quiesce on path with an terminal link quiesces just the link inode
466465 """
467466
468467 self ._configure_subvolume ()
469468
470- self .mount_a .run_shell_payload ("ln -s ../.. subvol_quiesce" )
471- path = self .mount_a .cephfs_mntpt + "/subvol_quiesce"
469+ self .mount_a .run_shell_payload ("mkdir -p dir/" )
470+ self .mount_a .write_file ("dir/afile" , "I'm a file" )
471+ self .mount_a .run_shell_payload ("ln -s dir symlink_to_dir" )
472+ path = self .mount_a .cephfs_mntpt + "/symlink_to_dir"
472473
473- J = self .fs .rank_tell (["quiesce" , "path" , path , '--wait' ])
474- log .debug (f"{ J } " )
475- self .assertEqual (J ['op' ]['result' ], - 20 ) # ENOTDIR: the link is not a directory
474+ # MDS doesn't treat symlinks differently from regular inodes,
475+ # so quiescing one is allowed
476+ self .quiesce_and_verify (path )
477+
478+ # however, this also means that the directory this symlink points to isn't quiesced
479+ ops = self .fs .get_ops ()
480+ quiesce_inode = 0
481+ for op in ops ['ops' ]:
482+ op_name = op ['type_data' ].get ('op_name' , None )
483+ if op_name == "quiesce_inode" :
484+ quiesce_inode += 1
485+ self .assertEqual (1 , quiesce_inode )
476486
477487 def test_quiesce_path_link_intermediate (self ):
478488 """
@@ -484,7 +494,7 @@ def test_quiesce_path_link_intermediate(self):
484494 self .mount_a .run_shell_payload ("ln -s ../../.. _nogroup" )
485495 path = self .mount_a .cephfs_mntpt + "/_nogroup/" + self .QUIESCE_SUBVOLUME
486496
487- J = self .fs .rank_tell (["quiesce" , "path" , path , '--wait' ] )
497+ J = self .fs .rank_tell (["quiesce" , "path" , path , '--await' ], check_status = False )
488498 log .debug (f"{ J } " )
489499 self .assertEqual (J ['op' ]['result' ], - 20 ) # ENOTDIR: path_traverse: the intermediate link is not a directory
490500
@@ -498,24 +508,24 @@ def test_quiesce_path_notsubvol(self):
498508 self .mount_a .run_shell_payload ("mkdir dir" )
499509 path = self .mount_a .cephfs_mntpt + "/dir"
500510
501- J = self .fs .rank_tell (["quiesce" , "path" , path , '--wait' ] )
511+ J = self .fs .rank_tell (["quiesce" , "path" , path , '--await' ], check_status = False )
502512 reqid = self ._reqid_tostr (J ['op' ]['reqid' ])
503513 self ._wait_for_quiesce_complete (reqid , path = path )
504514 self ._verify_quiesce (root = path )
505515
506516 def test_quiesce_path_regfile (self ):
507517 """
508- That quiesce on a regular file fails with ENOTDIR .
518+ That quiesce on a regular file is possible .
509519 """
510520
511521 self ._configure_subvolume ()
512522
513523 self .mount_a .run_shell_payload ("touch file" )
514524 path = self .mount_a .cephfs_mntpt + "/file"
515525
516- J = self .fs .rank_tell (["quiesce" , "path" , path , '--wait' ] )
526+ J = self .fs .rank_tell (["quiesce" , "path" , path , '--await' ], check_status = False )
517527 log .debug (f"{ J } " )
518- self .assertEqual (J ['op' ]['result' ], - 20 ) # ENOTDIR
528+ self .assertEqual (J ['op' ]['result' ], 0 )
519529
520530 def test_quiesce_path_dup (self ):
521531 """
@@ -525,9 +535,9 @@ def test_quiesce_path_dup(self):
525535
526536 self ._configure_subvolume ()
527537
528- op1 = self .fs .rank_tell (["quiesce" , "path" , self .subvolume ])['op' ]
538+ op1 = self .fs .rank_tell (["quiesce" , "path" , self .subvolume ], check_status = False )['op' ]
529539 op1_reqid = self ._reqid_tostr (op1 ['reqid' ])
530- op2 = self .fs .rank_tell (["quiesce" , "path" , self .subvolume , '--wait' ] )['op' ]
540+ op2 = self .fs .rank_tell (["quiesce" , "path" , self .subvolume , '--await' ], check_status = False )['op' ]
531541 op1 = self .fs .get_op (op1_reqid )['type_data' ] # for possible dup result
532542 log .debug (f"op1 = { op1 } " )
533543 log .debug (f"op2 = { op2 } " )
@@ -545,7 +555,7 @@ def test_quiesce_blocked(self):
545555 self .mount_a .run_shell_payload ("touch file" )
546556 self .mount_a .setfattr ("file" , "ceph.quiesce.block" , "1" )
547557
548- J = self .fs .rank_tell (["quiesce" , "path" , self .subvolume , '--wait' ] )
558+ J = self .fs .rank_tell (["quiesce" , "path" , self .subvolume , '--await' ], check_status = False )
549559 log .debug (f"{ J } " )
550560 self .assertEqual (J ['op' ]['result' ], 0 )
551561 self .assertEqual (J ['state' ]['inodes_blocked' ], 1 )
@@ -560,7 +570,7 @@ def test_quiesce_slow(self):
560570 self ._configure_subvolume ()
561571 self ._client_background_workload ()
562572
563- J = self .fs .rank_tell (["quiesce" , "path" , self .subvolume ])
573+ J = self .fs .rank_tell (["quiesce" , "path" , self .subvolume ], check_status = False )
564574 log .debug (f"{ J } " )
565575 reqid = self ._reqid_tostr (J ['op' ]['reqid' ])
566576 self ._wait_for_quiesce_complete (reqid )
@@ -582,7 +592,7 @@ def test_quiesce_find(self):
582592 # drop cache
583593 self .fs .rank_tell (["cache" , "drop" ])
584594
585- J = self .fs .rank_tell (["quiesce" , "path" , self .subvolume ])
595+ J = self .fs .rank_tell (["quiesce" , "path" , self .subvolume ], check_status = False )
586596 log .debug (f"{ J } " )
587597 reqid = self ._reqid_tostr (J ['op' ]['reqid' ])
588598 self ._wait_for_quiesce_complete (reqid )
@@ -684,11 +694,69 @@ def test_quiesce_path_splitauth(self):
684694 self ._client_background_workload ()
685695 self ._wait_distributed_subtrees (2 * 2 , rank = "all" , path = self .mntpnt )
686696
687- op = self .fs .rank_tell (["quiesce" , "path" , self .subvolume , '--wait ' ], rank = 0 )['op' ]
697+ op = self .fs .rank_tell (["quiesce" , "path" , self .subvolume , '--await ' ], rank = 0 , check_status = False )['op' ]
688698 self .assertEqual (op ['result' ], - 1 ) # EPERM
689699
690- @unittest .skip ("https://tracker.ceph.com/issues/66152" )
691- def test_quiesce_drops_remote_authpins_on_failure (self ):
700+ def test_quiesce_drops_remote_authpins_when_done (self ):
701+ """
702+ That a quiesce operation drops remote authpins after marking the node as quiesced
703+
704+ It's important that a remote quiesce doesn't stall freezing ops on the auth
705+ """
706+ self ._configure_subvolume ()
707+
708+ # create two dirs for pinning
709+ self .mount_a .run_shell_payload ("mkdir -p pin0 pin1" )
710+ # enable export by populating the directories
711+ self .mount_a .run_shell_payload ("touch pin0/export_dummy pin1/export_dummy" )
712+ # pin the files to different ranks
713+ self .mount_a .setfattr ("pin0" , "ceph.dir.pin" , "0" )
714+ self .mount_a .setfattr ("pin1" , "ceph.dir.pin" , "1" )
715+
716+ # prepare the patient at rank 0
717+ self .mount_a .write_file ("pin0/thefile" , "I'm ready, doc" )
718+
719+ # wait for the export to settle
720+ self ._wait_subtrees ([(f"{ self .mntpnt } /pin0" , 0 ), (f"{ self .mntpnt } /pin1" , 1 )])
721+
722+ def reqid (cmd ):
723+ J = json .loads (cmd .stdout .getvalue ())
724+ J = J .get ('type_data' , J ) # for op get
725+ J = J .get ('op' , J ) # for quiesce path
726+ # lock path returns the op directly
727+ return self ._reqid_tostr (J ['reqid' ])
728+
729+ def assertQuiesceOpDone (expected_done , quiesce_op , rank ):
730+ cmd = self .fs .run_ceph_cmd (f"tell mds.{ self .fs .name } :{ rank } op get { quiesce_op } " , stdout = StringIO ())
731+
732+ J = json .loads (cmd .stdout .getvalue ())
733+ self .assertEqual (J ['type_data' ]['result' ], 0 if expected_done else None )
734+
735+ # Take the policy lock on the auth to cause a quiesce operation to request the remote authpin
736+ # This is needed to cause the next command to block
737+ cmd = self .fs .run_ceph_cmd (f"tell mds.{ self .fs .name } :0 lock path { self .mntpnt } /pin0/thefile policy:x --await" , stdout = StringIO ())
738+ policy_block_op = reqid (cmd )
739+
740+ # Try quiescing on the replica. This should block for the policy lock
741+ # As a side effect, it should take the remote authpin
742+ cmd = self .fs .run_ceph_cmd (f"tell mds.{ self .fs .name } :1 quiesce path { self .mntpnt } /pin0/thefile" , stdout = StringIO ())
743+ quiesce_op = reqid (cmd )
744+
745+ # verify the quiesce is pending
746+ assertQuiesceOpDone (False , quiesce_op , rank = 1 )
747+
748+ # kill the op that holds the policy lock exclusively and verify the quiesce succeeds
749+ self .fs .kill_op (policy_block_op , rank = 0 )
750+ assertQuiesceOpDone (True , quiesce_op , rank = 1 )
751+
752+ # If all is good, the ap-freeze operation below should succeed
753+ # despite the quiesce_op that's still active.
754+ # We payload this with some lock that we know shouldn't block
755+ # The call below will block on freezing if the quiesce failed to release
756+ # remote authpins, and after the lifetime elapses will return ECANCELED
757+ cmd = self .fs .run_ceph_cmd (f"tell mds.{ self .fs .name } :1 lock path { self .mntpnt } /pin0/thefile policy:r --ap-freeze --await --lifetime 5" )
758+
759+ def test_request_drops_remote_authpins_when_waiting_for_quiescelock (self ):
692760 """
693761 That remote authpins are dropped when the request fails to acquire the quiesce lock
694762
@@ -736,65 +804,71 @@ def test_quiesce_authpin_wait(self):
736804 That a quiesce_inode op with outstanding remote authpin requests can be killed.
737805 """
738806
739- self . config_set ( 'mds' , 'mds_heartbeat_grace' , '120' )
740- self .fs . set_session_timeout ( 240 ) # avoid spurious session warnings
741- self . _configure_subvolume ()
742- self .mount_a .setfattr ( "." , "ceph.dir.pin.distributed" , "1 " )
743- self . _client_background_workload ()
744- self ._wait_distributed_subtrees ( 2 * 2 , rank = "all " , path = self . mntpnt )
745- status = self .fs . status ( )
807+ # create two dirs for pinning
808+ self .mount_a . run_shell_payload ( "mkdir -p pin0 pin1" )
809+ # enable export by populating the directories
810+ self .mount_a .run_shell_payload ( "touch pin0/export_dummy pin1/export_dummy " )
811+ # pin the files to different ranks
812+ self .mount_a . setfattr ( "pin0" , "ceph.dir.pin " , "0" )
813+ self .mount_a . setfattr ( "pin1" , "ceph.dir.pin" , "1" )
746814
747- p = self . mount_a . run_shell_payload ( "ls" , stdout = StringIO ())
748- dirs = p . stdout . getvalue (). strip (). split ( )
815+ # prepare the patient at rank 0
816+ self . mount_a . write_file ( "pin0/thefile" , "I'm ready, doc" )
749817
750- # make rank 1 unresponsive to auth pin requests
751- p = self .run_ceph_cmd ( "tell " , f"mds. { self . fs . id } :1" , "lockup " , "90000" , wait = False )
818+ # wait for the export to settle
819+ self ._wait_subtrees ([( "/pin0 " , 0 ), ( "/pin1 " , 1 )] )
752820
753- qops = []
754- for d in dirs :
755- path = os .path .join (self .mntpnt , d )
756- op = self .fs .rank_tell ("quiesce" , "path" , path , rank = 0 )['op' ]
757- reqid = self ._reqid_tostr (op ['reqid' ])
758- log .info (f"created { reqid } " )
759- qops .append (reqid )
821+ path = "/pin0/thefile"
822+
823+ # take the policy lock on the file to cause remote authpin from the replica
824+ # when we get to quiescing the path
825+ op = self .fs .rank_tell ("lock" , "path" , path , "policy:x" , "--await" , rank = 0 )
826+ policy_reqid = self ._reqid_tostr (op ['reqid' ])
760827
761- def find_quiesce (blocked_on_remote_auth_pin ):
762- # verify no quiesce ops
763- ops = self .fs .get_ops (locks = False , rank = 0 , path = "/tmp/mds.0-ops" , status = status )['ops' ]
828+ # We need to simulate a freezing inode to have quiesce block on the authpin.
829+ # This can be done with authpin freeze feature, but it only works when sent from the replica.
830+ # We'll rdlock the policy lock, but it doesn't really matter as the quiesce won't get that far
831+ op = self .fs .rank_tell ("lock" , "path" , path , "policy:r" , "--ap-freeze" , rank = 1 )
832+ freeze_reqid = self ._reqid_tostr (op ['reqid' ])
833+
834+ # we should quiesce the same path from the replica side to cause the remote authpin (due to file xlock)
835+ op = self .fs .rank_tell ("quiesce" , "path" , path , rank = 1 )['op' ]
836+ quiesce_reqid = self ._reqid_tostr (op ['reqid' ])
837+
838+ def has_quiesce (* , blocked_on_remote_auth_pin ):
839+ ops = self .fs .get_ops (locks = False , rank = 1 , path = "/tmp/mds.1-ops" )['ops' ]
840+ log .debug (ops )
764841 for op in ops :
765842 type_data = op ['type_data' ]
766843 flag_point = type_data ['flag_point' ]
767- op_type = type_data ['op_type' ]
768- if op_type == 'client_request' or op_type == 'peer_request' :
769- continue
770844 if type_data ['op_name' ] == "quiesce_inode" :
771845 if blocked_on_remote_auth_pin :
772- if flag_point == "requesting remote authpins" :
773- return True
774- else :
775- return True
846+ self .assertEqual ("requesting remote authpins" , flag_point )
847+ return True
776848 return False
777849
778- with safe_while (sleep = 1 , tries = 30 , action = 'wait for quiesce op with outstanding remote authpin requests' ) as proceed :
779- while proceed ():
780- if find_quiesce (True ):
781- break
850+ # The quiesce should be pending
851+ self .assertTrue (has_quiesce (blocked_on_remote_auth_pin = True ))
782852
783- # okay, now kill all quiesce ops
784- for reqid in qops :
785- self .fs . kill_op ( reqid , rank = 0 )
853+ # even after killing the quiesce op, it should still stay pending
854+ self . fs . kill_op ( quiesce_reqid , rank = 1 )
855+ self .assertTrue ( has_quiesce ( blocked_on_remote_auth_pin = True ) )
786856
787- # verify some quiesce_inode ops still exist because authpin acks have not been received
788- if not find_quiesce ( True ):
789- self . fail ( "did not find quiesce_inode op blocked on remote authpins! (did the lockup on rank 1 complete?)" )
857+ # first, kill the policy xlock to release the freezing request
858+ self . fs . kill_op ( policy_reqid , rank = 0 )
859+ time . sleep ( 1 )
790860
791- # wait for sleep to complete
792- p .wait ()
861+ # this should have let the freezing request to progress
862+ op = self .fs .rank_tell ("op" , "get" , freeze_reqid , rank = 1 )
863+ log .debug (op )
864+ self .assertEqual (0 , op ['type_data' ]['result' ])
865+
866+ # now unfreeze the inode
867+ self .fs .kill_op (freeze_reqid , rank = 1 )
868+ time .sleep (1 )
793869
794- with safe_while (sleep = 1 , tries = 30 , action = 'wait for quiesce kill' ) as proceed :
795- while proceed ():
796- if not find_quiesce (False ):
797- break
870+ # the quiesce op should be gone
871+ self .assertFalse (has_quiesce (blocked_on_remote_auth_pin = False ))
798872
799873 def test_quiesce_block_file_replicated (self ):
800874 """
@@ -934,8 +1008,8 @@ def test_quiesce_path_multirank_exports(self):
9341008
9351009 sleep (2 )
9361010
937- op0 = self .fs .rank_tell (["quiesce" , "path" , self .subvolume ], rank = 0 )['op' ]
938- op1 = self .fs .rank_tell (["quiesce" , "path" , self .subvolume ], rank = 1 )['op' ]
1011+ op0 = self .fs .rank_tell (["quiesce" , "path" , self .subvolume ], rank = 0 , check_status = False )['op' ]
1012+ op1 = self .fs .rank_tell (["quiesce" , "path" , self .subvolume ], rank = 1 , check_status = False )['op' ]
9391013 reqid0 = self ._reqid_tostr (op0 ['reqid' ])
9401014 reqid1 = self ._reqid_tostr (op1 ['reqid' ])
9411015 op0 = self ._wait_for_quiesce_complete (reqid0 , rank = 0 , timeout = 300 )
0 commit comments