 from textwrap import dedent
 
 from teuthology.exceptions import CommandFailedError
+from teuthology import contextutil
+from tasks.cephfs.filesystem import FSDamaged
 from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology
 
 log = logging.getLogger(__name__)
@@ -84,6 +86,18 @@ def damage(self):
         pool = self._filesystem.get_metadata_pool_name()
         self._filesystem.rados(["purge", pool, '--yes-i-really-really-mean-it'])
 
+    def is_damaged(self):
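+        # Default check: poll wait_for_daemons() until it raises FSDamaged,
+        # and treat the damage as effective only if rank 0 is among the
+        # damaged ranks.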
+        sleep = 2
+        timeout = 120
+        with contextutil.safe_while(sleep=sleep, tries=timeout / sleep) as proceed:
+            while proceed():
+                try:
+                    self._filesystem.wait_for_daemons()
+                except FSDamaged as e:
+                    if 0 in e.ranks:
+                        return True
+                    return False
+
     def flush(self):
         """
         Called after client unmount, after write: flush whatever you want
@@ -150,6 +164,90 @@ def validate(self):
         self.assert_equal(target, "symdir/onemegs")
         return self._errors
 
+class NestedDirWorkload(Workload):
+    """
+    Nested directories, one is lost.
+    """
+
+    def write(self):
+        self._mount.run_shell_payload("mkdir -p dir_x/dir_xx/dir_xxx/")
+        self._mount.run_shell_payload("dd if=/dev/urandom of=dir_x/dir_xx/dir_xxx/file_y conv=fsync bs=1 count=1")
+        self._initial_state = self._filesystem.read_cache("dir_x/dir_xx", depth=0)
+
+    def damage(self):
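+        # A directory's (zeroth) dirfrag lives in the metadata pool as an
+        # object named "<inode number in hex>.00000000"; removing it loses
+        # the dentries stored under dir_xx.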
+        dirfrag_obj = "{0:x}.00000000".format(self._initial_state[0]['ino'])
+        self._filesystem.radosm(["rm", dirfrag_obj])
+
+    def is_damaged(self):
+        # workload runner expects MDS to be offline
+        self._filesystem.fail()
+        return True
+
+    def validate(self):
+        self._mount.run_shell_payload("find dir_x -execdir stat {} +")
+        self._mount.run_shell_payload("stat dir_x/dir_xx/dir_xxx/file_y")
+        return self._errors
+
+class NestedDirWorkloadRename(Workload):
+    """
+    Nested directories, one is lost. With renames.
+    """
+
+    def write(self):
+        self._mount.run_shell_payload("mkdir -p dir_x/dir_xx/dir_xxx/; mkdir -p dir_y")
+        self._mount.run_shell_payload("dd if=/dev/urandom of=dir_x/dir_xx/dir_xxx/file_y conv=fsync bs=1 count=1")
+        self._initial_state = self._filesystem.read_cache("dir_x/dir_xx", depth=0)
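+        # Flush metadata out of the journal so that the pre-rename dirfrag
+        # recorded in _initial_state exists as a RADOS object for damage()
+        # to remove.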
+        self._filesystem.flush()
+        self._mount.run_shell_payload("mv dir_x/dir_xx dir_y/dir_yy; sync dir_y")
+
+    def damage(self):
+        dirfrag_obj = "{0:x}.00000000".format(self._initial_state[0]['ino'])
+        self._filesystem.radosm(["rm", dirfrag_obj])
+
+    def is_damaged(self):
+        # workload runner expects MDS to be offline
+        self._filesystem.fail()
+        return True
+
+    def validate(self):
+        self._mount.run_shell_payload("find . -execdir stat {} +")
+        self._mount.run_shell_payload("stat dir_y/dir_yy/dir_xxx/file_y")
+        return self._errors
+
+class NestedDoubleDirWorkloadRename(Workload):
+    """
+    Nested directories, two lost with backtraces to rebuild. With renames.
+    """
+
+    def write(self):
+        self._mount.run_shell_payload("mkdir -p dir_x/dir_xx/dir_xxx/; mkdir -p dir_y")
+        self._mount.run_shell_payload("dd if=/dev/urandom of=dir_x/dir_xx/dir_xxx/file_y conv=fsync bs=1 count=1")
+        self._initial_state = []
+        self._initial_state.append(self._filesystem.read_cache("dir_x/dir_xx", depth=0))
+        self._initial_state.append(self._filesystem.read_cache("dir_y", depth=0))
+        self._filesystem.flush()
+        self._mount.run_shell_payload("""
+            mv dir_x/dir_xx dir_y/dir_yy
+            sync dir_y
+            dd if=/dev/urandom of=dir_y/dir_yy/dir_xxx/file_z conv=fsync bs=1 count=1
+        """)
+
+    def damage(self):
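+        # Remove the recorded dirfrag objects for both dir_xx and dir_y, so
+        # the rebuild has to recover both directories from backtraces.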
+        for o in self._initial_state:
+            dirfrag_obj = "{0:x}.00000000".format(o[0]['ino'])
+            self._filesystem.radosm(["rm", dirfrag_obj])
+
+    def is_damaged(self):
+        # workload runner expects MDS to be offline
+        self._filesystem.fail()
+        return True
+
+    def validate(self):
+        self._mount.run_shell_payload("find . -execdir stat {} +")
+        # during recovery we may get dir_x/dir_xx or dir_y/dir_yy, depending on RADOS PG iteration order
+        self._mount.run_shell_payload("stat dir_y/dir_yy/dir_xxx/file_y || stat dir_x/dir_xx/dir_xxx/file_y")
+        return self._errors
+
 
 class MovedFile(Workload):
     def write(self):
@@ -319,37 +417,6 @@ def validate(self):
         return self._errors
 
 
-class MovedDir(Workload):
-    def write(self):
-        # Create a nested dir that we will then move. Two files with two different
-        # backtraces referring to the moved dir, claiming two different locations for
-        # it. We will see that only one backtrace wins and the dir ends up with
-        # single linkage.
-        self._mount.run_shell(["mkdir", "-p", "grandmother/parent"])
-        self._mount.write_n_mb("grandmother/parent/orig_pos_file", 1)
-        self._filesystem.mds_asok(["flush", "journal"])
-        self._mount.run_shell(["mkdir", "grandfather"])
-        self._mount.run_shell(["mv", "grandmother/parent", "grandfather"])
-        self._mount.write_n_mb("grandfather/parent/new_pos_file", 2)
-        self._filesystem.mds_asok(["flush", "journal"])
-
-        self._initial_state = (
-            self._mount.stat("grandfather/parent/orig_pos_file"),
-            self._mount.stat("grandfather/parent/new_pos_file")
-        )
-
-    def validate(self):
-        root_files = self._mount.ls()
-        self.assert_equal(len(root_files), 1)
-        self.assert_equal(root_files[0] in ["grandfather", "grandmother"], True)
-        winner = root_files[0]
-        st_opf = self._mount.stat(f"{winner}/parent/orig_pos_file", sudo=True)
-        st_npf = self._mount.stat(f"{winner}/parent/new_pos_file", sudo=True)
-
-        self.assert_equal(st_opf['st_size'], self._initial_state[0]['st_size'])
-        self.assert_equal(st_npf['st_size'], self._initial_state[1]['st_size'])
-
-
 class MissingZerothObject(Workload):
     def write(self):
         self._mount.run_shell(["mkdir", "subdir"])
@@ -391,10 +458,6 @@ def validate(self):
 class TestDataScan(CephFSTestCase):
     MDSS_REQUIRED = 2
 
-    def is_marked_damaged(self, rank):
-        mds_map = self.fs.get_mds_map()
-        return rank in mds_map['damaged']
-
     def _rebuild_metadata(self, workload, workers=1, unmount=True):
         """
         That when all objects in metadata pool are removed, we can rebuild a metadata pool
@@ -416,28 +479,20 @@ def _rebuild_metadata(self, workload, workers=1, unmount=True):
 
         # After recovery, we need the MDS to not be strict about stats (in production these options
         # are off by default, but in QA we need to explicitly disable them)
-        self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
-        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)
+        self.config_set('mds', 'mds verify scatter', False)
+        self.config_set('mds', 'mds debug scatterstat', False)
 
         # Apply any data damage the workload wants
         workload.damage()
 
         # Reset the MDS map in case multiple ranks were in play: recovery procedure
        # only understands how to rebuild metadata under rank 0
         self.fs.reset()
+        self.assertEqual(self.fs.get_var('max_mds'), 1)
 
         self.fs.set_joinable()  # redundant with reset
 
-        def get_state(mds_id):
-            info = self.mds_cluster.get_mds_info(mds_id)
-            return info['state'] if info is not None else None
-
-        self.wait_until_true(lambda: self.is_marked_damaged(0), 60)
-        for mds_id in self.fs.mds_ids:
-            self.wait_until_equal(
-                lambda: get_state(mds_id),
-                "up:standby",
-                timeout=60)
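+        # Each workload verifies that its damage actually took effect (by
+        # default, that rank 0 is marked damaged) before the offline rebuild.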
+        self.assertTrue(workload.is_damaged())
 
         self.fs.table_tool([self.fs.name + ":0", "reset", "session"])
         self.fs.table_tool([self.fs.name + ":0", "reset", "snap"])
@@ -450,7 +505,7 @@ def get_state(mds_id):
         self.fs.journal_tool(["journal", "reset", "--yes-i-really-really-mean-it"], 0)
 
         self.fs.journal_tool(["journal", "reset", "--force", "--yes-i-really-really-mean-it"], 0)
-        self.fs.data_scan(["init"])
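+        # --force-init: reinitialize the root and MDS-dir inodes even if
+        # objects for them already exist in the metadata pool.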
+        self.fs.data_scan(["init", "--force-init"])
         self.fs.data_scan(["scan_extents"], worker_count=workers)
         self.fs.data_scan(["scan_inodes"], worker_count=workers)
         self.fs.data_scan(["scan_links"])
@@ -461,7 +516,7 @@ def get_state(mds_id):
         self.run_ceph_cmd('mds', 'repaired', '0')
 
         # Start the MDS
-        self.fs.mds_restart()
+        self.fs.set_joinable()  # necessary for some tests without damage
         self.fs.wait_for_daemons()
         log.info(str(self.mds_cluster.status()))
 
@@ -490,6 +545,15 @@ def test_rebuild_simple(self):
     def test_rebuild_symlink(self):
         self._rebuild_metadata(SymlinkWorkload(self.fs, self.mount_a))
 
+    def test_rebuild_nested(self):
+        self._rebuild_metadata(NestedDirWorkload(self.fs, self.mount_a))
+
+    def test_rebuild_nested_rename(self):
+        self._rebuild_metadata(NestedDirWorkloadRename(self.fs, self.mount_a))
+
+    def test_rebuild_nested_double_rename(self):
+        self._rebuild_metadata(NestedDoubleDirWorkloadRename(self.fs, self.mount_a))
+
     def test_rebuild_moved_file(self):
         self._rebuild_metadata(MovedFile(self.fs, self.mount_a))
 
@@ -499,9 +563,6 @@ def test_rebuild_backtraceless(self):
     def test_rebuild_backtraceless_with_lf_dir_removed(self):
         self._rebuild_metadata(BacktracelessFileRemoveLostAndFoundDirectory(self.fs, self.mount_a), unmount=False)
 
-    def test_rebuild_moved_dir(self):
-        self._rebuild_metadata(MovedDir(self.fs, self.mount_a))
-
     def test_rebuild_missing_zeroth(self):
         self._rebuild_metadata(MissingZerothObject(self.fs, self.mount_a))
 
@@ -723,10 +784,10 @@ def test_rebuild_inotable(self):
         mds0_id = active_mds_names[0]
         mds1_id = active_mds_names[1]
 
-        self.mount_a.run_shell(["mkdir", "dir1"])
+        self.mount_a.run_shell_payload("mkdir -p dir1/dir2")
         dir_ino = self.mount_a.path_to_ino("dir1")
         self.mount_a.setfattr("dir1", "ceph.dir.pin", "1")
-        # wait for subtree migration
+        self._wait_subtrees([('/dir1', 1)], rank=1)
 
         file_ino = 0;
         while True: