|
3 | 3 | import logging |
4 | 4 | import operator |
5 | 5 | from random import randint, choice |
| 6 | +from json import loads as json_loads |
6 | 7 |
|
7 | 8 | from tasks.cephfs.cephfs_test_case import CephFSTestCase |
8 | 9 | from teuthology.exceptions import CommandFailedError |
| 10 | +from teuthology.contextutil import safe_while |
9 | 11 | from tasks.cephfs.fuse_mount import FuseMount |
10 | 12 |
|
11 | 13 | log = logging.getLogger(__name__) |
@@ -520,7 +522,8 @@ def test_connect_bootstrapping(self): |
520 | 522 |
|
521 | 523 |
|
522 | 524 | class TestStandbyReplay(CephFSTestCase): |
523 | | - CLIENTS_REQUIRED = 0 |
| 525 | + |
| 526 | + CLIENTS_REQUIRED = 1 |
524 | 527 | MDSS_REQUIRED = 4 |
525 | 528 |
|
526 | 529 | def _confirm_no_replay(self): |
@@ -706,6 +709,72 @@ def test_rank_stopped(self): |
706 | 709 | status = self._confirm_single_replay() |
707 | 710 | self.assertTrue(standby_count, len(list(status.get_standbys()))) |
708 | 711 |
|
def test_health_warn_oversize_cache_has_no_counters(self):
    '''
    Test that when the MDS cache size crosses the limit, the health
    warning printed for the standby-replay MDS doesn't include inode and
    stray counters.

    Tests: https://tracker.ceph.com/issues/63514
    '''
    # Reduce the MDS cache limit; the default limit is too high and
    # reaching it would unnecessarily consume too many resources and too
    # much time.
    self.config_set('mds', 'mds_cache_memory_limit', '1K')
    # The health warning for crossing the MDS cache size limit isn't
    # raised until a threshold is crossed. The default threshold is too
    # high and would, again, cost too much time and resources.
    self.config_set('mds', 'mds_health_cache_threshold', '1.000001')
    # Keep a single active rank. The check further below expects exactly
    # two warning entries: one for the active MDS and one for its
    # standby-replay MDS.
    self.fs.set_max_mds(1)
    self.fs.set_allow_standby_replay(True)
    self._confirm_single_replay()
    self.fs.wait_for_daemons()
    # The call above (to self.fs.wait_for_daemons()) should ensure we have
    # only 1 active MDS on the cluster.
    active_mds_id = self.fs.get_active_names()[0]
    sr_mds_id = self.fs.get_standby_replay_names()[0]

    # This should generate more than enough MDS cache to trigger the
    # health warning MDS_CACHE_OVERSIZED.
    self.mount_a.open_n_background(".", 400)

    # Actual test begins now. Poll the health report until the warning
    # for the standby-replay MDS shows up.
    # NOTE(review): presumably safe_while fails the test once its tries
    # are exhausted -- confirm against teuthology.contextutil.
    with safe_while(sleep=3, tries=10) as proceed:
        while proceed():
            # Log the cache generated so far for the sake of easy
            # debugging in future; the output itself is not inspected.
            self.get_ceph_cmd_stdout(
                f'tell mds.{active_mds_id} cache status')

            health_report = json_loads(
                self.get_ceph_cmd_stdout('health detail --format json'))
            checks = health_report['checks']
            if 'MDS_CACHE_OVERSIZED' not in checks:
                log.debug('warning hasn\'t appeared in health report '
                          'yet. trying again after some sleep...')
                continue

            cache_warn = checks['MDS_CACHE_OVERSIZED']['detail']
            log.debug('cache_warn - %s', cache_warn)
            # Sanity check: "ceph health detail" output should have 2
            # warnings -- one for the active MDS and the other for the
            # standby-replay MDS.
            if len(cache_warn) != 2:
                log.debug('expected 2 warnings but instead found %d '
                          'warnings; trying again after some sleep...',
                          len(cache_warn))
                continue

            for cw in cache_warn:
                msg = cw['message']
                # Only the standby-replay MDS's entry is under test.
                if f'mds.{sr_mds_id}' not in msg:
                    continue
                self.assertNotIn('inodes in use by clients', msg)
                self.assertNotIn('stray files', msg)
                return

            # Neither entry names the standby-replay MDS; poll again.
            log.debug('no warning found for mds.%s yet; trying again '
                      'after some sleep...', sr_mds_id)
709 | 778 |
|
710 | 779 | class TestMultiFilesystems(CephFSTestCase): |
711 | 780 | CLIENTS_REQUIRED = 2 |
|
0 commit comments