                                      gen_osd_cap_str, gen_mds_cap_str)
 
 log = logging.getLogger(__name__)
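+# grace period (seconds) to wait for a failed MDS rank to return to up:active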
+MDS_RESTART_GRACE = 60
 
 class TestLabeledPerfCounters(CephFSTestCase):
     CLIENTS_REQUIRED = 2
     MDSS_REQUIRED = 1
 
-    def test_per_client_labeled_perf_counters(self):
+    def _get_counters_for(self, filesystem, client_id):
+        dump = self.fs.rank_tell(["counter", "dump"])
+        per_client_metrics_key = f'mds_client_metrics-{filesystem}'
+        counters = [c["counters"] for c in dump[per_client_metrics_key]
+                    if c["labels"]["client"] == client_id]
+        return counters[0]
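+    # The helper above assumes a 'counter dump' structure along these lines
+    # (abridged; names and values are illustrative):
+    #   {"mds_client_metrics":   [{"labels": {"fs_name": "a", ...}, "counters": {...}}],
+    #    "mds_client_metrics-a": [{"labels": {"client": "client.4305", ...},
+    #                              "counters": {"total_read_ops": 0, "cap_hits": 12, ...}}]}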
+
+    def test_per_client_labeled_perf_counters_on_client_disconnect(self):
+        """
+        That the per-client labelled metrics become unavailable once a client disconnects.
+        """
+        mount_a_id = f'client.{self.mount_a.get_global_id()}'
+        self.mount_a.teardown()
+        with safe_while(sleep=1, tries=30, action=f'wait for counters - {mount_a_id}') as proceed:
+            while proceed():
+                dump = self.fs.rank_tell(["counter", "dump"])
+                per_client_metrics_key = f"mds_client_metrics-{dump['mds_client_metrics'][0]['labels']['fs_name']}"
+                clients = [c["labels"]["client"] for c in dump.get(per_client_metrics_key, {})]
+                if clients and mount_a_id not in clients:
+                    # success, no metrics.
+                    return True
+    def test_per_client_labeled_perf_counters_on_client_reconnect(self):
         """
-        That the per-client labelled perf counters depict the clients
-        performaing IO.
+        That the per-client labelled metrics are generated again once clients reconnect.
         """
-        def get_counters_for(filesystem, client_id):
-            dump = self.fs.rank_tell(["counter", "dump"])
-            per_client_metrics_key = f'mds_client_metrics-{filesystem}'
-            counters = [c["counters"] for \
-                        c in dump[per_client_metrics_key] if c["labels"]["client"] == client_id]
-            return counters[0]
+        # fail the active mds and wait for rank 0 to come back up:active
+        mds = self.fs.get_active_names()[0]
+        self.mds_cluster.mds_fail(mds)
+        self.fs.wait_for_state('up:active', rank=0, timeout=MDS_RESTART_GRACE)
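+        # clients should now reconnect to the revived rank and resume sending
+        # metrics; the loop below waits for both to show up in 'counter dump'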
+        mount_a_id = f'client.{self.mount_a.get_global_id()}'
+        mount_b_id = f'client.{self.mount_b.get_global_id()}'
+        fs_suffix = ""
+
+        with safe_while(sleep=1, tries=30, action='wait for counters') as proceed:
+            while proceed():
+                dump = self.fs.rank_tell(["counter", "dump"])
+                fs_suffix = dump['mds_client_metrics'][0]['labels']['fs_name']
+                per_client_metrics_key = f"mds_client_metrics-{fs_suffix}"
+                clients = [c["labels"]["client"] for c in dump.get(per_client_metrics_key, {})]
+                if mount_a_id in clients and mount_b_id in clients:
+                    # success, got metrics.
+                    break  # break to continue the test
+
+        # post reconnect, validate the IO perf counters
+        # write workload
+        self.mount_a.create_n_files("test_dir/test_file", 1000, sync=True)
+        with safe_while(sleep=1, tries=30, action=f'wait for counters - {mount_a_id}') as proceed:
+            while proceed():
+                counters_dump_a = self._get_counters_for(fs_suffix, mount_a_id)
+                if counters_dump_a["total_write_ops"] > 0 and counters_dump_a["total_write_size"] > 0 and \
+                   counters_dump_a["avg_write_latency"] >= 0 and counters_dump_a["avg_metadata_latency"] >= 0 and \
+                   counters_dump_a["opened_files"] >= 0 and counters_dump_a["opened_inodes"] > 0 and \
+                   counters_dump_a["cap_hits"] > 0 and counters_dump_a["dentry_lease_hits"] > 0 and \
+                   counters_dump_a["pinned_icaps"] > 0:
+                    break  # break to continue the test
+
+        # read from the other client
+        for i in range(100):
+            self.mount_b.open_background(basename=f'test_dir/test_file_{i}', write=False)
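+        # open_background is assumed to keep each file open read-only in a
+        # background task, so mount_b accumulates read ops and pinned
+        # caps/inodes for the checks below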
+        with safe_while(sleep=1, tries=30, action=f'wait for counters - {mount_b_id}') as proceed:
+            while proceed():
+                counters_dump_b = self._get_counters_for(fs_suffix, mount_b_id)
+                if counters_dump_b["total_read_ops"] >= 0 and counters_dump_b["total_read_size"] >= 0 and \
+                   counters_dump_b["avg_read_latency"] >= 0 and counters_dump_b["avg_metadata_latency"] >= 0 and \
+                   counters_dump_b["opened_files"] >= 0 and counters_dump_b["opened_inodes"] >= 0 and \
+                   counters_dump_b["cap_hits"] > 0 and counters_dump_b["dentry_lease_hits"] > 0 and \
+                   counters_dump_b["pinned_icaps"] > 0:
+                    break  # break to continue the test
+        self.mount_a.teardown()
+        self.mount_b.teardown()
 
+    def test_per_client_labeled_perf_counters_io(self):
+        """
+        That the per-client labelled perf counters depict the clients performing IO.
+        """
         # sleep a bit so that we get updated clients...
         sleep(10)
 
@@ -53,21 +118,29 @@ def get_counters_for(filesystem, client_id):
         # write workload
         self.mount_a.create_n_files("test_dir/test_file", 1000, sync=True)
         with safe_while(sleep=1, tries=30, action=f'wait for counters - {mount_a_id}') as proceed:
-            counters_dump_a = get_counters_for(fs_suffix, mount_a_id)
             while proceed():
-                if counters_dump_a["total_write_ops"] > 0 and counters_dump_a["total_write_size"] > 0:
-                    return True
+                counters_dump_a = self._get_counters_for(fs_suffix, mount_a_id)
+                if counters_dump_a["total_write_ops"] > 0 and counters_dump_a["total_write_size"] > 0 and \
+                   counters_dump_a["avg_write_latency"] >= 0 and counters_dump_a["avg_metadata_latency"] >= 0 and \
+                   counters_dump_a["opened_files"] >= 0 and counters_dump_a["opened_inodes"] > 0 and \
+                   counters_dump_a["cap_hits"] > 0 and counters_dump_a["dentry_lease_hits"] > 0 and \
+                   counters_dump_a["pinned_icaps"] > 0:
+                    break  # break to continue the test
 
         # read from the other client
         for i in range(100):
             self.mount_b.open_background(basename=f'test_dir/test_file_{i}', write=False)
         with safe_while(sleep=1, tries=30, action=f'wait for counters - {mount_b_id}') as proceed:
-            counters_dump_b = get_counters_for(fs_suffix, mount_b_id)
             while proceed():
-                if counters_dump_b["total_read_ops"] > 0 and counters_dump_b["total_read_size"] > 0:
-                    return True
-
-        self.fs.teardown()
+                counters_dump_b = self._get_counters_for(fs_suffix, mount_b_id)
+                if counters_dump_b["total_read_ops"] >= 0 and counters_dump_b["total_read_size"] >= 0 and \
+                   counters_dump_b["avg_read_latency"] >= 0 and counters_dump_b["avg_metadata_latency"] >= 0 and \
+                   counters_dump_b["opened_files"] >= 0 and counters_dump_b["opened_inodes"] >= 0 and \
+                   counters_dump_b["cap_hits"] > 0 and counters_dump_b["dentry_lease_hits"] > 0 and \
+                   counters_dump_b["pinned_icaps"] > 0:
+                    break  # break to continue the test
+        self.mount_a.teardown()
+        self.mount_b.teardown()
 
 class TestAdminCommands(CephFSTestCase):
     """