
Commit 1327fc8

salieri11 authored and vshankar committed

test: add subvolume metrics sanity test

Signed-off-by: Igor Golikov <[email protected]>
Fixes: https://tracker.ceph.com/issues/68929

1 parent 13d9baf commit 1327fc8

3 files changed: +125 additions, −2 deletions
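For orientation before the diff itself: the new test polls the MDS labeled perf counters ("counter dump") and expects "mds_subvolume_metrics" entries shaped roughly like the sketch below. The field names are taken from the assertions in the diff; the values are placeholders, not real output.

    # Approximate shape of one "mds_subvolume_metrics" entry (placeholder values):
    example_entry = {
        "labels": {
            "fs_name": "cephfs",
            "subvolume_path": "/volumes/_nogroup/metrics_subv",
        },
        "counters": {
            "avg_read_iops": 0,        # reads per second, averaged
            "avg_read_tp_Bps": 0,      # read throughput, bytes/s
            "avg_read_lat_msec": 0,    # average read latency, ms
            "avg_write_iops": 0,       # writes per second, averaged
            "avg_write_tp_Bps": 0,     # write throughput, bytes/s
            "avg_write_lat_msec": 0,   # average write latency, ms
        },
    }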
Lines changed: 12 additions & 0 deletions

@@ -0,0 +1,12 @@
+overrides:
+  install:
+    extra_system_packages:
+      rpm:
+        - fio
+      deb:
+        - fio
+tasks:
+  - cephfs_test_runner:
+      fail_on_skip: false
+      modules:
+        - tasks.cephfs.test_subvolume.TestSubvolumeMetrics

qa/tasks/cephfs/mount.py

Lines changed: 3 additions & 0 deletions

@@ -1713,4 +1713,7 @@ def validate_subvol_options(self):
             path_to_mount = subvol_paths[mount_subvol_num]
             self.cephfs_mntpt = path_to_mount
 
+    def get_mount_point(self):
+        return self.hostfs_mntpt
+
 CephFSMount = CephFSMountBase
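The new accessor simply exposes the host-side mountpoint. A minimal sketch of how the test below consumes it (the mountpoint value here is illustrative, not taken from the commit):

    import os

    # Hypothetical values for illustration; the test obtains these from
    # CephFSMount.get_mount_point() and `ceph fs subvolume getpath`.
    mount_point = "/mnt/cephfs"                      # what get_mount_point() returns
    subvol_path = "/volumes/_nogroup/metrics_subv"   # subvolume path inside the fs
    host_path = os.path.join(mount_point, subvol_path.strip('/'))
    # -> /mnt/cephfs/volumes/_nogroup/metrics_subv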

qa/tasks/cephfs/test_subvolume.py

Lines changed: 110 additions & 2 deletions
@@ -1,8 +1,10 @@
 import logging
 from time import sleep
+import os
 
 from tasks.cephfs.cephfs_test_case import CephFSTestCase
 from teuthology.exceptions import CommandFailedError
+from teuthology.contextutil import safe_while
 
 log = logging.getLogger(__name__)

@@ -16,6 +18,7 @@ def setUp(self):
         self.setup_test()
 
     def tearDown(self):
+        #pass
         # clean up
         self.cleanup_test()
         super().tearDown()
@@ -185,7 +188,7 @@ def test_subvolume_create_snapshot_inside_new_subvolume_parent(self):
         self.mount_a.run_shell(['mkdir', 'group/subvol2/dir/.snap/s2'])
 
         # override subdir subvolume with parent subvolume
-        self.mount_a.run_shell(['setfattr', '-n', 'ceph.dir.subvolume',
+        (['setfattr', '-n', 'ceph.dir.subvolume',
                                 '-v', '1', 'group/subvol2/dir'])
         self.mount_a.run_shell(['setfattr', '-n', 'ceph.dir.subvolume',
                                 '-v', '1', 'group/subvol2'])
@@ -197,6 +200,7 @@ def test_subvolume_create_snapshot_inside_new_subvolume_parent(self):
         # clean up
         self.mount_a.run_shell(['rmdir', 'group/subvol2/dir/.snap/s2'])
 
+
     def test_subvolume_vxattr_removal_without_setting(self):
         """
         To verify that the ceph.dir.subvolume vxattr removal without setting doesn't cause mds crash
@@ -209,7 +213,6 @@ def test_subvolume_vxattr_removal_without_setting(self):
         # cleanup
         self.mount_a.run_shell(['rm', '-rf', 'group/subvol3'])
 
-
 class TestSubvolumeReplicated(CephFSTestCase):
     CLIENTS_REQUIRED = 1
     MDSS_REQUIRED = 2
@@ -245,3 +248,108 @@ def test_subvolume_replicated(self):
         ino0 = self.fs.read_cache("/dir1/dir2", depth=0, rank=0)[0]
         self.assertFalse(ino0['is_auth'])
         self.assertTrue(ino0['is_subvolume'])
+
+class TestSubvolumeMetrics(CephFSTestCase):
+    CLIENTS_REQUIRED = 1
+    MDSS_REQUIRED = 1
+
+    def get_subvolume_metrics(self, mds_rank=0):
+        """
+        Helper to fetch the current subvolume metrics from the MDS labeled perf counters via mds_tell.
+        """
+        mds_info = self.fs.get_rank(rank=mds_rank)
+        mds_name = mds_info['name']
+        counters = self.fs.mds_tell(["counter", "dump"], mds_id=mds_name)
+        return counters.get("mds_subvolume_metrics")
+
+    def test_subvolume_metrics_lifecycle(self):
+        """
+        Verify that subvolume metrics are initially absent, appear after I/O,
+        and disappear again after the aggregation window expires.
+        """
+        subvol_name = "metrics_subv"
+        subv_path = "/volumes/_nogroup/metrics_subv"
+
+        # no metrics initially
+        subvol_metrics = self.get_subvolume_metrics()
+        self.assertFalse(subvol_metrics, "Subvolume metrics should not be present before I/O")
+
+        # create subvolume
+        self.fs.run_ceph_cmd('fs', 'subvolume', 'create', 'cephfs', subvol_name)
+
+        # resolve the subvolume's path on the client host
+        mount_point = self.mount_a.get_mount_point()
+        subvolume_fs_path = self.fs.get_ceph_cmd_stdout('fs', 'subvolume', 'getpath', 'cephfs', subvol_name).strip()
+        subvolume_fs_path = os.path.join(mount_point, subvolume_fs_path.strip('/'))
+
+        # do some writes
+        filename = os.path.join(subvolume_fs_path, "file0")
+        self.mount_a.run_shell_payload("sudo fio "
+                                       "--name=test --rw=write "
+                                       "--bs=4k --numjobs=1 --time_based "
+                                       "--runtime=20s --verify=0 --size=1G "
+                                       f"--filename={filename}", wait=True)
+
+        subvol_metrics = None
+        with safe_while(sleep=1, tries=30, action='wait for subvolume write counters') as proceed:
+            while proceed():
+                # poll until the metrics become available
+                subvol_metrics = self.get_subvolume_metrics()
+                if subvol_metrics:
+                    break
+
+        log.debug(f'verifying for write: subvol_metrics={subvol_metrics}')
+
+        # extract the first metric entry
+        metric = subvol_metrics[0]
+        counters = metric["counters"]
+        labels = metric["labels"]
+
+        # label checks
+        self.assertEqual(labels["fs_name"], "cephfs", "Unexpected fs_name in subvolume metrics")
+        self.assertEqual(labels["subvolume_path"], subv_path, "Unexpected subvolume_path in subvolume metrics")
+
+        # counter presence checks
+        self.assertIn("avg_read_iops", counters)
+        self.assertIn("avg_read_tp_Bps", counters)
+        self.assertIn("avg_read_lat_msec", counters)
+        self.assertIn("avg_write_iops", counters)
+        self.assertIn("avg_write_tp_Bps", counters)
+        self.assertIn("avg_write_lat_msec", counters)
+
+        # check write metrics
+        self.assertGreater(counters["avg_write_iops"], 0, "Expected avg_write_iops to be > 0")
+        self.assertGreater(counters["avg_write_tp_Bps"], 0, "Expected avg_write_tp_Bps to be > 0")
+        self.assertGreaterEqual(counters["avg_write_lat_msec"], 0, "Expected avg_write_lat_msec to be >= 0")
+
+        # do some reads
+        self.mount_a.run_shell_payload("sudo fio "
+                                       "--name=test --rw=read "
+                                       "--bs=4k --numjobs=1 --time_based "
+                                       "--runtime=20s --verify=0 --size=1G "
+                                       f"--filename={filename}", wait=True)
+
+        subvol_metrics = None
+        with safe_while(sleep=1, tries=30, action='wait for subvolume read counters') as proceed:
+            while proceed():
+                # poll until the metrics become available
+                subvol_metrics = self.get_subvolume_metrics()
+                if subvol_metrics:
+                    break
+
+        log.debug(f'verifying for read: subvol_metrics={subvol_metrics}')
+
+        metric = subvol_metrics[0]
+        counters = metric["counters"]
+
+        # check read metrics
+        self.assertGreater(counters["avg_read_iops"], 0, "Expected avg_read_iops to be > 0")
+        self.assertGreater(counters["avg_read_tp_Bps"], 0, "Expected avg_read_tp_Bps to be > 0")
+        self.assertGreaterEqual(counters["avg_read_lat_msec"], 0, "Expected avg_read_lat_msec to be >= 0")
+
+        # wait for the metrics to expire after inactivity
+        sleep(60)
+
+        # verify that the metrics are no longer present
+        subvol_metrics = self.get_subvolume_metrics()
+        self.assertFalse(subvol_metrics, "Subvolume metrics should be gone after inactivity window")
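Outside teuthology, a rough standalone equivalent of get_subvolume_metrics() might look like the sketch below. It assumes a running cluster (e.g. vstart), a `ceph` binary on PATH, and an active MDS whose tell target is `mds.a`; none of these specifics come from the commit itself.

    import json
    import subprocess

    # Dump the MDS labeled perf counters and pick out the subvolume section.
    # `counter dump` emits JSON; "mds_subvolume_metrics" is the same key the
    # test reads from the mds_tell output.
    out = subprocess.check_output(["ceph", "tell", "mds.a", "counter", "dump"])
    counters = json.loads(out)
    print(counters.get("mds_subvolume_metrics"))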
