Skip to content

Commit 7fdc8a7

Browse files
authored
Merge pull request ceph#59383 from joscollin/wip-B67360-counters-set-on-mds-failed
qa: restrict 'perf dump' on active mds only
2 parents 8c677fe + 7a952dc commit 7fdc8a7

File tree

1 file changed

+28
-4
lines changed

1 file changed

+28
-4
lines changed

qa/tasks/check_counter.py

Lines changed: 28 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,14 @@
11

22
import logging
33
import json
4+
import errno
45

56
from teuthology.task import Task
67
from teuthology import misc
78

89
from tasks import ceph_manager
10+
from tasks.cephfs.filesystem import MDSCluster
11+
from teuthology.exceptions import CommandFailedError
912

1013
log = logging.getLogger(__name__)
1114

@@ -61,6 +64,9 @@ def end(self):
6164
mon_manager = ceph_manager.CephManager(self.admin_remote, ctx=self.ctx, logger=log.getChild('ceph_manager'))
6265
active_mgr = json.loads(mon_manager.raw_cluster_cmd("mgr", "dump", "--format=json-pretty"))["active_name"]
6366

67+
mds_cluster = MDSCluster(self.ctx)
68+
status = mds_cluster.status()
69+
6470
for daemon_type, counters in targets.items():
6571
# List of 'a', 'b', 'c'...
6672
daemon_ids = list(misc.all_roles_of_type(self.ctx.cluster, daemon_type))
@@ -80,13 +86,31 @@ def end(self):
8086
else:
8187
log.debug("Getting stats from {0}".format(daemon_id))
8288

83-
manager = self.ctx.managers[cluster_name]
84-
proc = manager.admin_socket(daemon_type, daemon_id, ["perf", "dump"])
85-
response_data = proc.stdout.getvalue().strip()
89+
if daemon_type == 'mds':
90+
mds_info = status.get_mds(daemon_id)
91+
if not mds_info:
92+
continue
93+
mds = f"mds.{mds_info['gid']}"
94+
if mds_info['state'] != "up:active":
95+
log.debug(f"skipping {mds}")
96+
continue
97+
log.debug(f"Getting stats from {mds}")
98+
try:
99+
proc = mon_manager.raw_cluster_cmd("tell", mds, "perf", "dump",
100+
"--format=json-pretty")
101+
response_data = proc.strip()
102+
except CommandFailedError as e:
103+
if e.exitstatus == errno.ENOENT:
104+
log.debug(f"Failed to do 'perf dump' on {mds}")
105+
continue
106+
else:
107+
manager = self.ctx.managers[cluster_name]
108+
proc = manager.admin_socket(daemon_type, daemon_id, ["perf", "dump"])
109+
response_data = proc.stdout.getvalue().strip()
86110
if response_data:
87111
perf_dump = json.loads(response_data)
88112
else:
89-
log.warning("No admin socket response from {0}, skipping".format(daemon_id))
113+
log.warning("No response from {0}, skipping".format(daemon_id))
90114
continue
91115

92116
minval = ''

0 commit comments

Comments
 (0)