Skip to content

Commit 07e0061

Browse files
author
Theofilos Manitaras
committed
Address PR comments
1 parent b516a9e commit 07e0061

File tree

1 file changed

+5
-27
lines changed

1 file changed

+5
-27
lines changed

cscs-checks/system/jobreport/gpu_report.py

Lines changed: 5 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -23,10 +23,9 @@ class gpu_usage_report_check(gpu_burn_check):
2323
'''
2424

2525
valid_systems = ['daint:gpu', 'dom:gpu']
26-
valid_prog_environs = ['PrgEnv-nvidia']
2726
descr = 'Check GPU usage from job report'
2827
gpu_build = 'cuda'
29-
num_tasks = 0
28+
num_tasks = 2
3029
num_gpus_per_node = 1
3130
perf_floor = variable(float, value=-0.2)
3231
tags = {'production'}
@@ -57,7 +56,7 @@ def assert_jobreport_success(self):
5756
time.sleep(25)
5857

5958
return sn.all([
60-
self.count_successful_burns(), self.gpu_usage_sanity()
59+
self.assert_successful_burn_count(), self.gpu_usage_sanity()
6160
])
6261

6362
@deferrable
@@ -73,7 +72,7 @@ def gpu_usage_sanity(self):
7372
# Parse job report data
7473
patt = r'^\s*(\w*)\s*(\d+)\s*%\s*\d+\s*MiB\s*\d+:\d+:(\d+)'
7574
self.nodes_reported = sn.extractall(patt, self.stdout, 1)
76-
self.num_tasks_assigned = self.job.num_tasks * self.num_gpus_per_node
75+
self.num_tasks_assigned = self.num_tasks * self.num_gpus_per_node
7776
usage = sn.extractall(patt, self.stdout, 2, int)
7877
time_reported = sn.extractall(patt, self.stdout, 3, int)
7978
return sn.all([
@@ -85,29 +84,8 @@ def gpu_usage_sanity(self):
8584
])
8685

8786
@deferrable
88-
def count_successful_burns(self):
89-
'''Set the sanity patterns to count the number of successful burns.'''
87+
def assert_successful_burn_count(self):
88+
'''Assert that the expected successful burn count is reported.'''
9089
return sn.assert_eq(sn.count(sn.findall(r'^GPU\s*\d+\(OK\)',
9190
self.stdout)),
9291
self.num_tasks_assigned)
93-
94-
@performance_function('nodes')
95-
def total_nodes_reported(self):
96-
return sn.count(self.nodes_reported)
97-
98-
@run_before('performance')
99-
def set_perf_variables(self):
100-
'''The number of reported nodes can be used as a perf metric.
101-
102-
For now, the low limit can go to zero, but this can be set to a more
103-
restrictive value.
104-
'''
105-
106-
self.reference = {
107-
'*': {
108-
'nodes_reported': (self.num_tasks, self.perf_floor, 0)
109-
},
110-
}
111-
self.perf_variables = {
112-
'nodes_reported': self.total_nodes_reported()
113-
}

0 commit comments

Comments
 (0)