Skip to content

Commit a3030ac

Browse files
Merge pull request #93 from NessieCanCode/add-kpi-tile-for-total-gpu-hours
Add GPU-hours KPI tile and backend support
2 parents 20a15c6 + c60a11f commit a3030ac

File tree

3 files changed: 62 additions and 8 deletions.

src/slurmcostmanager.js

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -442,6 +442,7 @@ function SuccessFailChart({ data }) {
442442

443443
function Summary({ summary, details, daily, monthly }) {
444444
const sparklineData = daily.map(d => d.core_hours);
445+
const gpuSparklineData = daily.map(d => d.gpu_hours || 0);
445446
const ratio = summary.projected_revenue
446447
? summary.total / summary.projected_revenue
447448
: 1;
@@ -477,6 +478,12 @@ function Summary({ summary, details, daily, monthly }) {
477478
null,
478479
React.createElement('th', null, 'Total Core Hours'),
479480
React.createElement('td', null, summary.core_hours)
481+
),
482+
React.createElement(
483+
'tr',
484+
null,
485+
React.createElement('th', null, 'Total GPU Hours'),
486+
React.createElement('td', null, summary.gpu_hours || 0)
480487
)
481488
)
482489
)
@@ -489,6 +496,12 @@ function Summary({ summary, details, daily, monthly }) {
489496
value: summary.core_hours,
490497
renderChart: () => React.createElement(KpiSparkline, { data: sparklineData })
491498
}),
499+
React.createElement(KpiTile, {
500+
label: 'Total GPU-hours',
501+
value: summary.gpu_hours,
502+
renderChart: () =>
503+
React.createElement(KpiSparkline, { data: gpuSparklineData })
504+
}),
492505
React.createElement(KpiTile, {
493506
label: 'Cost recovery ratio',
494507
value: `${(ratio * 100).toFixed(1)}%`,

src/slurmdb.py

Lines changed: 38 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -254,7 +254,14 @@ def aggregate_usage(self, start_time, end_time):
254254
"""Aggregate usage metrics by account and time period."""
255255
rows = self.fetch_usage_records(start_time, end_time)
256256
agg = {}
257-
totals = {'daily': {}, 'monthly': {}, 'yearly': {}}
257+
totals = {
258+
'daily': {},
259+
'monthly': {},
260+
'yearly': {},
261+
'daily_gpu': {},
262+
'monthly_gpu': {},
263+
'yearly_gpu': {},
264+
}
258265
for row in rows:
259266
start = self._to_datetime(row['time_start'])
260267
end = self._to_datetime(row['time_end'] or row['time_start'])
@@ -271,20 +278,28 @@ def aggregate_usage(self, start_time, end_time):
271278
cpus = float(row.get('cpus_alloc') or 0)
272279
except (TypeError, ValueError):
273280
cpus = 0.0
281+
gpus = self._parse_tres(row.get('tres_alloc'), 'gpu')
282+
if not gpus:
283+
gpus = self._parse_tres(row.get('tres_alloc'), 'gres/gpu')
274284

275285
totals['daily'][day] = totals['daily'].get(day, 0.0) + cpus * dur_hours
276286
totals['monthly'][month] = totals['monthly'].get(month, 0.0) + cpus * dur_hours
277287
totals['yearly'][year] = totals['yearly'].get(year, 0.0) + cpus * dur_hours
288+
totals['daily_gpu'][day] = totals['daily_gpu'].get(day, 0.0) + gpus * dur_hours
289+
totals['monthly_gpu'][month] = totals['monthly_gpu'].get(month, 0.0) + gpus * dur_hours
290+
totals['yearly_gpu'][year] = totals['yearly_gpu'].get(year, 0.0) + gpus * dur_hours
278291

279292
month_entry = agg.setdefault(month, {})
280293
acct_entry = month_entry.setdefault(
281294
account,
282295
{
283296
'core_hours': 0.0,
297+
'gpu_hours': 0.0,
284298
'users': {},
285299
},
286300
)
287301
acct_entry['core_hours'] += cpus * dur_hours
302+
acct_entry['gpu_hours'] += gpus * dur_hours
288303
user_entry = acct_entry['users'].setdefault(
289304
user, {'core_hours': 0.0, 'jobs': {}}
290305
)
@@ -345,6 +360,7 @@ def export_summary(self, start_time, end_time):
345360
'invoices': [],
346361
}
347362
total_ch = 0.0
363+
total_gpu = 0.0
348364
total_cost = 0.0
349365

350366
rates_path = os.path.join(os.path.dirname(__file__), 'rates.json')
@@ -407,11 +423,13 @@ def export_summary(self, start_time, end_time):
407423
{
408424
'account': account,
409425
'core_hours': round(vals['core_hours'], 2),
426+
'gpu_hours': round(vals.get('gpu_hours', 0.0), 2),
410427
'cost': round(acct_cost, 2),
411428
'users': users,
412429
}
413430
)
414431
total_ch += vals['core_hours']
432+
total_gpu += vals.get('gpu_hours', 0.0)
415433
total_cost += acct_cost
416434
start_dt = (
417435
datetime.fromisoformat(start_time)
@@ -427,18 +445,31 @@ def export_summary(self, start_time, end_time):
427445
'period': f"{start_dt.strftime('%Y-%m-%d')} to {end_dt.strftime('%Y-%m-%d')}",
428446
'total': round(total_cost, 2),
429447
'core_hours': round(total_ch, 2),
448+
'gpu_hours': round(total_gpu, 2),
430449
}
431450
summary['daily'] = [
432-
{'date': d, 'core_hours': round(v, 2)}
433-
for d, v in sorted(totals['daily'].items())
451+
{
452+
'date': d,
453+
'core_hours': round(totals['daily'].get(d, 0.0), 2),
454+
'gpu_hours': round(totals.get('daily_gpu', {}).get(d, 0.0), 2),
455+
}
456+
for d in sorted(set(totals['daily']) | set(totals.get('daily_gpu', {})))
434457
]
435458
summary['monthly'] = [
436-
{'month': m, 'core_hours': round(v, 2)}
437-
for m, v in sorted(totals['monthly'].items())
459+
{
460+
'month': m,
461+
'core_hours': round(totals['monthly'].get(m, 0.0), 2),
462+
'gpu_hours': round(totals.get('monthly_gpu', {}).get(m, 0.0), 2),
463+
}
464+
for m in sorted(set(totals['monthly']) | set(totals.get('monthly_gpu', {})))
438465
]
439466
summary['yearly'] = [
440-
{'year': y, 'core_hours': round(v, 2)}
441-
for y, v in sorted(totals['yearly'].items())
467+
{
468+
'year': y,
469+
'core_hours': round(totals['yearly'].get(y, 0.0), 2),
470+
'gpu_hours': round(totals.get('yearly_gpu', {}).get(y, 0.0), 2),
471+
}
472+
for y in sorted(set(totals['yearly']) | set(totals.get('yearly_gpu', {})))
442473
]
443474
summary['invoices'] = self.fetch_invoices(start_time, end_time)
444475
return summary

test/unit/billing_summary.test.py

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@ def test_export_summary_aggregates_costs(self):
1010
'2023-10': {
1111
'acct': {
1212
'core_hours': 10.0,
13+
'gpu_hours': 5.0,
1314
'users': {
1415
'user1': {'core_hours': 10.0, 'jobs': {}}
1516
},
@@ -20,15 +21,24 @@ def test_export_summary_aggregates_costs(self):
2021
with mock.patch.object(
2122
SlurmDB,
2223
'aggregate_usage',
23-
return_value=(usage, {'daily': {}, 'monthly': {}, 'yearly': {}}),
24+
return_value=(usage, {
25+
'daily': {},
26+
'monthly': {},
27+
'yearly': {},
28+
'daily_gpu': {},
29+
'monthly_gpu': {},
30+
'yearly_gpu': {},
31+
}),
2432
):
2533
with mock.patch.object(SlurmDB, 'fetch_invoices', return_value=invoices):
2634
db = SlurmDB()
2735
summary = db.export_summary('2023-10-01', '2023-10-31')
2836
self.assertEqual(summary['summary']['total'], 0.2)
2937
self.assertEqual(summary['details'][0]['account'], 'acct')
3038
self.assertEqual(summary['details'][0]['core_hours'], 10.0)
39+
self.assertEqual(summary['details'][0]['gpu_hours'], 5.0)
3140
self.assertEqual(summary['details'][0]['cost'], 0.2)
41+
self.assertEqual(summary['summary']['gpu_hours'], 5.0)
3242
self.assertEqual(summary['invoices'][0]['file'], 'inv1.pdf')
3343

3444
def test_export_summary_applies_overrides_and_discounts(self):

0 commit comments

Comments
 (0)