Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@ This repository now includes a responsive Cockpit UI built with React. The inte
- **Invoice dashboards** to view, download, and archive invoice PDFs.
- **Detailed cost drill-downs** (core‑hours, GPU-hours) for per‑account transparency.
- **Historical billing data** accessible from account inception for auditing and trend analysis.
- **Organization-wide views** consolidating charges across all member Slurm accounts.
- **Configurable rate table** with per-account overrides and cluster core count, editable from a dedicated Settings tab.
- **Organization-wide views** consolidating charges across all member Slurm accounts; cluster capacity is detected automatically from `slurm.conf`.
- **Configurable rate table** with per-account overrides.



## 📁 Project Structure
Expand Down
12 changes: 1 addition & 11 deletions src/slurmcostmanager.js
Original file line number Diff line number Diff line change
Expand Up @@ -867,8 +867,7 @@ function Rates({ onRatesUpdated }) {
if (cancelled) return;
const json = JSON.parse(text);
setConfig({
defaultRate: json.defaultRate,
clusterCores: json.clusterCores || ''
defaultRate: json.defaultRate
});
const ovrs = json.overrides
? Object.entries(json.overrides).map(([account, cfg]) => ({
Expand Down Expand Up @@ -949,15 +948,6 @@ function Rates({ onRatesUpdated }) {

const json = { defaultRate };

if (config.clusterCores !== undefined && config.clusterCores !== '') {
const cores = parseInt(config.clusterCores, 10);
if (!Number.isFinite(cores) || cores < 0) {
console.warn('Invalid cluster core count:', config.clusterCores);
setError('Invalid cluster core count');
return;
}
json.clusterCores = cores;
}

if (overrides.length) {
const overridesJson = {};
Expand Down
79 changes: 78 additions & 1 deletion src/slurmdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import sys
from datetime import date, datetime, timedelta
from calendar import monthrange
from itertools import product


try:
import pymysql
Expand Down Expand Up @@ -124,6 +126,7 @@ def __init__(
or os.environ.get("SLURM_CLUSTER")
or self._load_cluster_name(self._slurm_conf)
)
self._cluster_resources = None

self._validate_config()

Expand Down Expand Up @@ -210,6 +213,78 @@ def _load_cluster_name(self, conf_path):
pass
return None

def _expand_nodelist(self, expr):
names = []
for part in expr.split(','):
m = re.match(r'(.*)\[(.*)\](.*)', part)
if m:
prefix, inner, suffix = m.groups()
ranges = []
for grp in inner.split(','):
if '-' in grp:
start, end = grp.split('-')
width = len(start)
ranges.append([
f"{int(i):0{width}d}" for i in range(int(start), int(end) + 1)
])
else:
ranges.append([grp])
for combo in product(*ranges):
names.append(prefix + ''.join(combo) + suffix)
else:
names.append(part)
return names

def _parse_slurm_conf(self, conf_path):
totals = {"nodes": 0, "sockets": 0, "cores": 0, "threads": 0, "gres": {}}
defaults = {}
if not conf_path or not os.path.exists(conf_path):
return totals
try:
with open(conf_path) as fh:
for raw in fh:
line = raw.split('#', 1)[0].strip()
if not line:
continue
if line.startswith('NodeName='):
parts = re.split(r'\s+', line)
attrs = defaults.copy()
for part in parts:
if '=' in part:
k, v = part.split('=', 1)
attrs[k] = v
node_expr = attrs.get('NodeName')
if node_expr == 'DEFAULT':
defaults.update(attrs)
continue
nodes = self._expand_nodelist(node_expr)
num_nodes = len(nodes)
sockets = int(attrs.get('Sockets', defaults.get('Sockets', 1)))
cps = int(attrs.get('CoresPerSocket', defaults.get('CoresPerSocket', 1)))
tpc = int(attrs.get('ThreadsPerCore', defaults.get('ThreadsPerCore', 1)))
totals['nodes'] += num_nodes
totals['sockets'] += sockets * num_nodes
totals['cores'] += cps * sockets * num_nodes
totals['threads'] += tpc * cps * sockets * num_nodes
gres_val = attrs.get('Gres', defaults.get('Gres'))
if gres_val:
for gres in gres_val.split(','):
segs = gres.split(':')
name = segs[0]
try:
count = float(segs[-1])
except ValueError:
continue
totals['gres'][name] = totals['gres'].get(name, 0) + count * num_nodes
except OSError:
return totals
return totals

def cluster_resources(self):
    """Return the cluster resource totals, parsing slurm.conf on first use.

    The result of ``_parse_slurm_conf`` for ``self._slurm_conf`` is stored
    in ``self._cluster_resources`` and returned from there on later calls.
    """
    cached = self._cluster_resources
    if cached is not None:
        return cached
    cached = self._parse_slurm_conf(self._slurm_conf)
    self._cluster_resources = cached
    return cached

def connect(self):
if pymysql is None:
raise RuntimeError("pymysql is required but not installed")
Expand Down Expand Up @@ -553,7 +628,8 @@ def export_summary(self, start_time, end_time):
overrides = rates_cfg.get('overrides', {})
historical = rates_cfg.get('historicalRates', {})
gpu_historical = rates_cfg.get('historicalGpuRates', {})
cluster_cores = rates_cfg.get('clusterCores')
resources = self.cluster_resources()
cluster_cores = resources.get('cores')

for month, accounts in usage.items():
base_rate = historical.get(month, default_rate)
Expand Down Expand Up @@ -638,6 +714,7 @@ def export_summary(self, start_time, end_time):
'total': round(total_cost, 2),
'core_hours': round(total_ch, 2),
'gpu_hours': round(total_gpu, 2),
'cluster': resources,
}
if cluster_cores:
start_date = start_dt.date()
Expand Down
5 changes: 4 additions & 1 deletion test/unit/billing_summary.test.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,8 @@ def test_export_summary_projected_revenue(self):

def fake_open(path, *args, **kwargs):
if path.endswith('rates.json'):
return io.StringIO('{"defaultRate": 0.02, "clusterCores": 100}')
return io.StringIO('{"defaultRate": 0.02}')

return open_orig(path, *args, **kwargs)

open_orig = open
Expand All @@ -205,6 +206,8 @@ def fake_open(path, *args, **kwargs):
return_value=(usage, {'daily': {}, 'monthly': {}, 'yearly': {}}),
), mock.patch.object(SlurmDB, 'fetch_invoices', return_value=[]), mock.patch(
'builtins.open', side_effect=fake_open
), mock.patch.object(
SlurmDB, 'cluster_resources', return_value={'cores': 100}
):
db = SlurmDB()
summary = db.export_summary('2024-02-01', '2024-02-29')
Expand Down
Loading