diff --git a/README.md b/README.md index 87faf82..7ecdd87 100644 --- a/README.md +++ b/README.md @@ -11,8 +11,9 @@ This repository now includes a responsive Cockpit UI built with React. The inte - **Invoice dashboards** to view, download, and archive invoice PDFs. - **Detailed cost drill-downs** (core‑hours, GPU-hours) for per‑account transparency. - **Historical billing data** accessible from account inception for auditing and trend analysis. -- **Organization-wide views** consolidating charges across all member Slurm accounts. -- **Configurable rate table** with per-account overrides and cluster core count, editable from a dedicated Settings tab. +- **Organization-wide views** consolidating charges across all member Slurm accounts; cluster capacity is detected automatically from `slurm.conf`. +- **Configurable rate table** with per-account overrides. + ## 📁 Project Structure diff --git a/src/slurmcostmanager.js b/src/slurmcostmanager.js index 553748a..2330589 100644 --- a/src/slurmcostmanager.js +++ b/src/slurmcostmanager.js @@ -867,8 +867,7 @@ function Rates({ onRatesUpdated }) { if (cancelled) return; const json = JSON.parse(text); setConfig({ - defaultRate: json.defaultRate, - clusterCores: json.clusterCores || '' + defaultRate: json.defaultRate }); const ovrs = json.overrides ? Object.entries(json.overrides).map(([account, cfg]) => ({ @@ -949,15 +948,6 @@ function Rates({ onRatesUpdated }) { const json = { defaultRate }; - if (config.clusterCores !== undefined && config.clusterCores !== '') { - const cores = parseInt(config.clusterCores, 10); - if (!Number.isFinite(cores) || cores < 0) { - console.warn('Invalid cluster core count:', config.clusterCores); - setError('Invalid cluster core count'); - return; - } - json.clusterCores = cores; - } if (overrides.length) { const overridesJson = {}; diff --git a/src/slurmdb.py b/src/slurmdb.py index cd2e16e..5c4c74e 100644 --- a/src/slurmdb.py +++ b/src/slurmdb.py @@ -6,6 +6,8 @@ import sys from datetime import date, datetime, timedelta from calendar import monthrange +from itertools import product + try: import pymysql @@ -124,6 +126,7 @@ def __init__( or os.environ.get("SLURM_CLUSTER") or self._load_cluster_name(self._slurm_conf) ) + self._cluster_resources = None self._validate_config() @@ -210,6 +213,78 @@ def _load_cluster_name(self, conf_path): pass return None + def _expand_nodelist(self, expr): + names = [] + for part in expr.split(','): + m = re.match(r'(.*)\[(.*)\](.*)', part) + if m: + prefix, inner, suffix = m.groups() + ranges = [] + for grp in inner.split(','): + if '-' in grp: + start, end = grp.split('-') + width = len(start) + ranges.append([ + f"{int(i):0{width}d}" for i in range(int(start), int(end) + 1) + ]) + else: + ranges.append([grp]) + for combo in product(*ranges): + names.append(prefix + ''.join(combo) + suffix) + else: + names.append(part) + return names + + def _parse_slurm_conf(self, conf_path): + totals = {"nodes": 0, "sockets": 0, "cores": 0, "threads": 0, "gres": {}} + defaults = {} + if not conf_path or not os.path.exists(conf_path): + return totals + try: + with open(conf_path) as fh: + for raw in fh: + line = raw.split('#', 1)[0].strip() + if not line: + continue + if line.startswith('NodeName='): + parts = re.split(r'\s+', line) + attrs = defaults.copy() + for part in parts: + if '=' in part: + k, v = part.split('=', 1) + attrs[k] = v + node_expr = attrs.get('NodeName') + if node_expr == 'DEFAULT': + defaults.update(attrs) + continue + nodes = self._expand_nodelist(node_expr) + num_nodes = len(nodes) + sockets = int(attrs.get('Sockets', defaults.get('Sockets', 1))) + cps = int(attrs.get('CoresPerSocket', defaults.get('CoresPerSocket', 1))) + tpc = int(attrs.get('ThreadsPerCore', defaults.get('ThreadsPerCore', 1))) + totals['nodes'] += num_nodes + totals['sockets'] += sockets * num_nodes + totals['cores'] += cps * sockets * num_nodes + totals['threads'] += tpc * cps * sockets * num_nodes + gres_val = attrs.get('Gres', defaults.get('Gres')) + if gres_val: + for gres in gres_val.split(','): + segs = gres.split(':') + name = segs[0] + try: + count = float(segs[-1]) + except ValueError: + continue + totals['gres'][name] = totals['gres'].get(name, 0) + count * num_nodes + except OSError: + return totals + return totals + + def cluster_resources(self): + if self._cluster_resources is None: + self._cluster_resources = self._parse_slurm_conf(self._slurm_conf) + return self._cluster_resources + def connect(self): if pymysql is None: raise RuntimeError("pymysql is required but not installed") @@ -553,7 +628,8 @@ def export_summary(self, start_time, end_time): overrides = rates_cfg.get('overrides', {}) historical = rates_cfg.get('historicalRates', {}) gpu_historical = rates_cfg.get('historicalGpuRates', {}) - cluster_cores = rates_cfg.get('clusterCores') + resources = self.cluster_resources() + cluster_cores = resources.get('cores') for month, accounts in usage.items(): base_rate = historical.get(month, default_rate) @@ -638,6 +714,7 @@ def export_summary(self, start_time, end_time): 'total': round(total_cost, 2), 'core_hours': round(total_ch, 2), 'gpu_hours': round(total_gpu, 2), + 'cluster': resources, } if cluster_cores: start_date = start_dt.date() diff --git a/test/unit/billing_summary.test.py b/test/unit/billing_summary.test.py index 799bb07..3b96e22 100644 --- a/test/unit/billing_summary.test.py +++ b/test/unit/billing_summary.test.py @@ -195,7 +195,8 @@ def test_export_summary_projected_revenue(self): def fake_open(path, *args, **kwargs): if path.endswith('rates.json'): - return io.StringIO('{"defaultRate": 0.02, "clusterCores": 100}') + return io.StringIO('{"defaultRate": 0.02}') + return open_orig(path, *args, **kwargs) open_orig = open @@ -205,6 +206,8 @@ def fake_open(path, *args, **kwargs): return_value=(usage, {'daily': {}, 'monthly': {}, 'yearly': {}}), ), mock.patch.object(SlurmDB, 'fetch_invoices', return_value=[]), mock.patch( 'builtins.open', side_effect=fake_open + ), mock.patch.object( + SlurmDB, 'cluster_resources', return_value={'cores': 100} ): db = SlurmDB() summary = db.export_summary('2024-02-01', '2024-02-29')