Skip to content

Commit b0f7382

Browse files
committed
slurm2sql: Change GPUEff to use AllocTRES and TRESUsageInTot
1 parent b8b3a90 commit b0f7382

File tree

1 file changed

+19
-5
lines changed

1 file changed

+19
-5
lines changed

slurm2sql.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,19 @@ def calc(row):
383383
if m:
384384
return int(m.group(1))
385385

386+
RE_TRES_GPU = re.compile(rf'\bgres/gpu=([^,]*)\b')
387+
RE_TRES_GPU_UTIL = re.compile(rf'\bgres/gpuutil=([^,]*)\b')
388+
class slurmGPUEff2(linefunc):
    """Slurm GPU efficiency (using AllocTRES and TRESUsageInTot columns).

    The efficiency is the ``gres/gpuutil`` value found in ``TRESUsageInTot``
    divided by 100, then divided by the ``gres/gpu`` value found in
    ``AllocTRES``.  The result is ``None`` when either value is absent,
    cannot be converted, or when the allocated GPU count is zero.
    """
    type = 'real'

    @staticmethod
    def calc(row):
        """Return the GPU efficiency for one row, or ``None`` if unavailable.

        ``row`` must support ``row['TRESUsageInTot']`` and
        ``row['AllocTRES']`` and both values must be strings that the
        module-level ``RE_TRES_GPU_UTIL`` / ``RE_TRES_GPU`` patterns can
        search.
        """
        m_used = RE_TRES_GPU_UTIL.search(row['TRESUsageInTot'])
        m_alloc = RE_TRES_GPU.search(row['AllocTRES'])
        if not (m_alloc and m_used):
            return None
        used = float_metric(m_used.group(1))
        alloc = float_metric(m_alloc.group(1))
        # The patterns accept an empty value (``[^,]*``) and Slurm may report
        # ``gres/gpu=0``; in both cases there is nothing meaningful to divide
        # by, so report "no value" instead of raising.
        # NOTE(review): assumes float_metric (defined elsewhere) returns a
        # number or a falsy value such as None for empty input -- confirm.
        if used is None or not alloc:
            return None
        return (used / 100.) / alloc
386399

387400
# Job ID related stuff
388401
jobidonly_re = re.compile(r'[0-9]+')
@@ -633,10 +646,10 @@ def calc(row):
633646
'_ReqGPUS': ExtractField('ReqGpus', 'ReqTRES', 'gres/gpu', float_metric),
634647
'Comment': nullstr_strip, # Slurm Comment field (at Aalto used for GPU stats)
635648
#'_GPUMem': slurmGPUMem, # GPU mem extracted from comment field
636-
#'_GPUEff': slurmGPUEff, # GPU utilization (0.0 to 1.0) extracted from comment field
649+
'_GpuEff': slurmGPUEff2, # GPU utilization (0.0 to 1.0) from AllocTRES()
637650
#'_NGPU': slurmGPUCount, # Number of GPUs, extracted from comment field
638651
'_NGpus': ExtractField('NGpus', 'AllocTRES', 'gres/gpu', float_metric),
639-
'_GpuUtil': ExtractField('GpuUtil', 'TRESUsageInAve', 'gres/gpuutil', float_metric, wrap=lambda x: x/100.),
652+
'_GpuUtil': ExtractField('GpuUtil', 'TRESUsageInAve', 'gres/gpuutil', float_metric, wrap=lambda x: x/100.), # can be >100 for multi-GPU.
640653
'_GpuMem': ExtractField('GpuMem2', 'TRESUsageInAve', 'gres/gpumem', float_metric),
641654
'_GpuUtilTot': ExtractField('GpuUtilTot', 'TRESUsageInTot', 'gres/gpuutil', float_metric),
642655
'_GpuMemTot': ExtractField('GpuMemTot', 'TRESUsageInTot', 'gres/gpumem', float_metric),
@@ -909,7 +922,8 @@ def infer_type(cd):
909922
'max(NGpus) AS NGpus, '
910923
'max(NGpus)*max(Elapsed) AS gpu_s_reserved, '
911924
'max(NGpus)*max(Elapsed)*max(GPUutil) AS gpu_s_used, '
912-
'max(GPUutil)/max(NGpus) AS GPUeff, ' # Individual job with highest use (check this)
925+
#'max(GPUutil)/max(NGpus) AS GPUeff, ' # Individual job with highest use (check this)
926+
'max(GPUEff) AS GPUeff, ' # Individual job with highest use (check this)
913927
'max(GPUMem) AS GPUMem, '
914928
'MaxDiskRead, '
915929
'MaxDiskWrite, '
@@ -1032,8 +1046,8 @@ def compact_table():
10321046
)
10331047

10341048

1035-
SACCT_DEFAULT_FIELDS = 'JobID,User,State,Start,End,Partition,ExitCodeRaw,NodeList,NCPUS,CPUtime,CPUEff,AllocMem,TotalMem,MemEff,ReqGPUS,GPUUtil,TotDiskRead,TotDiskWrite,ReqTRES,AllocTRES,TRESUsageInTot,TRESUsageOutTot'
1036-
SACCT_DEFAULT_FIELDS_LONG = 'JobID,User,State,Start,End,Elapsed,Partition,ExitCodeRaw,NodeList,NCPUS,CPUtime,CPUEff,AllocMem,TotalMem,MemEff,ReqMem,MaxRSS,ReqGPUS,GPUUtil,TotDiskRead,TotDiskWrite,ReqTRES,AllocTRES,TRESUsageInTot,TRESUsageOutTot'
1049+
SACCT_DEFAULT_FIELDS = 'JobID,User,State,Start,End,Partition,ExitCodeRaw,NodeList,NCPUS,CPUtime,CPUEff,AllocMem,TotalMem,MemEff,ReqGPUS,GPUEff,TotDiskRead,TotDiskWrite,ReqTRES,AllocTRES,TRESUsageInTot,TRESUsageOutTot'
1050+
SACCT_DEFAULT_FIELDS_LONG = 'JobID,User,State,Start,End,Elapsed,Partition,ExitCodeRaw,NodeList,NCPUS,CPUtime,CPUEff,AllocMem,TotalMem,MemEff,ReqMem,MaxRSS,ReqGPUS,GPUEff,GPUUtil,TotDiskRead,TotDiskWrite,ReqTRES,AllocTRES,TRESUsageInTot,TRESUsageOutTot'
10371051
COMPLETED_STATES = 'CA,CD,DL,F,NF,OOM,PR,RV,TO'
10381052
def sacct_cli(argv=sys.argv[1:], csv_input=None):
10391053
"""A command line that uses slurm2sql to give an sacct-like interface."""

0 commit comments

Comments
 (0)