@@ -383,6 +383,19 @@ def calc(row):
383
383
if m :
384
384
return int (m .group (1 ))
385
385
386
# Patterns that pull single entries out of comma-separated TRES strings such
# as ``cpu=4,gres/gpu=2,mem=8G``.  No interpolation is needed, so these are
# plain raw strings rather than rf-strings.
RE_TRES_GPU = re.compile(r'\bgres/gpu=([^,]*)\b')
RE_TRES_GPU_UTIL = re.compile(r'\bgres/gpuutil=([^,]*)\b')


class slurmGPUEff2(linefunc):
    """Slurm GPU efficiency (using AllocTRES and TRESUsageInTot columns).

    The value is the ``gres/gpuutil`` entry of ``TRESUsageInTot`` divided by
    100, then divided by the ``gres/gpu`` entry of ``AllocTRES``.
    """
    type = 'real'

    @staticmethod
    def calc(row):
        """Return the GPU efficiency for one row, or None if not computable.

        ``row`` must be indexable by the column names ``'TRESUsageInTot'``
        and ``'AllocTRES'``, each holding a string.  None is returned when
        either entry is absent from its column, when a matched value cannot
        be converted to a number, or when the allocated GPU count is zero.
        """
        m_used = RE_TRES_GPU_UTIL.search(row['TRESUsageInTot'])
        m_alloc = RE_TRES_GPU.search(row['AllocTRES'])
        if not (m_alloc and m_used):
            return None

        used = float_metric(m_used.group(1))
        alloc = float_metric(m_alloc.group(1))
        # The capture group may be empty (``gres/gpu=``); float_metric is
        # defined elsewhere and presumably yields None in that case -- verify.
        # A zero allocation would otherwise raise ZeroDivisionError.
        if used is None or not alloc:
            return None
        return (used / 100.) / alloc
386
399
387
400
# Job ID related stuff
388
401
jobidonly_re = re .compile (r'[0-9]+' )
@@ -633,10 +646,10 @@ def calc(row):
633
646
'_ReqGPUS' : ExtractField ('ReqGpus' , 'ReqTRES' , 'gres/gpu' , float_metric ),
634
647
'Comment' : nullstr_strip , # Slurm Comment field (at Aalto used for GPU stats)
635
648
#'_GPUMem': slurmGPUMem, # GPU mem extracted from comment field
636
- #'_GPUEff ': slurmGPUEff , # GPU utilization (0.0 to 1.0) extracted from comment field
649
+ '_GpuEff ' : slurmGPUEff2 , # GPU utilization (0.0 to 1.0) from AllocTRES()
637
650
#'_NGPU': slurmGPUCount, # Number of GPUs, extracted from comment field
638
651
'_NGpus' : ExtractField ('NGpus' , 'AllocTRES' , 'gres/gpu' , float_metric ),
639
- '_GpuUtil' : ExtractField ('GpuUtil' , 'TRESUsageInAve' , 'gres/gpuutil' , float_metric , wrap = lambda x : x / 100. ),
652
+ '_GpuUtil' : ExtractField ('GpuUtil' , 'TRESUsageInAve' , 'gres/gpuutil' , float_metric , wrap = lambda x : x / 100. ), # can be >100 for multi-GPU.
640
653
'_GpuMem' : ExtractField ('GpuMem2' , 'TRESUsageInAve' , 'gres/gpumem' , float_metric ),
641
654
'_GpuUtilTot' : ExtractField ('GpuUtilTot' , 'TRESUsageInTot' , 'gres/gpuutil' , float_metric ),
642
655
'_GpuMemTot' : ExtractField ('GpuMemTot' , 'TRESUsageInTot' , 'gres/gpumem' , float_metric ),
@@ -909,7 +922,8 @@ def infer_type(cd):
909
922
'max(NGpus) AS NGpus, '
910
923
'max(NGpus)*max(Elapsed) AS gpu_s_reserved, '
911
924
'max(NGpus)*max(Elapsed)*max(GPUutil) AS gpu_s_used, '
912
- 'max(GPUutil)/max(NGpus) AS GPUeff, ' # Individual job with highest use (check this)
925
+ #'max(GPUutil)/max(NGpus) AS GPUeff, ' # Individual job with highest use (check this)
926
+ 'max(GPUEff) AS GPUeff, ' # Individual job with highest use (check this)
913
927
'max(GPUMem) AS GPUMem, '
914
928
'MaxDiskRead, '
915
929
'MaxDiskWrite, '
@@ -1032,8 +1046,8 @@ def compact_table():
1032
1046
)
1033
1047
1034
1048
1035
- SACCT_DEFAULT_FIELDS = 'JobID,User,State,Start,End,Partition,ExitCodeRaw,NodeList,NCPUS,CPUtime,CPUEff,AllocMem,TotalMem,MemEff,ReqGPUS,GPUUtil ,TotDiskRead,TotDiskWrite,ReqTRES,AllocTRES,TRESUsageInTot,TRESUsageOutTot'
1036
- SACCT_DEFAULT_FIELDS_LONG = 'JobID,User,State,Start,End,Elapsed,Partition,ExitCodeRaw,NodeList,NCPUS,CPUtime,CPUEff,AllocMem,TotalMem,MemEff,ReqMem,MaxRSS,ReqGPUS,GPUUtil,TotDiskRead,TotDiskWrite,ReqTRES,AllocTRES,TRESUsageInTot,TRESUsageOutTot'
1049
+ SACCT_DEFAULT_FIELDS = 'JobID,User,State,Start,End,Partition,ExitCodeRaw,NodeList,NCPUS,CPUtime,CPUEff,AllocMem,TotalMem,MemEff,ReqGPUS,GPUEff ,TotDiskRead,TotDiskWrite,ReqTRES,AllocTRES,TRESUsageInTot,TRESUsageOutTot'
1050
+ SACCT_DEFAULT_FIELDS_LONG = 'JobID,User,State,Start,End,Elapsed,Partition,ExitCodeRaw,NodeList,NCPUS,CPUtime,CPUEff,AllocMem,TotalMem,MemEff,ReqMem,MaxRSS,ReqGPUS,GPUEff, GPUUtil,TotDiskRead,TotDiskWrite,ReqTRES,AllocTRES,TRESUsageInTot,TRESUsageOutTot'
1037
1051
COMPLETED_STATES = 'CA,CD,DL,F,NF,OOM,PR,RV,TO'
1038
1052
def sacct_cli (argv = sys .argv [1 :], csv_input = None ):
1039
1053
"""A command line that uses slurm2sql to give an sacct-like interface."""
0 commit comments