@@ -467,6 +467,23 @@ def calc(row):
467
467
raise ValueError ('unknown memory type: %s' % reqmem_type )
468
468
return mem_max / nodemem
469
469
470
# Captures the value of the ``mem=`` entry in a comma-separated TRES string
# (used below on the AllocTRES and TRESUsageInTot columns).
RE_TRES_MEM = re.compile(r'\bmem=([^,]*)\b')


class slurmMemEff2(linefunc):
    """Slurm memory efficiency (using AllocTRES and TRESUsageInTot columns).

    This *does* work in new enough Slurm.
    """
    # https://github.com/SchedMD/slurm/blob/master/contribs/seff/seff
    type = 'real'

    @staticmethod
    def calc(row):
        """Return used memory divided by allocated memory, or None.

        ``row`` is indexed by column name.  The ``mem=`` entries of its
        'TRESUsageInTot' (used) and 'AllocTRES' (allocated) values are
        extracted with RE_TRES_MEM and converted with float_bytes.

        None is returned when either column has no ``mem=`` entry, or when
        no usable ratio can be formed from the parsed values.
        """
        # `or ''` keeps a None column value from raising TypeError in search().
        used_match = RE_TRES_MEM.search(row['TRESUsageInTot'] or '')
        alloc_match = RE_TRES_MEM.search(row['AllocTRES'] or '')
        if not (used_match and alloc_match):
            return None
        used = float_bytes(used_match.group(1))
        alloc = float_bytes(alloc_match.group(1))
        # A zero allocation would raise ZeroDivisionError.  float_bytes is
        # defined elsewhere in this file, so a None result is guarded
        # defensively as well.
        if used is None or not alloc:
            return None
        return used / alloc
485
+
486
+
470
487
class slurmCPUEff (linefunc ):
471
488
# This matches the seff tool currently:
472
489
# https://github.com/SchedMD/slurm/blob/master/contribs/seff/seff
@@ -589,6 +606,9 @@ def calc(row):
589
606
'MinCPUTask' : nullstr ,
590
607
591
608
# Memory related
609
+ '_TotalMem' : ExtractField ('TotalMem' , 'TRESUsageInTot' , 'mem' , float_bytes ),
610
+ '_AllocMem' : ExtractField ('AllocMem' , 'AllocTRES' , 'mem' , float_bytes ),
611
+ '_MemEff' : slurmMemEff2 , # Calculated from AllocTRES and TRESUsageInTot
592
612
'ReqMem' : float_bytes , # Requested mem, value from slurm. Sum across all nodes
593
613
'_ReqMemNode' : slurmMemNode , # Mem per node, computed
594
614
'_ReqMemCPU' : slurmMemCPU , # Mem per cpu, computed
@@ -598,7 +618,6 @@ def calc(row):
598
618
'MaxRSSTask' : nullstr ,
599
619
'MaxPages' : int_metric ,
600
620
'MaxVMSize' : slurmmem ,
601
- #'_MemEff': slurmMemEff, # Slurm memory efficiency - see above for why this doesn't work
602
621
603
622
# Disk related
604
623
'AveDiskRead' : int_bytes ,
@@ -882,9 +901,11 @@ def infer_type(cd):
882
901
'max(cputime) AS cpu_s_reserved, '
883
902
'max(totalcpu) AS cpu_s_used, '
884
903
'max(ReqMemNode) AS MemReq, '
885
- 'max(ReqMemNode*Elapsed) AS mem_s_reserved, ' # highest of any job
904
+ 'max(AllocMem) AS AllocMem, '
905
+ 'max(TotalMem) AS TotalMem, '
886
906
'max(MaxRSS) AS MaxRSS, '
887
- 'max(MaxRSS) / max(ReqMemNode) AS MemEff, '
907
+ 'max(MemEff) AS MemEff, '
908
+ 'max(AllocMem*Elapsed) AS mem_s_reserved, ' # highest of any job
888
909
'max(NGpus) AS NGpus, '
889
910
'max(NGpus)*max(Elapsed) AS gpu_s_reserved, '
890
911
'max(NGpus)*max(Elapsed)*max(GPUutil) AS gpu_s_used, '
@@ -1011,7 +1032,8 @@ def compact_table():
1011
1032
)
1012
1033
1013
1034
1014
- SACCT_DEFAULT_FIELDS = 'JobID,User,State,Start,End,Partition,ExitCodeRaw,NodeList,NCPUS,CPUtime,CPUEff,ReqMem,MaxRSS,ReqGPUS,GPUUtil,TotDiskRead,TotDiskWrite,ReqTRES,AllocTRES,TRESUsageInTot,TRESUsageOutTot'
1035
# Default comma-separated column list for the `--output` option of sacct_cli.
+ SACCT_DEFAULT_FIELDS = 'JobID,User,State,Start,End,Partition,ExitCodeRaw,NodeList,NCPUS,CPUtime,CPUEff,AllocMem,TotalMem,MemEff,ReqGPUS,GPUUtil,TotDiskRead,TotDiskWrite,ReqTRES,AllocTRES,TRESUsageInTot,TRESUsageOutTot'
1036
# Longer column list; sacct_cli substitutes it when `--output` is exactly 'long'.
+ SACCT_DEFAULT_FIELDS_LONG = 'JobID,User,State,Start,End,Elapsed,Partition,ExitCodeRaw,NodeList,NCPUS,CPUtime,CPUEff,AllocMem,TotalMem,MemEff,ReqMem,MaxRSS,ReqGPUS,GPUUtil,TotDiskRead,TotDiskWrite,ReqTRES,AllocTRES,TRESUsageInTot,TRESUsageOutTot'
1015
1037
# NOTE(review): presumably the Slurm state codes counted as "completed";
# where this is used is not visible in this excerpt -- confirm.
COMPLETED_STATES = 'CA,CD,DL,F,NF,OOM,PR,RV,TO'
1016
1038
def sacct_cli (argv = sys .argv [1 :], csv_input = None ):
1017
1039
"""A command line that uses slurm2sql to give an sacct-like interface."""
@@ -1026,7 +1048,7 @@ def sacct_cli(argv=sys.argv[1:], csv_input=None):
1026
1048
parser .add_argument ('--db' ,
1027
1049
help = "Read from this DB. Don't import new data." )
1028
1050
parser .add_argument ('--output' , '-o' , default = SACCT_DEFAULT_FIELDS ,
1029
- help = "Fields to output (comma separated list, use '*' for all fields). NOT safe from SQL injection" )
1051
+ help = "Fields to output (comma separated list, use '*' for all fields). NOT safe from SQL injection. If 'long' then some longer default list " )
1030
1052
parser .add_argument ('--format' , '-f' , default = compact_table (),
1031
1053
help = "Output format (see tabulate formats: https://pypi.org/project/tabulate/ (default simple)" )
1032
1054
parser .add_argument ('--order' ,
@@ -1048,6 +1070,8 @@ def sacct_cli(argv=sys.argv[1:], csv_input=None):
1048
1070
if args .quiet :
1049
1071
logging .lastResort .setLevel (logging .WARN )
1050
1072
LOG .debug (args )
1073
+ if args .output == 'long' :
1074
+ args .output = SACCT_DEFAULT_FIELDS_LONG
1051
1075
1052
1076
sacct_filter = process_sacct_filter (args , sacct_filter )
1053
1077
@@ -1079,8 +1103,6 @@ def seff_cli(argv=sys.argv[1:], csv_input=None):
1079
1103
jobs, use "--completed -S now-1week" (a start time must be
1080
1104
given with --completed because of how sacct works).
1081
1105
1082
- MemReqGiB is amount requested per node (to compare with MaxRSSGiB).
1083
-
1084
1106
This only queries jobs with an End time (unlike most other commands).
1085
1107
1086
1108
If a single argument is given, and it
@@ -1140,8 +1162,8 @@ def seff_cli(argv=sys.argv[1:], csv_input=None):
1140
1162
round(sum(Elapsed*NCPUS)/86400,1) AS cpu_day,
1141
1163
printf("%2.0f%%", 100*sum(Elapsed*NCPUS*CPUEff)/sum(Elapsed*NCPUS)) AS CPUEff,
1142
1164
1143
- round(sum(Elapsed*MemReq )/1073741824/86400,1) AS mem_GiB_day,
1144
- printf("%2.0f%%", 100*sum(Elapsed*MemReq *MemEff)/sum(Elapsed*MemReq )) AS MemEff,
1165
+ round(sum(Elapsed*AllocMem )/1073741824/86400,1) AS mem_GiB_day,
1166
+ printf("%2.0f%%", 100*sum(Elapsed*AllocMem *MemEff)/sum(Elapsed*AllocMem )) AS MemEff,
1145
1167
1146
1168
round(sum(Elapsed*NGPUs)/86400,1) AS gpu_day,
1147
1169
iif(sum(NGpus), printf("%2.0f%%", 100*sum(Elapsed*NGPUs*GPUeff)/sum(Elapsed*NGPUs)), NULL) AS GPUEff,
@@ -1169,8 +1191,8 @@ def seff_cli(argv=sys.argv[1:], csv_input=None):
1169
1191
NCPUS,
1170
1192
printf("%3.0f%%",round(CPUeff, 2)*100) AS "CPUeff",
1171
1193
1172
- round(MemReq /1073741824,2) AS MemReqGiB ,
1173
- round(MaxRSS /1073741824,2) AS MaxRSSGiB ,
1194
+ round(AllocMem /1073741824,2) AS MemAllocGiB ,
1195
+ round(TotalMem /1073741824,2) AS MemTotGiB ,
1174
1196
printf("%3.0f%%",round(MemEff,2)*100) AS MemEff,
1175
1197
1176
1198
NGpus,
0 commit comments