Merge pull request #5993 from aldbr/cherry-pick-2-b2c7cbff9-integration

fstagni · web-flow · commit 1e5cb5fd061b · 2022-04-04T11:01:47.000+02:00
[sweep:integration] dirac-wms-get-cpu-norm: support multi-core allocations
diff --git a/src/DIRAC/WorkloadManagementSystem/scripts/dirac_wms_cpu_normalization.py b/src/DIRAC/WorkloadManagementSystem/scripts/dirac_wms_cpu_normalization.py
@@ -11,10 +11,8 @@
       CPUNormalizationFactor = 23.7 # corrected value (by JobScheduling/CPUNormalizationCorrection)
       DB12measured = 15.4
     }
-
-DB12measured is up to now wrote down but never used.
 """
-from db12 import single_dirac_benchmark
+from db12 import multiple_dirac_benchmark
 
 import DIRAC
 from DIRAC.Core.Base.Script import Script
@@ -25,36 +23,55 @@
 
 @Script()
 def main():
+    Script.registerSwitch("N:", "NumberOfProcessors=", "Run n parallel copies of the benchmark")
     Script.registerSwitch("U", "Update", "Update dirac.cfg with the resulting value")
     Script.registerSwitch("R:", "Reconfig=", "Update given configuration file with the resulting value")
     Script.parseCommandLine(ignoreErrors=True)
 
     update = False
     configFile = None
+    numberOfProcessors = 0
 
     for unprocSw in Script.getUnprocessedSwitches():
         if unprocSw[0] in ("U", "Update"):
             update = True
         elif unprocSw[0] in ("R", "Reconfig"):
             configFile = unprocSw[1]
+        elif unprocSw[0] in ("N", "NumberOfProcessors"):
+            try:
+                numberOfProcessors = int(unprocSw[1])
+            except ValueError:
+                gLogger.warn("Cannot make benchmark measurements: NumberOfProcessors is not a number")
+
+    # if numberOfProcessors has not been provided, try to get it from the configuration
+    if not numberOfProcessors:
+        numberOfProcessors = gConfig.getValue("/Resources/Computing/CEDefaults/NumberOfProcessors", 1)
+
+    gLogger.info("Computing benchmark measurements on", "%d processor(s)..." % numberOfProcessors)
 
     # we want to get the logs coming from db12
     gLogger.enableLogsFromExternalLibs()
-    result = single_dirac_benchmark()
+
+    # multiprocessor allocations generally have a lower CPU power than single-core ones.
+    # to avoid wrong estimations, we run multiple copies of the benchmark simultaneously
+    result = multiple_dirac_benchmark(numberOfProcessors)
 
     if result is None:
         gLogger.error("Cannot make benchmark measurements")
         DIRAC.exit(1)
 
-    db12Measured = round(result["NORM"], 1)
+    # we take a conservative approach and use the minimum value returned as the CPU Power
+    db12Result = min(result["raw"])
+    # because hardware is continuously evolving, original benchmark scores might need a correction
     corr = Operations().getValue("JobScheduling/CPUNormalizationCorrection", 1.0)
-    norm = round(result["NORM"] / corr, 1)
 
-    gLogger.notice("Estimated CPU power is %.1f HS06" % norm)
+    gLogger.info("Applying a correction on the CPU power:", corr)
+    cpuPower = round(db12Result / corr, 1)
+
+    gLogger.notice("Estimated CPU power is %.1f HS06" % cpuPower)
 
     if update:
-        gConfig.setOptionValue("/LocalSite/CPUNormalizationFactor", norm)
-        gConfig.setOptionValue("/LocalSite/DB12measured", db12Measured)
+        gConfig.setOptionValue("/LocalSite/CPUNormalizationFactor", cpuPower)
 
         if configFile:
             gConfig.dumpLocalCFGToFile(configFile)