Merge pull request #46625 from AdrianoDee/lumi_das_n_events_100k

cmsbuild · web-flow · commit 51ce6f284755 · 2024-11-08T21:46:31.000+01:00
Add Lumisection Outputs for `das-up-to-nevents.py`
diff --git a/Configuration/PyReleaseValidation/python/MatrixUtil.py b/Configuration/PyReleaseValidation/python/MatrixUtil.py
@@ -134,10 +134,10 @@ def das(self, das_options, dataset):
         elif self.skimEvents:
             from os import getenv
             if getenv("JENKINS_PREFIX") is not None:
-                # to be assured that whatever happens the files are only those at CERN
-                command = "das-up-to-nevents.py -d %s -e %d -pc"%(dataset,self.events)
+                # to be sure that whatever happens the files are only those at CERN
+                command = "das-up-to-nevents.py -d %s -e %d -pc -l lumi_ranges.txt"%(dataset,self.events)
             else:
-                command = "das-up-to-nevents.py -d %s -e %d"%(dataset,self.events)
+                command = "das-up-to-nevents.py -d %s -e %d -l lumi_ranges.txt"%(dataset,self.events)
         # Run filter on DAS output 
         if self.ib_blacklist:
             command += " | grep -E -v "
diff --git a/Configuration/PyReleaseValidation/python/WorkFlowRunner.py b/Configuration/PyReleaseValidation/python/WorkFlowRunner.py
@@ -137,7 +137,14 @@ def closeCmd(i,ID):
                         isInputOk = False
                  
                 inFile = 'filelist:' + basename(dasOutputPath)
+
+                if com.skimEvents:
+                    lumiRangeFile='step%d_lumiRanges.log'%(istep,)
+                    cmd2 = preamble + "mv lumi_ranges.txt " + lumiRangeFile
+                    retStep = self.doCmd(cmd2)
+
                 print("---")
+
             else:
                 #chaining IO , which should be done in WF object already and not using stepX.root but <stepName>.root
                 cmd += com
diff --git a/Configuration/PyReleaseValidation/python/relval_data_highstats.py b/Configuration/PyReleaseValidation/python/relval_data_highstats.py
@@ -23,7 +23,8 @@
             wf_number = wf_number + offset_pd * p_n
             wf_number = wf_number + offset_events * evs 
             wf_number = round(wf_number,6)
-            step_name = "Run" + pd + era.split("Run")[1] + "_10k"
+
+            step_name = "Run" + pd.replace("ParkingDouble","Park2") + era.split("Run")[1] + "_" + e_key
             y = str(base_wf)
             suff = "ZB_" if "ZeroBias" in step_name else ""
             workflows[wf_number] = ['',[step_name,'HLTDR3_' + y,'RECONANORUN3_' + suff + 'reHLT_'+y,'HARVESTRUN3_' + suff + y]]
@@ -40,7 +41,8 @@
             wf_number = wf_number + offset_pd * p_n
             wf_number = wf_number + offset_events * evs 
             wf_number = round(wf_number,6)
-            step_name = "Run" + pd + era.split("Run")[1] + "_10k"
+
+            step_name = "Run" + pd.replace("ParkingDouble","Park2") + era.split("Run")[1] + "_" + e_key
             y = str(base_wf) + "B" if "2023B" in era else str(base_wf)
             suff = "ZB_" if "ZeroBias" in step_name else ""
             workflows[wf_number] = ['',[step_name,'HLTDR3_' + y,'RECONANORUN3_' + suff + 'reHLT_'+y,'HARVESTRUN3_' + suff + y]]
diff --git a/Configuration/PyReleaseValidation/python/relval_standard.py b/Configuration/PyReleaseValidation/python/relval_standard.py
@@ -583,11 +583,12 @@
         wf_number = wf_number + offset_pd * p_n
         wf_number = wf_number + 0.0001 * 0.01 
         wf_number = round(wf_number,6)
-        step_name = "Run" + pd + era.split("Run")[1] + "_10k"
+
+        step_name = "Run" + pd.replace("ParkingDouble","Park2") + era.split("Run")[1] + "_10k"
         y = str(base_wf)
         suff = "ZB_" if "ZeroBias" in step_name else ""
         workflows[wf_number] = ['',[step_name,'HLTDR3_' + y,'RECONANORUN3_' + suff + 'reHLT_'+y,'HARVESTRUN3_' + suff + y]]
-     
+
 # 2023
 base_wf = 2023
 for e_n,era in enumerate(['Run2023D']):
@@ -597,11 +598,11 @@
         wf_number = wf_number + offset_pd * p_n
         wf_number = wf_number + 0.0001 * 0.01
         wf_number = round(wf_number,6)
-        step_name = "Run" + pd + era.split("Run")[1] + "_10k"
+
+        step_name = "Run" + pd.replace("ParkingDouble","Park2") + era.split("Run")[1] + "_10k"
         y = str(base_wf) + "B" if "2023B" in era else str(base_wf)
         suff = "ZB_" if "ZeroBias" in step_name else ""
         workflows[wf_number] = ['',[step_name,'HLTDR3_' + y,'RECONANORUN3_' + suff + 'reHLT_'+y,'HARVESTRUN3_' + suff + y]]
-        
 
 # 2022
 base_wf = 2022
diff --git a/Configuration/PyReleaseValidation/python/relval_steps.py b/Configuration/PyReleaseValidation/python/relval_steps.py
@@ -45,8 +45,8 @@
 steps = Steps()
 
 #### Event to runs
-event_steps = [0.01,0.05,0.15,0.25,0.5,1] #in millions
-event_steps_k = ["10k","50k","150k","250k","500k","1M"]
+event_steps = [0.01,0.05,0.1,0.15,0.25,0.5,1] #in millions
+event_steps_k = ["10k","50k","100k","150k","250k","500k","1M"] ##TODO add a helper to convert the numbers to strings
 event_steps_dict = dict(zip(event_steps_k,event_steps))
 #### Production test section ####
 steps['ProdMinBias']=merge([{'cfg':'MinBias_8TeV_pythia8_TuneCUETP8M1_cff','--relval':'9000,300'},step1Defaults])
@@ -653,7 +653,7 @@
     for pd in pds_2024:
         dataset = "/" + pd + "/" + era + "-v1/RAW"
         for e_key,evs in event_steps_dict.items():
-            step_name = "Run" + pd + era.split("Run")[1] + "_" + e_key
+            step_name = "Run" + pd.replace("ParkingDouble","Park2") + era.split("Run")[1] + "_" + e_key
             steps[step_name] = {'INPUT':InputInfo(dataSet=dataset,label=era.split("Run")[1],events=int(evs*1e6), skimEvents=True, location='STD')}
 
 ###2023 
@@ -665,7 +665,7 @@
     for pd in pds_2023:
         dataset = "/" + pd + "/" + era + "-v1/RAW"
         for e_key,evs in event_steps_dict.items():
-            step_name = "Run" + pd + era.split("Run")[1] + "_" + e_key
+            step_name = "Run" + pd.replace("ParkingDouble","Park2") + era.split("Run")[1] + "_" + e_key
             steps[step_name] = {'INPUT':InputInfo(dataSet=dataset,label=era.split("Run")[1],events=int(evs*1e6), skimEvents=True, location='STD')}
 
 ###2022 
diff --git a/Configuration/PyReleaseValidation/scripts/das-up-to-nevents.py b/Configuration/PyReleaseValidation/scripts/das-up-to-nevents.py
@@ -11,6 +11,8 @@
 import os
 import json
 import sys
+import itertools
+import json
 
 ## Helpers
 base_cert_url = "https://cms-service-dqmdc.web.cern.ch/CAF/certification/"
@@ -27,6 +29,13 @@ def get_url_clean(url):
     
     return BeautifulSoup(buffer.getvalue(), "lxml").text
 
+def get_lumi_ranges(i):
+    result = []
+    for _, b in itertools.groupby(enumerate(i), lambda pair: pair[1] - pair[0]):
+        b = list(b)
+        result.append([b[0][1],b[-1][1]]) 
+    return result
+
 def das_do_command(cmd):
     out = subprocess.check_output(cmd, shell=True, executable="/bin/bash").decode('utf8')
     return out.split("\n")
@@ -90,6 +99,7 @@ def no_intersection():
     parser.add_argument('--pandas', '-pd',action='store_true',help="Store the whole dataset (no event or threshold cut) in a csv") 
     parser.add_argument('--proxy','-p', help='Allow to parse a x509 proxy if needed', type=str, default=None)
     parser.add_argument('--site','-s', help='Only data at specific site', type=str, default=None)
+    parser.add_argument('--lumis','-l', help='Output file for lumi ranges for the selected files (if black no lumiranges calculated)', type=str, default=None)
     parser.add_argument('--precheck','-pc', action='store_true', help='Check run per run before building the dataframes, to avoid huge caching.')
     args = parser.parse_args()
 
@@ -106,6 +116,7 @@ def no_intersection():
     threshold = args.threshold
     outfile   = args.outfile
     site      = args.site
+    lumis     = args.lumis
 
     ## get the greatest golden json
     year = dataset.split("Run")[1][2:4] # from 20XX to XX
@@ -222,8 +233,14 @@ def no_intersection():
         df = df[df["events"] <= events] #jump too big files
         df.loc[:,"sum_evs"] = df.loc[:,"events"].cumsum()
         df = df[df["sum_evs"] < events]
-            
+        
     files = df.file
+    
+    if lumis is not None:
+        lumi_ranges = { int(r) : list(get_lumi_ranges(np.sort(np.concatenate(df.loc[df["run"]==r,"lumis"].values).ravel()).tolist())) for r in np.unique(df.run.values).tolist()}
+        
+        with open(lumis, 'w') as fp:
+            json.dump(lumi_ranges, fp)
 
     if outfile is not None:
         with open(outfile, 'w') as f:
@@ -234,4 +251,4 @@ def no_intersection():
 
     sys.exit(0)
 
-    
+