Skip to content

Commit 710330f

Browse files
committed
Add lumisection output options, 100k wfs
1 parent 31e4882 commit 710330f

File tree

2 files changed

+22
-5
lines changed

2 files changed

+22
-5
lines changed

Configuration/PyReleaseValidation/python/relval_steps.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,8 @@
4545
steps = Steps()
4646

4747
#### Event to runs
48-
event_steps = [0.01,0.05,0.15,0.25,0.5,1] #in millions
49-
event_steps_k = ["10k","50k","150k","250k","500k","1M"]
48+
event_steps = [0.01,0.05,0.1,0.15,0.25,0.5,1] #in millions
49+
event_steps_k = ["10k","50k","100k","150k","250k","500k","1M"] ##TODO add a helper to convert the numbers to strings
5050
event_steps_dict = dict(zip(event_steps_k,event_steps))
5151
#### Production test section ####
5252
steps['ProdMinBias']=merge([{'cfg':'MinBias_8TeV_pythia8_TuneCUETP8M1_cff','--relval':'9000,300'},step1Defaults])

Configuration/PyReleaseValidation/scripts/das-up-to-nevents.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
import os
1212
import json
1313
import sys
14+
import itertools
15+
import json
1416

1517
## Helpers
1618
base_cert_url = "https://cms-service-dqmdc.web.cern.ch/CAF/certification/"
@@ -27,6 +29,13 @@ def get_url_clean(url):
2729

2830
return BeautifulSoup(buffer.getvalue(), "lxml").text
2931

32+
def get_lumi_ranges(i):
    """Collapse a sorted iterable of lumisection numbers into [first, last] ranges.

    Consecutive values are merged into a single range, e.g.
    [1, 2, 3, 7, 8] -> [[1, 3], [7, 8]].  A lone value n becomes [n, n].
    Assumes *i* is sorted ascending (callers sort before calling).
    """
    ranges = []
    # Grouping key: value minus index is constant along a run of consecutive ints.
    for _, run in itertools.groupby(enumerate(i), lambda pair: pair[1] - pair[0]):
        members = [value for _, value in run]
        ranges.append([members[0], members[-1]])
    return ranges
38+
3039
def das_do_command(cmd):
    """Run *cmd* in a bash shell and return its stdout split into lines.

    The trailing newline of the output yields an empty final list element.
    Raises subprocess.CalledProcessError if the command exits non-zero.
    """
    # NOTE(review): shell=True executes the string through /bin/bash; only safe
    # because cmd is a DAS query built internally by this script, never raw
    # user/network input — confirm before reusing elsewhere.
    raw = subprocess.check_output(cmd, shell=True, executable="/bin/bash")
    return raw.decode('utf8').split("\n")
@@ -90,6 +99,7 @@ def no_intersection():
9099
parser.add_argument('--pandas', '-pd',action='store_true',help="Store the whole dataset (no event or threshold cut) in a csv")
91100
parser.add_argument('--proxy','-p', help='Allow to parse a x509 proxy if needed', type=str, default=None)
92101
parser.add_argument('--site','-s', help='Only data at specific site', type=str, default=None)
102+
parser.add_argument('--lumis','-l', help='Output file for lumi ranges for the selected files (if blank, no lumi ranges are calculated)', type=str, default=None)
93103
parser.add_argument('--precheck','-pc', action='store_true', help='Check run per run before building the dataframes, to avoid huge caching.')
94104
args = parser.parse_args()
95105

@@ -106,6 +116,7 @@ def no_intersection():
106116
threshold = args.threshold
107117
outfile = args.outfile
108118
site = args.site
119+
lumis = args.lumis
109120

110121
## get the greatest golden json
111122
year = dataset.split("Run")[1][2:4] # from 20XX to XX
@@ -134,7 +145,7 @@ def no_intersection():
134145
if web_fallback:
135146
cert_url = base_cert_url + cert_type + "/"
136147
json_list = get_url_clean(cert_url).split("\n")
137-
json_list = [c for c in json_list if "Golden" in c and "era" not in c]
148+
json_list = [c for c in json_list if "Golden" in c and "era" not in c and "Cert_C" in c]
138149
json_list = [[cc for cc in c.split(" ") if cc.startswith("Cert_C") and cc.endswith("json")][0] for c in json_list]
139150

140151
# the larger the better, assuming file naming schema
@@ -222,8 +233,14 @@ def no_intersection():
222233
df = df[df["events"] <= events] #jump too big files
223234
df.loc[:,"sum_evs"] = df.loc[:,"events"].cumsum()
224235
df = df[df["sum_evs"] < events]
225-
236+
226237
files = df.file
238+
239+
if lumis is not None:
240+
lumi_ranges = { int(r) : list(get_lumi_ranges(np.sort(np.concatenate(df.loc[df["run"]==r,"lumis"].values).ravel()).tolist())) for r in np.unique(df.run.values)}
241+
242+
with open(lumis, 'w') as fp:
243+
json.dump(lumi_ranges, fp)
227244

228245
if outfile is not None:
229246
with open(outfile, 'w') as f:
@@ -234,4 +251,4 @@ def no_intersection():
234251

235252
sys.exit(0)
236253

237-
254+

0 commit comments

Comments
 (0)