Skip to content

Commit 51ce6f2

Browse files
authored
Merge pull request #46625 from AdrianoDee/lumi_das_n_events_100k
Add Lumisection Outputs for `das-up-to-nevents.py`
2 parents 1bd97a6 + 2a6b872 commit 51ce6f2

File tree

6 files changed

+42
-15
lines changed

6 files changed

+42
-15
lines changed

Configuration/PyReleaseValidation/python/MatrixUtil.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -134,10 +134,10 @@ def das(self, das_options, dataset):
134134
elif self.skimEvents:
135135
from os import getenv
136136
if getenv("JENKINS_PREFIX") is not None:
137-
# to be assured that whatever happens the files are only those at CERN
138-
command = "das-up-to-nevents.py -d %s -e %d -pc"%(dataset,self.events)
137+
# to be sure that whatever happens the files are only those at CERN
138+
command = "das-up-to-nevents.py -d %s -e %d -pc -l lumi_ranges.txt"%(dataset,self.events)
139139
else:
140-
command = "das-up-to-nevents.py -d %s -e %d"%(dataset,self.events)
140+
command = "das-up-to-nevents.py -d %s -e %d -l lumi_ranges.txt"%(dataset,self.events)
141141
# Run filter on DAS output
142142
if self.ib_blacklist:
143143
command += " | grep -E -v "

Configuration/PyReleaseValidation/python/WorkFlowRunner.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,14 @@ def closeCmd(i,ID):
137137
isInputOk = False
138138

139139
inFile = 'filelist:' + basename(dasOutputPath)
140+
141+
if com.skimEvents:
142+
lumiRangeFile='step%d_lumiRanges.log'%(istep,)
143+
cmd2 = preamble + "mv lumi_ranges.txt " + lumiRangeFile
144+
retStep = self.doCmd(cmd2)
145+
140146
print("---")
147+
141148
else:
142149
#chaining IO , which should be done in WF object already and not using stepX.root but <stepName>.root
143150
cmd += com

Configuration/PyReleaseValidation/python/relval_data_highstats.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@
2323
wf_number = wf_number + offset_pd * p_n
2424
wf_number = wf_number + offset_events * evs
2525
wf_number = round(wf_number,6)
26-
step_name = "Run" + pd + era.split("Run")[1] + "_10k"
26+
27+
step_name = "Run" + pd.replace("ParkingDouble","Park2") + era.split("Run")[1] + "_" + e_key
2728
y = str(base_wf)
2829
suff = "ZB_" if "ZeroBias" in step_name else ""
2930
workflows[wf_number] = ['',[step_name,'HLTDR3_' + y,'RECONANORUN3_' + suff + 'reHLT_'+y,'HARVESTRUN3_' + suff + y]]
@@ -40,7 +41,8 @@
4041
wf_number = wf_number + offset_pd * p_n
4142
wf_number = wf_number + offset_events * evs
4243
wf_number = round(wf_number,6)
43-
step_name = "Run" + pd + era.split("Run")[1] + "_10k"
44+
45+
step_name = "Run" + pd.replace("ParkingDouble","Park2") + era.split("Run")[1] + "_" + e_key
4446
y = str(base_wf) + "B" if "2023B" in era else str(base_wf)
4547
suff = "ZB_" if "ZeroBias" in step_name else ""
4648
workflows[wf_number] = ['',[step_name,'HLTDR3_' + y,'RECONANORUN3_' + suff + 'reHLT_'+y,'HARVESTRUN3_' + suff + y]]

Configuration/PyReleaseValidation/python/relval_standard.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -583,11 +583,12 @@
583583
wf_number = wf_number + offset_pd * p_n
584584
wf_number = wf_number + 0.0001 * 0.01
585585
wf_number = round(wf_number,6)
586-
step_name = "Run" + pd + era.split("Run")[1] + "_10k"
586+
587+
step_name = "Run" + pd.replace("ParkingDouble","Park2") + era.split("Run")[1] + "_10k"
587588
y = str(base_wf)
588589
suff = "ZB_" if "ZeroBias" in step_name else ""
589590
workflows[wf_number] = ['',[step_name,'HLTDR3_' + y,'RECONANORUN3_' + suff + 'reHLT_'+y,'HARVESTRUN3_' + suff + y]]
590-
591+
591592
# 2023
592593
base_wf = 2023
593594
for e_n,era in enumerate(['Run2023D']):
@@ -597,11 +598,11 @@
597598
wf_number = wf_number + offset_pd * p_n
598599
wf_number = wf_number + 0.0001 * 0.01
599600
wf_number = round(wf_number,6)
600-
step_name = "Run" + pd + era.split("Run")[1] + "_10k"
601+
602+
step_name = "Run" + pd.replace("ParkingDouble","Park2") + era.split("Run")[1] + "_10k"
601603
y = str(base_wf) + "B" if "2023B" in era else str(base_wf)
602604
suff = "ZB_" if "ZeroBias" in step_name else ""
603605
workflows[wf_number] = ['',[step_name,'HLTDR3_' + y,'RECONANORUN3_' + suff + 'reHLT_'+y,'HARVESTRUN3_' + suff + y]]
604-
605606

606607
# 2022
607608
base_wf = 2022

Configuration/PyReleaseValidation/python/relval_steps.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,8 @@
4545
steps = Steps()
4646

4747
#### Event to runs
48-
event_steps = [0.01,0.05,0.15,0.25,0.5,1] #in millions
49-
event_steps_k = ["10k","50k","150k","250k","500k","1M"]
48+
event_steps = [0.01,0.05,0.1,0.15,0.25,0.5,1] #in millions
49+
# Labels paired position-by-position with event_steps (they are zipped into event_steps_dict).
event_steps_k = ["10k","50k","100k","150k","250k","500k","1M"] ##TODO add a helper to convert the numbers to strings
5050
event_steps_dict = dict(zip(event_steps_k,event_steps))
5151
#### Production test section ####
5252
steps['ProdMinBias']=merge([{'cfg':'MinBias_8TeV_pythia8_TuneCUETP8M1_cff','--relval':'9000,300'},step1Defaults])
@@ -653,7 +653,7 @@
653653
for pd in pds_2024:
654654
dataset = "/" + pd + "/" + era + "-v1/RAW"
655655
for e_key,evs in event_steps_dict.items():
656-
step_name = "Run" + pd + era.split("Run")[1] + "_" + e_key
656+
step_name = "Run" + pd.replace("ParkingDouble","Park2") + era.split("Run")[1] + "_" + e_key
657657
steps[step_name] = {'INPUT':InputInfo(dataSet=dataset,label=era.split("Run")[1],events=int(evs*1e6), skimEvents=True, location='STD')}
658658

659659
###2023
@@ -665,7 +665,7 @@
665665
for pd in pds_2023:
666666
dataset = "/" + pd + "/" + era + "-v1/RAW"
667667
for e_key,evs in event_steps_dict.items():
668-
step_name = "Run" + pd + era.split("Run")[1] + "_" + e_key
668+
step_name = "Run" + pd.replace("ParkingDouble","Park2") + era.split("Run")[1] + "_" + e_key
669669
steps[step_name] = {'INPUT':InputInfo(dataSet=dataset,label=era.split("Run")[1],events=int(evs*1e6), skimEvents=True, location='STD')}
670670

671671
###2022

Configuration/PyReleaseValidation/scripts/das-up-to-nevents.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
import os
1212
import json
1313
import sys
14+
import itertools
15+
import json
1416

1517
## Helpers
1618
base_cert_url = "https://cms-service-dqmdc.web.cern.ch/CAF/certification/"
@@ -27,6 +29,13 @@ def get_url_clean(url):
2729

2830
return BeautifulSoup(buffer.getvalue(), "lxml").text
2931

32+
def get_lumi_ranges(i):
    """Collapse integer lumisection numbers into inclusive ``[first, last]`` ranges.

    Args:
        i: Iterable of integers. It may be unsorted and may contain repeated
            values (e.g. after concatenating several per-file lists).

    Returns:
        A list of ``[first, last]`` pairs, in ascending order, one per run of
        consecutive integers. An empty input gives an empty list.
    """
    # The grouping key below (value - position) is constant only along a strictly
    # increasing run of consecutive integers. A repeated or out-of-order value
    # would split a run and yield overlapping ranges, so normalise first.
    ordered = sorted(set(i))

    result = []
    for _, b in itertools.groupby(enumerate(ordered), lambda pair: pair[1] - pair[0]):
        b = list(b)
        # Each element is (position, value); keep the first and last value of the run.
        result.append([b[0][1], b[-1][1]])
    return result
38+
3039
def das_do_command(cmd):
3140
out = subprocess.check_output(cmd, shell=True, executable="/bin/bash").decode('utf8')
3241
return out.split("\n")
@@ -90,6 +99,7 @@ def no_intersection():
9099
parser.add_argument('--pandas', '-pd',action='store_true',help="Store the whole dataset (no event or threshold cut) in a csv")
91100
parser.add_argument('--proxy','-p', help='Allow to parse a x509 proxy if needed', type=str, default=None)
92101
parser.add_argument('--site','-s', help='Only data at specific site', type=str, default=None)
102+
# Optional path of the JSON file that receives the lumi ranges of the selected files;
# when left unset (None) no lumi ranges are computed or written.
parser.add_argument('--lumis','-l', help='Output file for lumi ranges for the selected files (if blank no lumi ranges are calculated)', type=str, default=None)
93103
parser.add_argument('--precheck','-pc', action='store_true', help='Check run per run before building the dataframes, to avoid huge caching.')
94104
args = parser.parse_args()
95105

@@ -106,6 +116,7 @@ def no_intersection():
106116
threshold = args.threshold
107117
outfile = args.outfile
108118
site = args.site
119+
lumis = args.lumis
109120

110121
## get the greatest golden json
111122
year = dataset.split("Run")[1][2:4] # from 20XX to XX
@@ -222,8 +233,14 @@ def no_intersection():
222233
df = df[df["events"] <= events] #jump too big files
223234
df.loc[:,"sum_evs"] = df.loc[:,"events"].cumsum()
224235
df = df[df["sum_evs"] < events]
225-
236+
226237
files = df.file
238+
239+
if lumis is not None:
240+
lumi_ranges = { int(r) : list(get_lumi_ranges(np.sort(np.concatenate(df.loc[df["run"]==r,"lumis"].values).ravel()).tolist())) for r in np.unique(df.run.values).tolist()}
241+
242+
with open(lumis, 'w') as fp:
243+
json.dump(lumi_ranges, fp)
227244

228245
if outfile is not None:
229246
with open(outfile, 'w') as f:
@@ -234,4 +251,4 @@ def no_intersection():
234251

235252
sys.exit(0)
236253

237-
254+

0 commit comments

Comments
 (0)