Commit 3914bc8

Merge pull request #45774 from AdrianoDee/patch-14
Remove Duplicate RelVals from `relval_highstats.py` and Smaller Queries for `das-up-to-nevents.py`
2 parents: 79a93e0 + ea90f7e · commit 3914bc8

6 files changed, +72 -48 lines changed

Configuration/PyReleaseValidation/python/MatrixUtil.py

Lines changed: 2 additions & 2 deletions

@@ -133,9 +133,9 @@ def das(self, das_options, dataset):
             command = "dasgoclient %s --query '%s'" % (das_options, self.queries(dataset)[0])
         elif self.skimEvents:
             from os import getenv
-            if getenv("CMSSW_USE_IBEOS","false")=="true":
+            if getenv("JENKINS_PREFIX") is not None:
                 # to be assured that whatever happens the files are only those at CERN
-                command = "das-up-to-nevents.py -d %s -e %d -s T2_CH_CERN"%(dataset,self.events)
+                command = "das-up-to-nevents.py -d %s -e %d -pc"%(dataset,self.events)
             else:
                 command = "das-up-to-nevents.py -d %s -e %d"%(dataset,self.events)
         # Run filter on DAS output
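
Note that this change swaps both the trigger and the mitigation: previously the IB case was detected via CMSSW_USE_IBEOS and handled by pinning files to T2_CH_CERN (-s T2_CH_CERN); now it is detected via JENKINS_PREFIX (set in the cms-bot/Jenkins environment) and handled by the new run-by-run precheck mode (-pc) of das-up-to-nevents.py. A minimal sketch of the resulting command construction, with a hypothetical dataset and event count:

    from os import getenv

    dataset, events = "/ZeroBias/Run2024C-v1/RAW", 10000  # illustrative values only
    if getenv("JENKINS_PREFIX") is not None:
        # cms-bot/Jenkins run: keep DAS query outputs small
        command = "das-up-to-nevents.py -d %s -e %d -pc" % (dataset, events)
    else:
        command = "das-up-to-nevents.py -d %s -e %d" % (dataset, events)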

Configuration/PyReleaseValidation/python/relval_data_highstats.py

Lines changed: 1 addition & 1 deletion

@@ -16,7 +16,7 @@
 for e_n,era in enumerate(eras_2024):
     for p_n,pd in enumerate(pds_2024):
         for e_key,evs in event_steps_dict.items():
-            if "50k" == e_key: # already defined in relval_standard
+            if "10k" == e_key: # already defined in relval_standard
                 continue
             wf_number = base_wf_number_2024
             wf_number = wf_number + offset_era * e_n
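
For reference, the workflow numbers in this loop are built from fixed decimal offsets (base + 0.1 per era + 0.001 per PD + 0.0001 per million events), so each era/PD/event-count combination maps to a unique number. A worked example with hypothetical indices (second era, third PD, 150k events):

    base_wf_number_2024 = 2024.0
    # hypothetical values: e_n = 1, p_n = 2, evs = 0.15 (in millions)
    wf_number = base_wf_number_2024 + 0.1 * 1 + 0.001 * 2 + 0.0001 * 0.15
    print(round(wf_number, 6))  # 2024.102015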

Configuration/PyReleaseValidation/python/relval_highstats.py

Lines changed: 0 additions & 26 deletions

@@ -86,29 +86,3 @@
 workflows[134.99601] = ['',['RunJetHT2015HLHS','HLTDR2_25ns','RECODR2_25nsreHLT_HIPM','HARVESTDR2']]
 workflows[134.99602] = ['',['RunZeroBias2015HLHS','HLTDR2_25ns','RECODR2_25nsreHLT_HIPM','HARVESTDR2']]
 workflows[134.99603] = ['',['RunSingleMu2015HLHS','HLTDR2_25ns','RECODR2_25nsreHLT_HIPM','HARVESTDR2']]
-
-
-
-## 2024 Data Higher Stats Workflows
-## with 150k, 250k, 500k or 1M events each
-
-base_wf_number_2024 = 2024.0
-offset_era = 0.1 # less than 10 eras
-offset_pd = 0.001 # less than 100 pds
-offset_events = 0.0001 # less than 10 event setups (50k,150k,250k,500k)
-
-for e_n,era in enumerate(eras_2024):
-    for p_n,pd in enumerate(pds_2024):
-        for e_key,evs in event_steps_dict.items():
-            if "50k" in e_key: # already defined in relval_standard
-                continue
-            wf_number = base_wf_number_2024
-            wf_number = wf_number + offset_era * e_n
-            wf_number = wf_number + offset_pd * p_n
-            wf_number = wf_number + offset_events * evs
-            wf_number = round(wf_number,6)
-            step_name = "Run" + pd + era.split("Run")[1] + "_" + e_key
-            workflows[wf_number] = ['',[step_name,'HLTDR3_2024','AODNANORUN3_reHLT_2024','HARVESTRUN3_2024']]
-
-
-

Configuration/PyReleaseValidation/python/relval_standard.py

Lines changed: 7 additions & 5 deletions

@@ -559,19 +559,21 @@
 workflows[142.901] = ['',['RunUPC2023','RECODR3_2024_UPC','HARVESTDPROMPTR3']]
 workflows[142.902] = ['',['RunUPC2023','RECODR3_2024_HIN','HARVESTDPROMPTR3']]
 
-## 2024 Data Workflows
+## 2024 Data Workflows
+# for a limited set of eras and PDs not to overflow the IB matrices
+#
 base_wf_number_2024 = 2024.0
 offset_era = 0.1 # less than 10 eras
 offset_pd = 0.001 # less than 100 pds
 
-for e_n,era in enumerate(eras_2024):
-    for p_n,pd in enumerate(pds_2024):
+for e_n,era in enumerate(['Run2024D','Run2024C']):
+    for p_n,pd in enumerate(['JetMET0','ZeroBias']):
         wf_number = base_wf_number_2024
         wf_number = wf_number + offset_era * e_n
         wf_number = wf_number + offset_pd * p_n
-        wf_number = wf_number + 0.0001 * 0.05
+        wf_number = wf_number + 0.0001 * 0.01
         wf_number = round(wf_number,6)
-        step_name = "Run" + pd + era.split("Run")[1] + "_50k"
+        step_name = "Run" + pd + era.split("Run")[1] + "_10k"
         workflows[wf_number] = ['',[step_name,'HLTDR3_2024','AODNANORUN3_reHLT_2024','HARVESTRUN3_2024']]
 
 ### fastsim ###
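
The step names follow directly from the PD and era strings, so the two hard-coded lists above yield four 10k workflows in the standard matrix. A short sketch for the last combination (e_n = 1, p_n = 1):

    era, pd = 'Run2024C', 'ZeroBias'
    step_name = "Run" + pd + era.split("Run")[1] + "_10k"
    print(step_name)  # RunZeroBias2024C_10k

    wf_number = round(2024.0 + 0.1 * 1 + 0.001 * 1 + 0.0001 * 0.01, 6)
    print(wf_number)  # 2024.101001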

Configuration/PyReleaseValidation/python/relval_steps.py

Lines changed: 2 additions & 2 deletions

@@ -45,8 +45,8 @@
 steps = Steps()
 
 #### Event to runs
-event_steps = [0.05,0.15,0.25,0.5,1] #in millions
-event_steps_k = ["50k","150k","250k","500k","1M"]
+event_steps = [0.01,0.05,0.15,0.25,0.5,1] #in millions
+event_steps_k = ["10k","50k","150k","250k","500k","1M"]
 event_steps_dict = dict(zip(event_steps_k,event_steps))
 #### Production test section ####
 steps['ProdMinBias']=merge([{'cfg':'MinBias_8TeV_pythia8_TuneCUETP8M1_cff','--relval':'9000,300'},step1Defaults])
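
The two lists are zipped into a lookup table keyed by the human-readable label, so this change simply prepends a 10k entry:

    event_steps = [0.01, 0.05, 0.15, 0.25, 0.5, 1]  # in millions
    event_steps_k = ["10k", "50k", "150k", "250k", "500k", "1M"]
    event_steps_dict = dict(zip(event_steps_k, event_steps))
    # {'10k': 0.01, '50k': 0.05, '150k': 0.15, '250k': 0.25, '500k': 0.5, '1M': 1}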

Configuration/PyReleaseValidation/scripts/das-up-to-nevents.py

Lines changed: 60 additions & 12 deletions

@@ -40,10 +40,9 @@ def das_file_site(dataset, site):
 
 def das_file_data(dataset,opt=""):
     cmd = "dasgoclient --query='file dataset=%s %s| grep file.name, file.nevents'"%(dataset,opt)
-
     out = das_do_command(cmd)
     out = [np.array(r.split(" "))[[0,3]] for r in out if len(r) > 0]
-
+
     df = pd.DataFrame(out,columns=["file","events"])
     df.events = df.events.values.astype(int)
 

@@ -59,6 +58,28 @@ def das_lumi_data(dataset,opt=""):
 
     return df
 
+def das_run_events_data(dataset,run,opt=""):
+    cmd = "dasgoclient --query='file dataset=%s run=%s %s | sum(file.nevents) '"%(dataset,run,opt)
+    out = das_do_command(cmd)[0]
+
+    out = [o for o in out.split(" ") if "sum" not in o]
+    out = int([r.split(" ") for r in out if len(r)>0][0][0])
+
+    return out
+
+def das_run_data(dataset,opt=""):
+    cmd = "dasgoclient --query='run dataset=%s %s '"%(dataset,opt)
+    out = das_do_command(cmd)
+
+    return out
+
+def no_intersection():
+    print("No intersection between:")
+    print(" - json : ", best_json)
+    print(" - dataset: ", dataset)
+    print("Exiting.")
+    sys.exit(1)
+
 if __name__ == '__main__':
 
     parser = argparse.ArgumentParser()
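
The two new helpers wrap single dasgoclient queries: das_run_events_data returns the summed file.nevents for one run of a dataset, and das_run_data returns the dataset's run list; no_intersection() centralises the error exit used twice further down. A hypothetical call sketch (the dataset name is illustrative, not from this PR):

    runs = das_run_data("/ZeroBias/Run2024C-v1/RAW")                # list of run numbers
    n = das_run_events_data("/ZeroBias/Run2024C-v1/RAW", runs[0])   # total events in that run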
@@ -69,6 +90,7 @@ def das_lumi_data(dataset,opt=""):
     parser.add_argument('--pandas', '-pd',action='store_true',help="Store the whole dataset (no event or threshold cut) in a csv")
     parser.add_argument('--proxy','-p', help='Allow to parse a x509 proxy if needed', type=str, default=None)
     parser.add_argument('--site','-s', help='Only data at specific site', type=str, default=None)
+    parser.add_argument('--precheck','-pc', action='store_true', help='Check run per run before building the dataframes, to avoid huge caching.')
     args = parser.parse_args()
 
     if args.proxy is not None:

@@ -77,6 +99,8 @@ def das_lumi_data(dataset,opt=""):
         print("No X509 proxy set. Exiting.")
         sys.exit(1)
 
+    ## Check if we are in the cms-bot "environment"
+    testing = "JENKINS_PREFIX" in os.environ
     dataset = args.dataset
     events = args.events
     threshold = args.threshold

@@ -97,6 +121,7 @@ def das_lumi_data(dataset,opt=""):
     cert_path = base_cert_path + cert_type + "/"
     web_fallback = False
 
+    ## if we have access to eos we get from there ...
    if os.path.isdir(cert_path):
         json_list = os.listdir(cert_path)
         if len(json_list) == 0:

@@ -105,7 +130,7 @@ def das_lumi_data(dataset,opt=""):
         json_list = [c for c in json_list if c.startswith("Cert_C") and c.endswith("json")]
     else:
         web_fallback = True
-
+    ## ... if not we go to the website
     if web_fallback:
         cert_url = base_cert_url + cert_type + "/"
         json_list = get_url_clean(cert_url).split("\n")

@@ -132,12 +157,39 @@ def das_lumi_data(dataset,opt=""):
             R = R + [f for f in range(r[0],r[1]+1)]
         golden_flat[k] = R
 
+    # let's just check there's an intersection between the
+    # dataset and the json
+    data_runs = das_run_data(dataset)
+    golden_data_runs = [r for r in data_runs if r in golden_flat]
+
+    if (len(golden_data_runs)==0):
+        no_intersection()
+
     # building the dataframe, cleaning for bad lumis
-    df = das_lumi_data(dataset).merge(das_file_data(dataset),on="file",how="inner") # merge file informations with run and lumis
-    df = df[df["run"].isin(list(golden.keys()))] # skim for golden runs
+    golden_data_runs_tocheck = golden_data_runs
+    das_opt = ""
+    if testing or args.precheck:
+        golden_data_runs_tocheck = []
+        # Here we check run per run.
+        # This implies more dasgoclient queries, but smaller outputs
+        # useful when running the IB/PR tests not to have huge
+        # query results that have to be cached.
+
+        sum_events = 0
+
+        for r in golden_data_runs:
+            sum_events = sum_events + int(das_run_events_data(dataset,r))
+            golden_data_runs_tocheck.append(r)
+            if events > 0 and sum_events > events:
+                break
+
+        das_opt = "run in %s"%(str([int(g) for g in golden_data_runs_tocheck]))
+
+    df = das_lumi_data(dataset,opt=das_opt).merge(das_file_data(dataset,opt=das_opt),on="file",how="inner") # merge file informations with run and lumis
+
     df["lumis"] = [[int(ff) for ff in f.replace("[","").replace("]","").split(",")] for f in df.lumis.values]
     df_rs = []
-    for r in golden_flat:
+    for r in golden_data_runs_tocheck:
         cut = (df["run"] == r)
         if not any(cut):
             continue
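
The precheck loop accumulates golden runs only until the requested event count is exceeded, and the retained runs are then folded into a single DAS "run in [...]" filter, so the subsequent file/lumi queries touch only those runs instead of the whole dataset. A sketch of the resulting filter string, with hypothetical run numbers:

    golden_data_runs_tocheck = [380115, 380116]  # illustrative run numbers
    das_opt = "run in %s" % (str([int(g) for g in golden_data_runs_tocheck]))
    print(das_opt)  # run in [380115, 380116]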
@@ -152,12 +204,8 @@ def das_lumi_data(dataset,opt=""):
             n_lumis = np.array([len(l) for l in df_r.lumis])
         df_rs.append(df_r[good_lumis==n_lumis])
 
-    if len(df_rs) == 0:
-        print("No intersection between:")
-        print(" - json : ", best_json)
-        print(" - dataset: ", dataset)
-        print("Exiting.")
-        sys.exit(1)
+    if (len(df_rs)==0):
+        no_intersection()
 
     df = pd.concat(df_rs)
     df.loc[:,"min_lumi"] = [min(f) for f in df.lumis]
