@@ -40,10 +40,9 @@ def das_file_site(dataset, site):
4040
4141def das_file_data (dataset ,opt = "" ):
4242 cmd = "dasgoclient --query='file dataset=%s %s| grep file.name, file.nevents'" % (dataset ,opt )
43-
4443 out = das_do_command (cmd )
4544 out = [np .array (r .split (" " ))[[0 ,3 ]] for r in out if len (r ) > 0 ]
46-
45+
4746 df = pd .DataFrame (out ,columns = ["file" ,"events" ])
4847 df .events = df .events .values .astype (int )
4948
@@ -59,6 +58,28 @@ def das_lumi_data(dataset,opt=""):
5958
6059 return df
6160
def das_run_events_data(dataset, run, opt=""):
    """Return the total number of events for a single run of a dataset.

    Issues one dasgoclient query with a ``sum(file.nevents)`` aggregator and
    parses the first output line: the first non-empty token that does not
    contain the literal ``sum`` is the event count.
    """
    cmd = "dasgoclient --query='file dataset=%s run=%s %s | sum(file.nevents) '" % (dataset, run, opt)
    first_line = das_do_command(cmd)[0]

    # Drop the aggregator label token(s) and any empty fields, keep the number.
    tokens = [t for t in first_line.split(" ") if "sum" not in t and len(t) > 0]

    return int(tokens[0])
69+
def das_run_data(dataset, opt=""):
    """List the runs contained in *dataset* via a dasgoclient ``run`` query.

    Returns the raw output lines from das_do_command (one run number per line).
    """
    run_query = "dasgoclient --query='run dataset=%s %s '" % (dataset, opt)
    return das_do_command(run_query)
75+
def no_intersection():
    """Report that the golden json and the dataset share no runs, then exit(1).

    NOTE(review): relies on the module-level globals ``best_json`` and
    ``dataset`` set in the ``__main__`` section — only call it from there.
    """
    lines = [
        ("No intersection between:",),
        (" - json : ", best_json),
        (" - dataset: ", dataset),
        ("Exiting.",),
    ]
    for parts in lines:
        print(*parts)
    sys.exit(1)
82+
6283if __name__ == '__main__' :
6384
6485 parser = argparse .ArgumentParser ()
@@ -69,6 +90,7 @@ def das_lumi_data(dataset,opt=""):
6990 parser .add_argument ('--pandas' , '-pd' ,action = 'store_true' ,help = "Store the whole dataset (no event or threshold cut) in a csv" )
7091 parser .add_argument ('--proxy' ,'-p' , help = 'Allow to parse a x509 proxy if needed' , type = str , default = None )
7192 parser .add_argument ('--site' ,'-s' , help = 'Only data at specific site' , type = str , default = None )
93+ parser .add_argument ('--precheck' ,'-pc' , action = 'store_true' , help = 'Check run per run before building the dataframes, to avoid huge caching.' )
7294 args = parser .parse_args ()
7395
7496 if args .proxy is not None :
@@ -77,6 +99,8 @@ def das_lumi_data(dataset,opt=""):
7799 print ("No X509 proxy set. Exiting." )
78100 sys .exit (1 )
79101
102+ ## Check if we are in the cms-bot "environment"
103+ testing = "JENKINS_PREFIX" in os .environ
80104 dataset = args .dataset
81105 events = args .events
82106 threshold = args .threshold
@@ -97,6 +121,7 @@ def das_lumi_data(dataset,opt=""):
97121 cert_path = base_cert_path + cert_type + "/"
98122 web_fallback = False
99123
124+ ## if we have access to eos we get from there ...
100125 if os .path .isdir (cert_path ):
101126 json_list = os .listdir (cert_path )
102127 if len (json_list ) == 0 :
@@ -105,7 +130,7 @@ def das_lumi_data(dataset,opt=""):
105130 json_list = [c for c in json_list if c .startswith ("Cert_C" ) and c .endswith ("json" )]
106131 else :
107132 web_fallback = True
108-
133+ ## ... if not we go to the website
109134 if web_fallback :
110135 cert_url = base_cert_url + cert_type + "/"
111136 json_list = get_url_clean (cert_url ).split ("\n " )
@@ -132,12 +157,39 @@ def das_lumi_data(dataset,opt=""):
132157 R = R + [f for f in range (r [0 ],r [1 ]+ 1 )]
133158 golden_flat [k ] = R
134159
160+ # let's just check there's an intersection between the
161+ # dataset and the json
162+ data_runs = das_run_data (dataset )
163+ golden_data_runs = [r for r in data_runs if r in golden_flat ]
164+
165+ if (len (golden_data_runs )== 0 ):
166+ no_intersection ()
167+
135168 # building the dataframe, cleaning for bad lumis
136- df = das_lumi_data (dataset ).merge (das_file_data (dataset ),on = "file" ,how = "inner" ) # merge file informations with run and lumis
137- df = df [df ["run" ].isin (list (golden .keys ()))] # skim for golden runs
169+ golden_data_runs_tocheck = golden_data_runs
170+ das_opt = ""
171+ if testing or args .precheck :
172+ golden_data_runs_tocheck = []
173+ # Here we check run per run.
174+ # This implies more dasgoclient queries, but smaller outputs
175+ # useful when running the IB/PR tests not to have huge
176+ # query results that have to be cached.
177+
178+ sum_events = 0
179+
180+ for r in golden_data_runs :
181+ sum_events = sum_events + int (das_run_events_data (dataset ,r ))
182+ golden_data_runs_tocheck .append (r )
183+ if events > 0 and sum_events > events :
184+ break
185+
186+ das_opt = "run in %s" % (str ([int (g ) for g in golden_data_runs_tocheck ]))
187+
188+ df = das_lumi_data (dataset ,opt = das_opt ).merge (das_file_data (dataset ,opt = das_opt ),on = "file" ,how = "inner" ) # merge file informations with run and lumis
189+
138190 df ["lumis" ] = [[int (ff ) for ff in f .replace ("[" ,"" ).replace ("]" ,"" ).split ("," )] for f in df .lumis .values ]
139191 df_rs = []
140- for r in golden_flat :
192+ for r in golden_data_runs_tocheck :
141193 cut = (df ["run" ] == r )
142194 if not any (cut ):
143195 continue
@@ -152,12 +204,8 @@ def das_lumi_data(dataset,opt=""):
152204 n_lumis = np .array ([len (l ) for l in df_r .lumis ])
153205 df_rs .append (df_r [good_lumis == n_lumis ])
154206
155- if len (df_rs ) == 0 :
156- print ("No intersection between:" )
157- print (" - json : " , best_json )
158- print (" - dataset: " , dataset )
159- print ("Exiting." )
160- sys .exit (1 )
207+ if (len (df_rs )== 0 ):
208+ no_intersection ()
161209
162210 df = pd .concat (df_rs )
163211 df .loc [:,"min_lumi" ] = [min (f ) for f in df .lumis ]
0 commit comments