Skip to content

Commit 594052e

Browse files
benedikt-voelkel (Benedikt Volkel)
authored (with co-authors)
Update steering (#652)
* The -d option is now required when running do_entire_analysis.py, since for a long time it has been impossible for a database to be found via a default path.
* Database parameters can be overwritten on the fly by passing --database-overwrite <path/to/overwrite.yaml>, where parts of the nominal database structure can be replayed in <path/to/overwrite.yaml>. These supersede the nominal values. E.g. an overwrite.yml containing

    ml:
        opt:
            filename_fonll: "<new/path/to/fonll>"

would introduce another FONLL file for this run. NOTE: There is NO "case" key, as it is present only at the very top of the nominal databases.

Co-authored-by: Benedikt Volkel <[email protected]>
1 parent c3a0170 commit 594052e

File tree

9 files changed

+33
-32
lines changed

9 files changed

+33
-32
lines changed

machine_learning_hep/config.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
from itertools import product
2020
from machine_learning_hep.logger import get_logger
21+
from machine_learning_hep.do_variations import modify_dictionary
2122

2223

2324
# disable pylint unused-argument because this is done already in view of updating the
@@ -41,21 +42,28 @@ def update_config(database: dict, run_config: dict, database_overwrite=None): #
4142

4243
logger = get_logger()
4344

45+
# Extract the case
46+
case = list(database.keys())[0]
47+
database = database[case]
48+
4449
# First overwrite as required by the user
4550
# To be implemented
4651
if database_overwrite:
4752
logger.info("Updating database fields with custom user input")
53+
modify_dictionary(database, database_overwrite)
4854

49-
50-
# Extract the case
51-
case = list(database.keys())[0]
52-
database = database[case]
53-
54-
# If not an ML analysis, append "_std" to paths where necessary
55+
# If not an ML analysis...
5556
if not database["doml"]:
56-
logger.info("Not an ML analysis, adjust output paths")
57+
logger.info("Not an ML analysis, adjust paths and settings accordingly")
58+
# ...append "_std" to paths where necessary
5759
data_mc = ("data", "mc")
5860
pkl_keys = ("pkl_skimmed_dec", "pkl_skimmed_decmerged")
5961
for keys in product(data_mc, pkl_keys):
6062
database["mlapplication"][keys[0]][keys[1]][:] = \
6163
[f"{path}_std" for path in database["mlapplication"][keys[0]][keys[1]]]
64+
# ...set the ML working point all to 0
65+
for k in data_mc:
66+
database["mlapplication"]["probcutpresel"][k][:] = \
67+
[0] * len(database["mlapplication"]["probcutpresel"][k])
68+
database["mlapplication"]["probcutoptimal"][:] \
69+
= [0] * len(database["mlapplication"]["probcutoptimal"])

machine_learning_hep/processer.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -151,9 +151,6 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles,
151151
self.lpt_model = datap["mlapplication"]["modelsperptbin"]
152152
self.dirmodel = datap["ml"]["mlout"]
153153
self.lpt_model = appendmainfoldertolist(self.dirmodel, self.lpt_model)
154-
if not self.doml:
155-
datap["mlapplication"]["probcutpresel"][self.mcordata] = [0 for _ in self.lpt_anbinmin]
156-
datap["mlapplication"]["probcutoptimal"] = [0 for _ in self.lpt_anbinmin]
157154

158155
self.lpt_probcutpre = datap["mlapplication"]["probcutpresel"][self.mcordata]
159156
self.lpt_probcutfin = datap["mlapplication"]["probcutoptimal"]

machine_learning_hep/steer_analysis.py

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,8 @@
7373
print("##############################")
7474

7575

76-
def do_entire_analysis(data_config: dict, data_param: dict, data_model: dict, run_param: dict): # pylint: disable=too-many-locals, too-many-statements, too-many-branches
76+
def do_entire_analysis(data_config: dict, data_param: dict, data_param_overwrite: dict, # pylint: disable=too-many-locals, too-many-statements, too-many-branches
77+
data_model: dict, run_param: dict):
7778

7879
# Disable any graphical stuff. No TCanvases opened and shown by default
7980
gROOT.SetBatch(True)
@@ -85,7 +86,7 @@ def do_entire_analysis(data_config: dict, data_param: dict, data_model: dict, ru
8586
case = list(data_param.keys())[0]
8687

8788
# Update database accordingly if needed
88-
update_config(data_param, data_config)
89+
update_config(data_param, data_config, data_param_overwrite)
8990

9091
dodownloadalice = data_config["download"]["alice"]["activate"]
9192
doconversionmc = data_config["conversion"]["mc"]["activate"]
@@ -138,8 +139,8 @@ def do_entire_analysis(data_config: dict, data_param: dict, data_model: dict, ru
138139
do_syst_prob_cross = data_config["systematics"]["cutvar"]["probvariationcross"]
139140
dosystptshape = data_config["systematics"]["mcptshape"]["activate"]
140141
doanaperperiod = data_config["analysis"]["doperperiod"]
141-
142142
typean = data_config["analysis"]["type"]
143+
143144
dojetstudies = data_config["analysis"]["dojetstudies"]
144145

145146
dirpklmc = data_param[case]["multi"]["mc"]["pkl"]
@@ -441,7 +442,7 @@ def do_entire_analysis(data_config: dict, data_param: dict, data_model: dict, ru
441442
syst_mgr.analyze(*ml_syst_steps)
442443

443444

444-
def load_config(user_path: str, default_path: tuple) -> dict:
445+
def load_config(user_path: str, default_path=None) -> dict:
445446
"""
446447
Quickly extract either configuration given by user and fall back to package default if no user
447448
config given.
@@ -451,15 +452,16 @@ def load_config(user_path: str, default_path: tuple) -> dict:
451452
Returns:
452453
dictionary built from YAML
453454
"""
454-
logger = get_logger()
455+
if not user_path and not default_path:
456+
return None
457+
455458
stream = None
456-
if user_path is None:
457-
stream = resource_stream(default_path[0], default_path[1])
458-
else:
459+
if user_path:
459460
if not exists(user_path):
460-
logger_string = f"The file {user_path} does not exist."
461-
logger.fatal(logger_string)
461+
get_logger().fatal("The file %s does not exist", user_path)
462462
stream = open(user_path)
463+
else:
464+
stream = resource_stream(default_path[0], default_path[1])
463465
return yaml.safe_load(stream)
464466

465467
def main():
@@ -474,7 +476,9 @@ def main():
474476
parser.add_argument("--run-config", "-r", dest="run_config",
475477
help="the run configuration to be used")
476478
parser.add_argument("--database-analysis", "-d", dest="database_analysis",
477-
help="analysis database to be used")
479+
help="analysis database to be used", required=True)
480+
parser.add_argument("--database-overwrite", dest="database_overwrite",
481+
help="overwrite fields in analysis database")
478482
parser.add_argument("--database-ml-models", dest="database_ml_models",
479483
help="ml model database to be used")
480484
parser.add_argument("--database-run-list", dest="database_run_list",
@@ -490,15 +494,13 @@ def main():
490494
pkg_data = "machine_learning_hep.data"
491495
pkg_data_run_config = "machine_learning_hep.submission"
492496
run_config = load_config(args.run_config, (pkg_data_run_config, "default_complete.yml"))
493-
case = run_config["case"]
494497
if args.type_ana is not None:
495498
run_config["analysis"]["type"] = args.type_ana
496499

497-
db_analysis_default_name = f"database_ml_parameters_{case}.yml"
498-
print(args.database_analysis)
499-
db_analysis = load_config(args.database_analysis, (pkg_data, db_analysis_default_name))
500+
db_analysis = load_config(args.database_analysis)
501+
db_analysis_overwrite = load_config(args.database_overwrite)
500502
db_ml_models = load_config(args.database_ml_models, (pkg_data, "config_model_parameters.yml"))
501503
db_run_list = load_config(args.database_run_list, (pkg_data, "database_run_list.yml"))
502504

503505
# Run the chain
504-
do_entire_analysis(run_config, db_analysis, db_ml_models, db_run_list)
506+
do_entire_analysis(run_config, db_analysis, db_analysis_overwrite, db_ml_models, db_run_list)

machine_learning_hep/submission/default_ana.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
case: XXXX
21
download:
32
alice:
43
activate: false

machine_learning_hep/submission/default_analyzer.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
case: XXXX # used to find the database file unless specified explicitly as do_entire_analysis -d database_analysis
21
download:
32
alice:
43
activate: false

machine_learning_hep/submission/default_apply.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
case: XXXX
21
download:
32
alice:
43
activate: false

machine_learning_hep/submission/default_pre.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
case: XXXX
21
download:
32
alice:
43
activate: false

machine_learning_hep/submission/default_systematics.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
case: XXXX # used to find the database file unless specified explicitly as do_entire_analysis -d database_analysis
21
download:
32
alice:
43
activate: false

machine_learning_hep/submission/default_train.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
case: XXXX
21
download:
32
alice:
43
activate: false

0 commit comments

Comments (0)