3434from holodeck .constants import YR
3535import holodeck .librarian
3636import holodeck .librarian .combine
37+ from holodeck import log
3738from holodeck .librarian import (
3839 lib_tools , ARGS_CONFIG_FNAME , PSPACE_DOMAIN_EXTREMA , DIRNAME_LIBRARY_SIMS , DIRNAME_DOMAIN_SIMS
3940)
4445# FILES_COPY_TO_OUTPUT = [__file__, holo.librarian.__file__, holo.param_spaces.__file__]
4546FILES_COPY_TO_OUTPUT = []
4647
47- comm = None
48+ # comm = None
4849
4950
5051def main (): # noqa : ignore complexity warning
@@ -75,16 +76,19 @@ def main(): # noqa : ignore complexity warning
7576 try :
7677 from mpi4py import MPI
7778 comm = MPI .COMM_WORLD
79+ log .info (f"Loaded MPI communicator: { comm .rank = } { comm .size = } { log .comm_rank = } " )
7880 except ModuleNotFoundError as err :
7981 comm = None
80- holo . log .error (f"failed to load `mpi4py` in { __file__ } : { err } " )
81- holo . log .error ("`mpi4py` may not be included in the standard `requirements.txt` file." )
82- holo . log .error ("Check if you have `mpi4py` installed, and if not, please install it." )
82+ log .error (f"failed to load `mpi4py` in { __file__ } : { err } " )
83+ log .error ("`mpi4py` may not be included in the standard `requirements.txt` file." )
84+ log .error ("Check if you have `mpi4py` installed, and if not, please install it." )
8385 raise err
8486
8587 # ---- setup arguments / settings, loggers, and outputs
8688
8789 if comm .rank == 0 :
90+ log .warning (f"Running { __file__ } : { comm .rank = } { comm .size = } | { sys .argv = } " )
91+ log .debug ("Setting up argparse..." )
8892 args = _setup_argparse ()
8993 else :
9094 args = None
@@ -93,12 +97,12 @@ def main(): # noqa : ignore complexity warning
9397 args = comm .bcast (args , root = 0 )
9498
9599 # setup log instance, separate for all processes
96- log = _setup_log ( comm , args )
97- args . log = log
100+ log . debug ( "Setting up log..." )
101+ _setup_log ( comm , args )
98102
99103 if comm .rank == 0 :
100104
101- # get parameter-space class
105+ # get parameter-space class (created new, or load previous save when `args.resume`)
102106 space = _setup_param_space (args )
103107
104108 # copy certain files to output directory
@@ -114,14 +118,15 @@ def main(): # noqa : ignore complexity warning
114118 if args .resume :
115119 args , config_fname = load_config_from_path (args .output , log )
116120 log .warning (f"Loaded configuration save from { config_fname } " )
121+ # `args.resume` may be set to `False` after loading from save; reset to True
117122 args .resume = True
118123 # Save parameter space and args/configuration to output directory
119124 else :
120125 space_fname = space .save (args .output )
121- log .info (f"saved parameter space { space } to { space_fname } " )
126+ log .info (f"Saved parameter space { space } to { space_fname } " )
122127
123128 config_fname = _save_config (args )
124- log .info (f"saved configuration to { config_fname } " )
129+ log .info (f"Saved configuration to { config_fname } " )
125130
126131 # ---- Split simulations for all processes
127132
@@ -156,12 +161,11 @@ def main(): # noqa : ignore complexity warning
156161 # If we've loaded a new `args`, then share to all processes from rank=0
157162 if args .resume :
158163 args = comm .bcast (args , root = 0 )
159- args .log = log
160164
161165 log .info (
162- f"param_space={ args .param_space } , parameters={ space .nparameters } , samples={ args .nsamples } \n "
163- f"sam_shape={ args .sam_shape } , nreals={ args .nreals } \n "
164- f"nfreqs={ args .nfreqs } , pta_dur={ args .pta_dur } [yr]\n "
166+ f"param_space={ args .param_space } , parameters={ space .nparameters } , samples={ args .nsamples } , "
167+ f"sam_shape={ args .sam_shape } , nreals={ args .nreals } , "
168+ f"nfreqs={ args .nfreqs } , pta_dur={ args .pta_dur } [yr]"
165169 )
166170
167171 # ---- distribute jobs to processors
@@ -174,11 +178,11 @@ def main(): # noqa : ignore complexity warning
174178 # ---- iterate over each processors' jobs
175179
176180 beg = datetime .now ()
177- log .info (f"beginning tasks at { beg } " )
181+ log .debug (f"beginning tasks at { beg } " )
178182 failures = 0
179183 num_done = 0
180184 for sim_num in iterator :
181- log .info (f"{ comm .rank = } { sim_num = } " )
185+ log .debug (f"{ comm .rank = } { sim_num = } " )
182186
183187 # Domain: Vary only one parameter at a time to explore the domain
184188 if args .domain :
@@ -203,7 +207,7 @@ def main(): # noqa : ignore complexity warning
203207 for kk , vv in params .items ():
204208 msg .append (f"{ kk } ={ vv :.4e} " )
205209 msg = ", " .join (msg )
206- log .info (msg )
210+ log .debug (msg )
207211
208212 rv , _sim_fname = run_sam_at_pspace_params (args , space , sim_num , params )
209213
@@ -226,9 +230,9 @@ def main(): # noqa : ignore complexity warning
226230 comm .barrier ()
227231
228232 if (comm .rank == 0 ):
229- log .info ( "Concatenating outputs into single file" )
233+ log .warning ( "Combining simulation files into single library file" )
230234 holo .librarian .combine .sam_lib_combine (args .output , log , library = (not args .domain ))
231- log .info ("Concatenation completed" )
235+ log .info ("Library combination completed. " )
232236
233237 return
234238
@@ -274,25 +278,45 @@ def run_sam_at_pspace_params(args, space, pnum, params):
274278 produced that contains a single key: 'fail'. This designates the file as a failure.
275279
276280 """
277- log = args .log
278281
279282 # ---- get output filename for this simulation, check if already exists
280283
281284 library_flag = not args .domain
282285 sim_fname = lib_tools ._get_sim_fname (args .output_sims , pnum , library = library_flag )
283286
284287 beg = datetime .now ()
285- log .info (f"{ pnum = } :: { sim_fname = } beginning at { beg } " )
288+ log .info (f"{ pnum = } :: { params = } beginning at { beg } " )
289+ log .info (f"file exists: { sim_fname .is_file ()} | '{ sim_fname } '" )
286290
287291 if sim_fname .exists ():
288- log .info (f"File { sim_fname } already exists. { args .recreate = } " )
289- temp = np .load (sim_fname )
290- data_keys = list (temp .keys ())
292+ log .info (f"Sim file already exists, { args .recreate = } | ' { sim_fname } ' " )
293+ data = np .load (sim_fname )
294+ data_keys = list (data .keys ())
291295
292296 if 'fail' in data_keys :
293- log .info ("Existing file was a failure, re-attempting..." )
297+ log .info (f"Existing file was a failure, re-attempting... ({ data_keys = } )" )
298+
294299 # skip existing files unless we specifically want to recreate them
295300 elif not args .recreate :
301+
302+ # Make sure parameters are consistent with expectations
303+ params_array = np .array ([params [pn ] for pn in space .param_names ])
304+ file_params = data ['params' ]
305+ file_param_names = data ['param_names' ]
306+ if not np .all ([fpn == pn for fpn , pn in zip (file_param_names , space .param_names )]):
307+ err = f"Mismatch between space param names and loaded parmeter names! { sim_fname = } "
308+ log .exception (err )
309+ log .exception (f"{ space .param_names = } " )
310+ log .exception (f"{ file_param_names = } " )
311+ raise RuntimeError (err )
312+
313+ if not np .allclose (file_params , params_array ):
314+ err = f"Mismatch between space param names and loaded parmeter names! { sim_fname = } "
315+ log .exception (err )
316+ log .exception (f"{ space .param_names = } " )
317+ log .exception (f"{ file_param_names = } " )
318+ raise RuntimeError (err )
319+
296320 return True , sim_fname
297321
298322 # ---- run Model
@@ -309,10 +333,11 @@ def run_sam_at_pspace_params(args, space, pnum, params):
309333 )
310334 data ['params' ] = np .array ([params [pn ] for pn in space .param_names ])
311335 data ['param_names' ] = space .param_names
312-
313336 rv = True
337+ log .debug ("Completed model successfully." )
338+
314339 except Exception as err :
315- log .exception (f"`run_model` FAILED on { pnum = } \n " )
340+ log .exception (f"`run_model` FAILED on { pnum = } with { params = } " )
316341 log .exception (err )
317342 rv = False
318343 # failed simulations get an output file with a single key: 'fail'
@@ -448,7 +473,6 @@ def _setup_argparse(*args, **kwargs):
448473 # ---- Create output directories as needed
449474
450475 output .mkdir (parents = True , exist_ok = True )
451- holo .utils .mpi_print (f"output path: { output } " )
452476 args .output = output
453477
454478 if args .domain :
@@ -479,7 +503,7 @@ def _setup_param_space(args):
479503 For 'resume' runs, load a saved parameter-space instance.
480504
481505 """
482- log = args .log
506+ # log = args.log
483507
484508 # ---- Determine and load the parameter-space class
485509
@@ -507,10 +531,11 @@ def _setup_param_space(args):
507531
508532 if args .resume :
509533 # Load pspace object from previous save
510- log .info (f"{ args .resume = } attempting to load pspace { space_class = } from { args .output = } " )
511- space , space_fname = holo .librarian .load_pspace_from_path (args .output , space_class = space_class , log = log )
534+ log .info (f"{ args .resume = } : attempting to load pspace { space_class = } from { args .output = } " )
535+ space , space_fname = holo .librarian .load_pspace_from_path (args .output , space_class = space_class )
512536 log .warning (f"Loaded param-space save from { space_fname } " )
513537 else :
538+ log .info (f"Constructing a new parameter space from { space_class = } ({ args .resume = } )" )
514539 # we don't use standard samples when constructing a parameter-space 'domain'
515540 nsamples = None if args .domain else args .nsamples
516541 space = space_class (log , nsamples , args .sam_shape , args .seed )
@@ -543,7 +568,7 @@ def _save_config(args):
543568 with open (fname , 'w' ) as out :
544569 json .dump (config , out )
545570
546- args . log .warning (f"Saved to { fname } - { holo .utils .get_file_size (fname )} " )
571+ log .warning (f"Saved to { fname } - { holo .utils .get_file_size (fname )} " )
547572
548573 return fname
549574
@@ -554,7 +579,7 @@ def load_config_from_path(path, log):
554579 with open (fname , 'r' ) as inp :
555580 config = json .load (inp )
556581
557- log .info ("Loaded configuration from {fname}" )
582+ log .info (f "Loaded configuration from { fname } " )
558583
559584 pop_keys = [
560585 'holodeck_version' , 'holodeck_librarian_version' , 'holodeck_git_hash' , 'created'
@@ -572,23 +597,14 @@ def load_config_from_path(path, log):
572597
573598
def _setup_log(comm, args):
    """Configure the shared `holodeck` logger for this process.

    Sets the log level, directs log output to a file in the logs directory, and writes a few
    start-up messages describing this process and the run arguments.

    Arguments
    ---------
    comm : MPI communicator
        Only `comm.rank` and `comm.size` are read here.
    args : namespace of run arguments
        Must provide `verbose` (the log level used on rank 0) and `output_logs` (the path that
        is passed to `holo.log_to_file`).

    Returns
    -------
    None
        The module-level `log` is configured in place; nothing is returned.

    """

    # ---- setup logger level

    # only the root process uses the requested verbosity; all other ranks use `holo.logger.ERROR`
    log_lvl = args.verbose if comm.rank == 0 else holo.logger.ERROR
    holo.set_log_level(log_lvl)

    # ---- set name of log file

    # get the path to the directory containing the `holodeck` module
    # e.g.: "/Users/lzkelley/Programs/nanograv/holodeck"
    holo_parent = Path(holo.__file__).parent.parent
    # path of this file, relative to that directory
    log_name = Path(__file__).relative_to(holo_parent)
    # e.g.: "holodeck.librarian.gen_lib"
    log_name = ".".join(log_name.with_suffix("").parts)

    output = args.output_logs
    holo.log_to_file(base_name=log_name, path=output)

    # NOTE: no `=` specifier here; combined with the literal "rank=" label it would render
    #       as "rank=comm.rank=0"
    log.info(f"Processor: rank={comm.rank} / size={comm.size}")
    log.info(f"Output path: {output}")
    log.info(f"        log: {log.filename}")
    log.info(args)

    return
618626
619627
620628# ==============================================================================
@@ -626,7 +634,7 @@ def make_plots(args, data, sim_fname):
626634 """Generate diagnostic plots from the given simulation data and save to file.
627635 """
628636 import matplotlib .pyplot as plt
629- log = args .log
637+ # log = args.log
630638 log .info ("generating characteristic strain/psd plots" )
631639 log .info ("generating strain plots" )
632640 plot_fname = args .output_plots .joinpath (sim_fname .name )
@@ -766,6 +774,7 @@ def make_pars_plot(fobs, hc_ss, hc_bg, sspar, bgpar):
766774
767775
768776if __name__ == "__main__" :
777+ holo .set_log_level (holo .log .WARNING )
769778 main ()
770779
771780 #! the below doesn't work for catching errors... maybe because of comm.barrier() calls?
0 commit comments