Skip to content

Commit 1a05947

Browse files
committed
Merge branch 'develop'
2 parents fdeca6a + dd4fb93 commit 1a05947

File tree

12 files changed

+2972
-1403
lines changed

12 files changed

+2972
-1403
lines changed

bin/GCfitter

Lines changed: 167 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -19,70 +19,150 @@ move_choices = {
1919
'gaussianmove': emcee.moves.GaussianMove, 'kdemove': emcee.moves.KDEMove,
2020
}
2121

22+
bound_choices = {'none', 'single', 'multi', 'balls', 'cubes'}
23+
sample_choices = {'auto', 'unif', 'rwalk', 'rstagger',
24+
'slice', 'rslice', 'hslice'}
25+
26+
27+
def pos_int(arg):
28+
'''ensure arg is a positive integer, for use as `type` in ArgumentParser'''
29+
30+
if not arg.isdigit():
31+
mssg = f"invalid positive int value: '{arg}'"
32+
raise argparse.ArgumentTypeError(mssg)
33+
34+
return int(arg)
35+
2236

2337
if __name__ == '__main__':
2438

39+
# ----------------------------------------------------------------------
40+
# Command line argument parsing
41+
# ----------------------------------------------------------------------
42+
2543
parser = argparse.ArgumentParser(description='fit some GCs')
2644

2745
parser.add_argument('cluster', help='Common name of the cluster to model')
2846

29-
parser.add_argument('--savedir', default=default_dir,
30-
help='location of saved sampling runs')
31-
parser.add_argument('-i', '--initials',
32-
help='alternative JSON file with different initials')
33-
parser.add_argument('-p', '--priors', dest='param_priors',
34-
help='alternative JSON file with different priors')
35-
36-
parser.add_argument('-N', '--Niters', default=2000, type=int,
37-
help='Number of sampling iterations')
38-
parser.add_argument('--Nwalkers', default=150, type=int,
39-
help='Number of walkers for MCMC sampler')
40-
41-
parser.add_argument('--continue', dest='cont_run', action='store_true',
42-
help='Continue from previous saved run')
43-
parser.add_argument('--backup', action='store_true',
44-
help='Create continuous backups during run')
47+
# ----------------------------------------------------------------------
48+
# Common arguments to all samplers
49+
# ----------------------------------------------------------------------
4550

46-
parser.add_argument('--verbose', action='store_true')
47-
parser.add_argument('--debug', action='store_true')
51+
shared_parser = argparse.ArgumentParser(add_help=False)
4852

49-
parallel_group = parser.add_mutually_exclusive_group()
50-
parallel_group.add_argument("--Ncpu", default=2, type=int,
53+
parallel_group = shared_parser.add_mutually_exclusive_group()
54+
parallel_group.add_argument("--Ncpu", default=2, type=pos_int,
5155
help="Number of `multiprocessing` processes")
5256
parallel_group.add_argument("--mpi", action="store_true",
5357
help="Run with MPI rather than multiprocessing")
5458

55-
parser.add_argument('--fix', dest='fixed_params', nargs='*',
56-
help='Parameters to fix, not estimate from the MCMC')
59+
shared_parser.add_argument('--savedir', default=default_dir,
60+
help='location of saved sampling runs')
61+
shared_parser.add_argument('-i', '--initials',
62+
help='alternative JSON file '
63+
'with different initials')
64+
shared_parser.add_argument('-p', '--priors', dest='param_priors',
65+
help='alternative JSON file '
66+
'with different priors')
5767

58-
parser.add_argument('--exclude', dest='excluded_likelihoods', nargs='*',
59-
help='Likelihood components to exclude from posteriors')
68+
shared_parser.add_argument('--fix', dest='fixed_params', nargs='*',
69+
help='Parameters to fix, '
70+
'not estimate from the sampler')
6071

61-
parser.add_argument('--no-hyperparams', dest='hyperparams',
62-
action='store_false',
63-
help="Don't use Bayesian hyperparams")
72+
shared_parser.add_argument('--exclude', nargs='*',
73+
dest='excluded_likelihoods',
74+
help='Likelihood components to '
75+
'exclude from posteriors')
6476

65-
parser.add_argument('--strict', nargs='+',
66-
metavar=('[STRICT]', 'LIKELIHOOD'),
67-
help="A (numeric) strictness factor to be applied "
68-
"to each specified likelihood component")
77+
shared_parser.add_argument('--no-hyperparams', dest='hyperparams',
78+
action='store_false',
79+
help="Don't use Bayesian hyperparams")
6980

70-
parser.add_argument('--moves', type=str.lower, nargs='*',
71-
default=['stretchmove'], choices=move_choices.keys(),
72-
help="Alternative MCMC move proposal algorithm to use. "
73-
"Multiple moves will be given equal random weight")
81+
shared_parser.add_argument('--strict', nargs='+',
82+
metavar=('[STRICT]', 'LIKELIHOOD'),
83+
help="A (numeric) strictness factor to be "
84+
"applied to each specified likelihood")
7485

75-
parser.add_argument('--show-progress', action='store_true', dest='progress',
76-
help="Display emcee's progress bar")
86+
shared_parser.add_argument('--verbose', action='store_true')
87+
shared_parser.add_argument('--debug', action='store_true')
88+
89+
# ----------------------------------------------------------------------
90+
# Subparsers for each sampler
91+
# ----------------------------------------------------------------------
92+
93+
subparsers = parser.add_subparsers(title="Sampler",
94+
dest="sampler", required=True,
95+
help="Which Sampler algorithm to use in "
96+
"fitting the cluster")
97+
98+
# ----------------------------------------------------------------------
99+
# MCMC sampling with emcee
100+
# ----------------------------------------------------------------------
101+
102+
parser_MCMC = subparsers.add_parser('MCMC', parents=[shared_parser])
103+
104+
parser_MCMC.add_argument('-N', '--Niters', default=2000, type=pos_int,
105+
help='Number of sampling iterations')
106+
parser_MCMC.add_argument('--Nwalkers', default=150, type=pos_int,
107+
help='Number of walkers for MCMC sampler')
108+
109+
parser_MCMC.add_argument('--moves', type=str.lower, nargs='*',
110+
default=['stretchmove'],
111+
choices=move_choices.keys(),
112+
help="Alternative MCMC move proposal algorithm to "
113+
"use. Multiple moves will be given equal "
114+
"random weight")
115+
116+
parser_MCMC.add_argument('--continue', dest='cont_run', action='store_true',
117+
help='Continue from previous saved run')
118+
parser_MCMC.add_argument('--backup', action='store_true',
119+
help='Create continuous backups during run')
120+
121+
parser_MCMC.add_argument('--show-progress', action='store_true',
122+
dest='progress', help="Display progress bar")
123+
124+
parser_MCMC.set_defaults(fit_func=fitter.MCMC_fit)
125+
126+
# ----------------------------------------------------------------------
127+
# Nested Sampling with dynesty
128+
# ----------------------------------------------------------------------
129+
# TODO make the "current_batch" storage optional
130+
131+
parser_nest = subparsers.add_parser('nested', parents=[shared_parser])
132+
133+
parser_nest.add_argument('--pfrac', default=1.0, type=float,
134+
help='Posterior weighting fraction f_p')
135+
parser_nest.add_argument('--dlogz', default=0.25, type=float,
136+
help='Δln(Z) tolerance initial stopping condition')
137+
parser_nest.add_argument('--maxiter', default=None, type=pos_int,
138+
help='Maximum number of iterations allowed. May '
139+
'end sampling before the stopping conditions '
140+
'are met')
141+
parser_nest.add_argument('--init-maxiter', default=None, type=pos_int,
142+
help='Maximum number of iterations allowed in the '
143+
'baseline run')
144+
parser_nest.add_argument('--N-per-batch', default=100, type=pos_int,
145+
dest='Nlive_per_batch',
146+
help='Number of live points to add each batch')
147+
parser_nest.add_argument('--bound-type', default='balls',
148+
choices=bound_choices,
149+
help='Method used to bound sampling on the prior')
150+
parser_nest.add_argument('--sample-type', default='auto',
151+
choices=sample_choices,
152+
help='Method used to sample uniformly within the '
153+
'likelihood, based on the provided bounds')
154+
155+
parser_nest.set_defaults(fit_func=fitter.nested_fit)
77156

78157
args = parser.parse_args()
79158

80159
# ----------------------------------------------------------------------
81-
# Do any args preprocessing necessary for calling fitter
160+
# Args preprocessing
82161
# ----------------------------------------------------------------------
83162

84-
if args.cont_run:
85-
raise NotImplementedError
163+
# ----------------------------------------------------------------------
164+
# Common arguments
165+
# ----------------------------------------------------------------------
86166

87167
if args.initials:
88168

@@ -104,13 +184,6 @@ if __name__ == '__main__':
104184
else:
105185
parser.error(f"Cannot access '{bnd_file}': No such file")
106186

107-
pathlib.Path(args.savedir).mkdir(exist_ok=True)
108-
109-
if debug := args.debug:
110-
args.verbose = True
111-
112-
del args.debug
113-
114187
# TODO could also be a way here for setting `err_on_fail` in the priors
115188
if args.strict is not None:
116189
try:
@@ -121,12 +194,51 @@ if __name__ == '__main__':
121194
if len(args.strict) == 1:
122195
args.strict.append('*')
123196

124-
args.moves = [move_choices[mv]() for mv in args.moves]
197+
pathlib.Path(args.savedir).mkdir(exist_ok=True)
198+
199+
# ----------------------------------------------------------------------
200+
# MCMC specific arguments
201+
# ----------------------------------------------------------------------
202+
203+
if args.sampler == 'MCMC':
204+
205+
if args.cont_run:
206+
raise NotImplementedError
207+
208+
args.moves = [move_choices[mv]() for mv in args.moves]
209+
210+
# ----------------------------------------------------------------------
211+
# Nested Sampling specific arguments
212+
# ----------------------------------------------------------------------
213+
214+
elif args.sampler == 'nested':
215+
216+
# TODO add more of these options
217+
args.initial_kwargs = {
218+
'maxiter': args.init_maxiter or float('inf'),
219+
'nlive': args.Nlive_per_batch,
220+
'dlogz': args.dlogz
221+
}
222+
223+
args.batch_kwargs = {
224+
'maxiter': args.maxiter or float('inf'),
225+
'nlive_new': args.Nlive_per_batch
226+
}
227+
228+
del args.dlogz
229+
del args.maxiter
230+
del args.init_maxiter
231+
del args.Nlive_per_batch
125232

126233
# ----------------------------------------------------------------------
127234
# Setup logging
128235
# ----------------------------------------------------------------------
129236

237+
if debug := args.debug:
238+
args.verbose = True
239+
240+
del args.debug
241+
130242
config = {
131243
'level': logging.DEBUG if debug else logging.INFO,
132244
'format': ('%(process)s|%(asctime)s|'
@@ -145,6 +257,11 @@ if __name__ == '__main__':
145257
# Call fitter
146258
# ----------------------------------------------------------------------
147259

148-
print('args:', vars(args))
260+
del args.sampler
261+
262+
fit_func = args.fit_func
263+
del args.fit_func
264+
265+
logging.debug(f"{args=}")
149266

150-
fitter.fit(**vars(args))
267+
fit_func(**vars(args))

fitter/core/data.py

Lines changed: 38 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,20 @@ def __repr__(self):
152152
def __str__(self):
153153
return f'{self._name} Dataset'
154154

155+
_citation = None
156+
157+
def __citation__(self):
158+
if self._citation is not None:
159+
return self._citation
160+
else:
161+
try:
162+
bibcodes = self.mdata['source'].split(';')
163+
self._citation = util.bibcode2cite(bibcodes)
164+
return self._citation
165+
166+
except KeyError:
167+
return None
168+
155169
def __contains__(self, key):
156170
return key in self._dict_variables
157171

@@ -182,6 +196,9 @@ def __init__(self, group):
182196
def variables(self):
183197
return self._dict_variables
184198

199+
def cite(self):
200+
return self.__citation__()
201+
185202
def build_err(self, varname, model_r, model_val, strict=True):
186203
'''
187204
varname is the variable we want to get the error for
@@ -274,6 +291,7 @@ def __getitem__(self, key):
274291
mssg = f"Dataset '{key}' does not exist in {self}"
275292
raise KeyError(mssg) from err
276293

294+
# TODO a filter method for finding all datasets matching a pattern
277295
@property
278296
def datasets(self):
279297
return self._dict_datasets
@@ -311,6 +329,17 @@ def _walker(key, obj):
311329

312330
return groups
313331

332+
def filter_datasets(self, pattern, valid_only=True):
333+
# TODO maybe `datasets` and this should only return ds list not dict?
334+
# if that's the case, make `datasets._name` public
335+
336+
if valid_only:
337+
datasets = {key for (key, *_) in self.valid_likelihoods}
338+
else:
339+
datasets = self.datasets.keys
340+
341+
return {key: self[key] for key in fnmatch.filter(datasets, pattern)}
342+
314343
def filter_likelihoods(self, patterns, exclude=False, keys_only=False):
315344
'''filter the valid likelihoods based on list of patterns, matching
316345
either the dataset name or likelihood function name.
@@ -340,6 +369,7 @@ def get_sources(self, fmt='bibtex'):
340369
341370
fmt : 'bibtex', 'bibcode', 'citep'
342371
'''
372+
# TODO make this use dataset __citation__'s so it doesn't pull each time
343373

344374
res = {}
345375

@@ -522,17 +552,10 @@ def _determine_likelihoods(self):
522552
# --------------------------------------------------------------------------
523553

524554
# TODO The units are *quite* incomplete in Model (10)
555+
# TODO would be cool to get this to work with limepy's `sampling`
525556

526557
class Model(lp.limepy):
527558

528-
def __getattr__(self, key):
529-
'''If `key` is not defined in the limepy model, try to get it from θ'''
530-
try:
531-
return self._theta[key]
532-
except KeyError as err:
533-
msg = f"'{self.__class__.__name__}' object has no attribute '{key}'"
534-
raise AttributeError(msg) from err
535-
536559
def _init_mf(self):
537560

538561
m123 = [0.1, 0.5, 1.0, 100] # Slope breakpoints for imf
@@ -649,6 +672,9 @@ def __init__(self, theta, observations=None, *, verbose=False):
649672

650673
self._theta = theta
651674

675+
for key, val in self._theta.items():
676+
setattr(self, key, val)
677+
652678
# ------------------------------------------------------------------
653679
# Get mass function
654680
# ------------------------------------------------------------------
@@ -680,22 +706,24 @@ def __init__(self, theta, observations=None, *, verbose=False):
680706
# TODO still don't entirely understand when this is to be used
681707
# mj is middle of mass bins, mes are edges, widths are sizes of bins
682708
# self.mbin_widths = np.diff(self._mf.mes[-1]) ??
709+
# What's the difference between `mes` and `me`?
683710
# TODO is this supposed to have units? I think so
684711
self.mes_widths = np.diff(self._mf.mes[-1])
685712

686713
# append tracer mass bins (must be appended to end to not affect nms)
687714
if observations is not None:
688715

689-
# TODO should only append tracer masses for valid likelihood dsets
716+
# TODO should only append tracer masses for valid likelihood dsets?
690717
tracer_mj = np.unique([
691718
dataset.mdata['m'] for dataset in observations.datasets.values()
692719
if 'm' in dataset.mdata
693720
])
694721

695-
# TODO shouldn't append multiple of same tracer mass
696722
mj = np.concatenate((mj, tracer_mj))
697723
Mj = np.concatenate((Mj, 0.1 * np.ones_like(tracer_mj)))
698724

725+
self._tracer_bins = slice(self.nms + self.nmr, None)
726+
699727
else:
700728
logging.warning("No `Observations` given, no tracer masses added")
701729

0 commit comments

Comments
 (0)