stan-dev
diff --git a/cmdstanpy/cmdstan_args.py b/cmdstanpy/cmdstan_args.py
Lines changed: 2 additions & 2 deletions
diff --git a/cmdstanpy/install_cxx_toolchain.py b/cmdstanpy/install_cxx_toolchain.py
Lines changed: 1 addition & 1 deletion
diff --git a/cmdstanpy/stanfit/mcmc.py b/cmdstanpy/stanfit/mcmc.py
Lines changed: 80 additions & 29 deletions
diff --git a/cmdstanpy/utils.py b/cmdstanpy/utils.py
Lines changed: 30 additions & 9 deletions
@@ -828,7 +828,7 @@ def validate(self) -> None:
                     '0 and 2**32-1, found {}.'.format(self.seed)
                 )
             if isinstance(self.seed, int):
-                if self.seed < 0 or self.seed > 2 ** 32 - 1:
+                if self.seed < 0 or self.seed > 2**32 - 1:
                     raise ValueError(
                         'Argument "seed" must be an integer between '
                         '0 and 2**32-1, found {}.'.format(self.seed)
@@ -847,7 +847,7 @@ def validate(self) -> None:
                         )
                     )
                 for seed in self.seed:
-                    if seed < 0 or seed > 2 ** 32 - 1:
+                    if seed < 0 or seed > 2**32 - 1:
                         raise ValueError(
                             'Argument "seed" must be an integer value'
                             ' between 0 and 2**32-1,'
 
@@ -27,7 +27,7 @@
 from cmdstanpy.utils import pushd, validate_dir, wrap_url_progress_hook
 
 EXTENSION = '.exe' if platform.system() == 'Windows' else ''
-IS_64BITS = sys.maxsize > 2 ** 32
+IS_64BITS = sys.maxsize > 2**32
 
 
 def usage() -> None:
 
@@ -43,7 +43,6 @@
     get_logger,
     scan_generated_quantities_csv,
 )
-
 from .metadata import InferenceMetadata
 from .runset import RunSet
 
@@ -78,31 +77,34 @@ def __init__(
         assert isinstance(
             sampler_args, SamplerArgs
         )  # make the typechecker happy
-        iter_sampling = sampler_args.iter_sampling
-        if iter_sampling is None:
-            self._iter_sampling = _CMDSTAN_SAMPLING
-        else:
-            self._iter_sampling = iter_sampling
-        iter_warmup = sampler_args.iter_warmup
-        if iter_warmup is None:
-            self._iter_warmup = _CMDSTAN_WARMUP
-        else:
-            self._iter_warmup = iter_warmup
-        thin = sampler_args.thin
-        if thin is None:
-            self._thin: int = _CMDSTAN_THIN
-        else:
-            self._thin = thin
+        self._iter_sampling: int = _CMDSTAN_SAMPLING
+        if sampler_args.iter_sampling is not None:
+            self._iter_sampling = sampler_args.iter_sampling
+        self._iter_warmup: int = _CMDSTAN_WARMUP
+        if sampler_args.iter_warmup is not None:
+            self._iter_warmup = sampler_args.iter_warmup
+        self._thin: int = _CMDSTAN_THIN
+        if sampler_args.thin is not None:
+            self._thin = sampler_args.thin
         self._is_fixed_param = sampler_args.fixed_param
         self._save_warmup = sampler_args.save_warmup
         self._sig_figs = runset._args.sig_figs
+
         # info from CSV values, instantiated lazily
+        self._draws: np.ndarray = np.array(())
+        # only valid when not is_fixed_param
         self._metric: np.ndarray = np.array(())
         self._step_size: np.ndarray = np.array(())
-        self._draws: np.ndarray = np.array(())
+        self._divergences: np.ndarray = np.zeros(self.runset.chains, dtype=int)
+        self._max_treedepths: np.ndarray = np.zeros(
+            self.runset.chains, dtype=int
+        )
+
         # info from CSV initial comments and header
         config = self._validate_csv_files()
         self._metadata: InferenceMetadata = InferenceMetadata(config)
+        if not self._is_fixed_param:
+            self._check_sampler_diagnostics()
 
     def __repr__(self) -> str:
         repr = 'CmdStanMCMC: model={} chains={}{}'.format(
@@ -171,13 +173,15 @@ def column_names(self) -> Tuple[str, ...]:
     @property
     def metric_type(self) -> Optional[str]:
         """
-        Metric type used for adaptation, either 'diag_e' or 'dense_e'.
+        Metric type used for adaptation, either 'diag_e' or 'dense_e', according
+        to CmdStan arg 'metric'.
         When sampler algorithm 'fixed_param' is specified, metric_type is None.
         """
-        if self._is_fixed_param:
-            return None
-        # cmdstan arg name
-        return self._metadata.cmdstan_config['metric']  # type: ignore
+        return (
+            self._metadata.cmdstan_config['metric']
+            if not self._is_fixed_param
+            else None
+        )
 
     @property
     def metric(self) -> Optional[np.ndarray]:
@@ -192,8 +196,7 @@ def metric(self) -> Optional[np.ndarray]:
                 'Unit diagnonal metric, inverse mass matrix size unknown.'
             )
             return None
-        if self._draws.shape == (0,):
-            self._assemble_draws()
+        self._assemble_draws()
         return self._metric
 
     @property
@@ -202,11 +205,8 @@ def step_size(self) -> Optional[np.ndarray]:
         Step size used by sampler for each chain.
         When sampler algorithm 'fixed_param' is specified, step size is None.
         """
-        if self._is_fixed_param:
-            return None
-        if self._step_size.shape == (0,):
-            self._assemble_draws()
-        return self._step_size
+        self._assemble_draws()
+        return self._step_size if not self._is_fixed_param else None
 
     @property
     def thin(self) -> int:
@@ -215,6 +215,23 @@ def thin(self) -> int:
         """
         return self._thin
 
+    @property
+    def divergences(self) -> Optional[np.ndarray]:
+        """
+        Number of post-warmup divergent iterations, one total per chain.
+        Returns None when sampler algorithm 'fixed_param' is specified.
+        """
+        if self._is_fixed_param:
+            return None
+        return self._divergences
+
+    @property
+    def max_treedepths(self) -> Optional[np.ndarray]:
+        """
+        Number of post-warmup iterations in which the NUTS sampler reached
+        the maximum allowed treedepth, one total per chain.
+        Returns None when sampler algorithm 'fixed_param' is specified.
+        """
+        if self._is_fixed_param:
+            return None
+        return self._max_treedepths
+
     def draws(
         self, *, inc_warmup: bool = False, concat_chains: bool = False
     ) -> np.ndarray:
@@ -263,6 +280,7 @@ def _validate_csv_files(self) -> Dict[str, Any]:
         Checks that Stan CSV output files for all chains are consistent
         and returns dict containing config and column names.
 
+        Tabulates sampling iters which are divergent or at max treedepth
         Raises exception when inconsistencies detected.
         """
         dzero = {}
@@ -276,6 +294,9 @@ def _validate_csv_files(self) -> Dict[str, Any]:
                     save_warmup=self._save_warmup,
                     thin=self._thin,
                 )
+                if not self._is_fixed_param:
+                    self._divergences[i] = dzero['ct_divergences']
+                    self._max_treedepths[i] = dzero['ct_max_treedepth']
             else:
                 drest = check_sampler_csv(
                     path=self.runset.csv_files[i],
@@ -312,13 +333,43 @@ def _validate_csv_files(self) -> Dict[str, Any]:
                                 drest[key],
                             )
                         )
+                if not self._is_fixed_param:
+                    self._divergences[i] = drest['ct_divergences']
+                    self._max_treedepths[i] = drest['ct_max_treedepth']
         return dzero
 
+    def _check_sampler_diagnostics(self) -> None:
+        """
+        Warn if any iterations ended in divergences or hit maxtreedepth.
+
+        Emits a single logger warning that lists, chain by chain, every
+        non-zero divergence count and max-treedepth count together with its
+        percentage of the chain's draws.  Emits nothing when all counts
+        are zero.
+        """
+        if not (np.any(self._divergences) or np.any(self._max_treedepths)):
+            return
+        diagnostics = ['Some chains may have failed to converge.']
+        # The counts are tabulated per CSV row, so the percentage must be
+        # taken over the draws actually saved per chain, not over the
+        # 'num_samples' config value, which overstates the denominator
+        # whenever thin > 1.
+        # NOTE(review): assumes num_draws_sampling is the per-chain number of
+        # saved (thinned) sampling draws, as _assemble_draws uses it - confirm.
+        ct_iters = self.num_draws_sampling
+        reports = (
+            (self._divergences, 'divergent transitions '),
+            (self._max_treedepths, 'iterations at max treedepth '),
+        )
+        # public accessor, consistent with its use in __init__
+        for i in range(self.runset.chains):
+            for counts, label in reports:
+                if counts[i] > 0:
+                    diagnostics.append(
+                        f'Chain {i + 1} had {counts[i]} {label}'
+                        f'({((counts[i] / ct_iters) * 100):.1f}%)'
+                    )
+        diagnostics.append(
+            'Use function "diagnose()" to see further information.'
+        )
+        get_logger().warning('\n\t'.join(diagnostics))
+
+
     def _assemble_draws(self) -> None:
         """
         Allocates and populates the step size, metric, and sample arrays
         by parsing the validated stan_csv files.
         """
+        if self._draws.shape != (0,):
+            return
         num_draws = self.num_draws_sampling
         sampling_iter_start = 0
         if self._save_warmup:
 
@@ -68,7 +68,12 @@ def get_logger() -> logging.Logger:
         # add a default handler to the logger to INFO and higher
         handler = logging.StreamHandler()
         handler.setLevel(logging.INFO)
-        handler.setFormatter(logging.Formatter(logging.BASIC_FORMAT))
+        handler.setFormatter(
+            logging.Formatter(
+                '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+                "%H:%M:%S",
+            )
+        )
         logger.addHandler(handler)
     return logger
 
@@ -172,7 +177,7 @@ def cmdstan_path() -> str:
     Validate, then return CmdStan directory path.
     """
     cmdstan = ''
-    if 'CMDSTAN' in os.environ:
+    if 'CMDSTAN' in os.environ and len(os.environ['CMDSTAN']) > 0:
         cmdstan = os.environ['CMDSTAN']
     else:
         cmdstan_dir = os.path.expanduser(os.path.join('~', _DOT_CMDSTAN))
@@ -291,7 +296,7 @@ def cxx_toolchain_path(
         if os.path.exists(os.path.join(toolchain_root, 'mingw64')):
             compiler_path = os.path.join(
                 toolchain_root,
-                'mingw64' if (sys.maxsize > 2 ** 32) else 'mingw32',
+                'mingw64' if (sys.maxsize > 2**32) else 'mingw32',
                 'bin',
             )
             if os.path.exists(compiler_path):
@@ -315,7 +320,7 @@ def cxx_toolchain_path(
         elif os.path.exists(os.path.join(toolchain_root, 'mingw_64')):
             compiler_path = os.path.join(
                 toolchain_root,
-                'mingw_64' if (sys.maxsize > 2 ** 32) else 'mingw_32',
+                'mingw_64' if (sys.maxsize > 2**32) else 'mingw_32',
                 'bin',
             )
             if os.path.exists(compiler_path):
@@ -367,7 +372,7 @@ def cxx_toolchain_path(
                 if version not in ('35', '3.5', '3'):
                     compiler_path = os.path.join(
                         toolchain_root,
-                        'mingw64' if (sys.maxsize > 2 ** 32) else 'mingw32',
+                        'mingw64' if (sys.maxsize > 2**32) else 'mingw32',
                         'bin',
                     )
                     if os.path.exists(compiler_path):
@@ -392,7 +397,7 @@ def cxx_toolchain_path(
                 else:
                     compiler_path = os.path.join(
                         toolchain_root,
-                        'mingw_64' if (sys.maxsize > 2 ** 32) else 'mingw_32',
+                        'mingw_64' if (sys.maxsize > 2**32) else 'mingw_32',
                         'bin',
                     )
                     if os.path.exists(compiler_path):
@@ -649,7 +654,7 @@ def scan_sampler_csv(path: str, is_fixed_param: bool = False) -> Dict[str, Any]:
             if not is_fixed_param:
                 lineno = scan_warmup_iters(fd, dict, lineno)
                 lineno = scan_hmc_params(fd, dict, lineno)
-            lineno = scan_sampling_iters(fd, dict, lineno)
+            lineno = scan_sampling_iters(fd, dict, lineno, is_fixed_param)
         except ValueError as e:
             raise ValueError("Error in reading csv file: " + path) from e
     return dict
@@ -952,13 +957,21 @@ def scan_hmc_params(
 
 
 def scan_sampling_iters(
-    fd: TextIO, config_dict: Dict[str, Any], lineno: int
+    fd: TextIO, config_dict: Dict[str, Any], lineno: int, is_fixed_param: bool
 ) -> int:
     """
     Parse sampling iteration, save number of iterations to config_dict.
+    Also save number of divergences, max_treedepth hits
     """
     draws_found = 0
     num_cols = len(config_dict['column_names'])
+    if not is_fixed_param:
+        idx_divergent = config_dict['column_names'].index('divergent__')
+        idx_treedepth = config_dict['column_names'].index('treedepth__')
+        max_treedepth = config_dict['max_depth']
+        ct_divergences = 0
+        ct_max_treedepth = 0
+
     cur_pos = fd.tell()
     line = fd.readline().strip()
     while len(line) > 0 and not line.startswith('#'):
@@ -976,8 +989,16 @@ def scan_sampling_iters(
             )
         cur_pos = fd.tell()
         line = fd.readline().strip()
-    config_dict['draws_sampling'] = draws_found
+        if not is_fixed_param:
+            ct_divergences += int(data[idx_divergent])  # type: ignore
+            if int(data[idx_treedepth]) == max_treedepth:  # type: ignore
+                ct_max_treedepth += 1
+
     fd.seek(cur_pos)
+    config_dict['draws_sampling'] = draws_found
+    if not is_fixed_param:
+        config_dict['ct_divergences'] = ct_divergences
+        config_dict['ct_max_treedepth'] = ct_max_treedepth
     return lineno
Original file line number	Diff line number	Diff line change
`@@ -828,7 +828,7 @@ def validate(self) -> None:`
`828`	`828`	`'0 and 2**32-1, found {}.'.format(self.seed)`
`829`	`829`	`)`
`830`	`830`	`if isinstance(self.seed, int):`
`831`		`- if self.seed < 0 or self.seed > 2 ** 32 - 1:`
	`831`	`+ if self.seed < 0 or self.seed > 2**32 - 1:`
`832`	`832`	`raise ValueError(`
`833`	`833`	`'Argument "seed" must be an integer between '`
`834`	`834`	`'0 and 2**32-1, found {}.'.format(self.seed)`
`@@ -847,7 +847,7 @@ def validate(self) -> None:`
`847`	`847`	`)`
`848`	`848`	`)`
`849`	`849`	`for seed in self.seed:`
`850`		`- if seed < 0 or seed > 2 ** 32 - 1:`
	`850`	`+ if seed < 0 or seed > 2**32 - 1:`
`851`	`851`	`raise ValueError(`
`852`	`852`	`'Argument "seed" must be an integer value'`
`853`	`853`	`' between 0 and 2**32-1,'`