diff --git a/cmdstanpy/cmdstan_args.py b/cmdstanpy/cmdstan_args.py index a37ebc17..671db7a0 100644 --- a/cmdstanpy/cmdstan_args.py +++ b/cmdstanpy/cmdstan_args.py @@ -10,7 +10,7 @@ import numpy as np from numpy.random import default_rng -from cmdstanpy.utils import cmdstan_path, cmdstan_version_before, get_logger +from cmdstanpy.utils import get_logger OptionalPath = str | os.PathLike | None @@ -748,15 +748,6 @@ def validate(self) -> None: 'Argument "sig_figs" must be an integer between 1 and 18,' ' found {}'.format(self.sig_figs) ) - # TODO: remove at some future release - if cmdstan_version_before(2, 25): - self.sig_figs = None - get_logger().warning( - 'Argument "sig_figs" invalid for CmdStan versions < 2.25, ' - 'using version %s in directory %s', - os.path.basename(cmdstan_path()), - os.path.dirname(cmdstan_path()), - ) if self.seed is None: rng = default_rng() diff --git a/cmdstanpy/compilation.py b/cmdstanpy/compilation.py index ae3a857d..54e099fe 100644 --- a/cmdstanpy/compilation.py +++ b/cmdstanpy/compilation.py @@ -13,13 +13,7 @@ from typing import Any, Iterable from cmdstanpy.utils import get_logger -from cmdstanpy.utils.cmdstan import ( - EXTENSION, - cmdstan_path, - cmdstan_version, - cmdstan_version_before, - stanc_path, -) +from cmdstanpy.utils.cmdstan import EXTENSION, cmdstan_path, stanc_path from cmdstanpy.utils.command import do_command from cmdstanpy.utils.filesystem import SanitizedOrTmpFilePath @@ -463,38 +457,15 @@ def format_stan_file( ) if canonicalize: - if cmdstan_version_before(2, 29): - if isinstance(canonicalize, bool): - cmd.append('--print-canonical') - else: - raise ValueError( - "Invalid arguments passed for current CmdStan" - + " version({})\n".format( - cmdstan_version() or "Unknown" - ) - + "--canonicalize requires 2.29 or higher" - ) + if isinstance(canonicalize, str): + cmd.append('--canonicalize=' + canonicalize) + elif isinstance(canonicalize, Iterable): + cmd.append('--canonicalize=' + ','.join(canonicalize)) else: - if isinstance(canonicalize, str): - cmd.append('--canonicalize=' + canonicalize) - elif isinstance(canonicalize, Iterable): - cmd.append('--canonicalize=' + ','.join(canonicalize)) - else: - cmd.append('--print-canonical') - - # before 2.29, having both --print-canonical - # and --auto-format printed twice - if not (cmdstan_version_before(2, 29) and canonicalize): - cmd.append('--auto-format') + cmd.append('--print-canonical') - if not cmdstan_version_before(2, 29): - cmd.append(f'--max-line-length={max_line_length}') - elif max_line_length != 78: - raise ValueError( - "Invalid arguments passed for current CmdStan version" - + " ({})\n".format(cmdstan_version() or "Unknown") - + "--max-line-length requires 2.29 or higher" - ) + cmd.append('--auto-format') + cmd.append(f'--max-line-length={max_line_length}') out = subprocess.run(cmd, capture_output=True, text=True, check=True) if out.stderr: diff --git a/cmdstanpy/install_cxx_toolchain.py b/cmdstanpy/install_cxx_toolchain.py index 4c056ead..4fbf86b9 100644 --- a/cmdstanpy/install_cxx_toolchain.py +++ b/cmdstanpy/install_cxx_toolchain.py @@ -236,7 +236,7 @@ def get_toolchain_name() -> str: return '' -# TODO(2.0): drop 3.5 support +# TODO(2.0): consider something other than RTools def get_url(version: str) -> str: """Return URL for toolchain.""" url = '' diff --git a/cmdstanpy/model.py b/cmdstanpy/model.py index 9ae7f9ef..f195eb87 100644 --- a/cmdstanpy/model.py +++ b/cmdstanpy/model.py @@ -2,18 +2,16 @@ import io import os -import platform import re import shutil import subprocess import sys import tempfile import threading -from collections import OrderedDict from concurrent.futures import ThreadPoolExecutor from io import StringIO from multiprocessing import cpu_count -from typing import Any, Callable, Mapping, Sequence, TypeVar +from typing import Any, Callable, Mapping, Sequence import numpy as np import pandas as pd @@ -37,15 +35,12 @@ CmdStanMLE, CmdStanPathfinder, CmdStanVB, + PrevFit, RunSet, from_csv, ) -from cmdstanpy.utils import ( - cmdstan_path, - cmdstan_version_before, - do_command, - get_logger, -) +from cmdstanpy.utils import do_command, get_logger +from cmdstanpy.utils.cmdstan import cmdstan_version_before, windows_tbb_path from cmdstanpy.utils.filesystem import ( temp_inits, temp_metrics, @@ -56,7 +51,6 @@ from . import progress as progbar OptionalPath = str | os.PathLike | None -Fit = TypeVar('Fit', CmdStanMCMC, CmdStanMLE, CmdStanVB) class CmdStanModel: @@ -118,6 +112,8 @@ def __init__( self._fixed_param = False + windows_tbb_path() + if exe_file is not None: self._exe_file = os.path.realpath(os.path.expanduser(exe_file)) if not os.path.exists(self._exe_file): @@ -164,33 +160,26 @@ def __init__( ) # try to detect models w/out parameters, needed for sampler - if (not cmdstan_version_before(2, 27)) and cmdstan_version_before( - 2, 36 - ): + if cmdstan_version_before(2, 36): model_info = self.src_info() if 'parameters' in model_info: self._fixed_param |= len(model_info['parameters']) == 0 - if platform.system() == 'Windows': - try: - do_command(['where.exe', 'tbb.dll'], fd_out=None) - except RuntimeError: - # Add tbb to the $PATH on Windows - libtbb = os.environ.get('STAN_TBB') - if libtbb is None: - libtbb = os.path.join( - cmdstan_path(), 'stan', 'lib', 'stan_math', 'lib', 'tbb' - ) - get_logger().debug("Adding TBB (%s) to PATH", libtbb) - os.environ['PATH'] = ';'.join( - list( - OrderedDict.fromkeys( - [libtbb] + os.environ.get('PATH', '').split(';') - ) - ) - ) - else: - get_logger().debug("TBB already found in load path") + # check CmdStan version compatibility + exe_info = None + try: + exe_info = self.exe_info() + # pylint: disable=broad-except + except Exception as e: + get_logger().warning( + 'Could not get exe info for model %s, error: %s', + self._name, + str(e), + ) + if cmdstan_version_before(2, 35, exe_info): + raise RuntimeError( + "This version of CmdStanPy requires CmdStan 2.35 or higher." + ) def __repr__(self) -> str: return ( @@ -238,7 +227,7 @@ def src_info(self) -> dict[str, Any]: If stanc is older than 2.27 or if the stan file cannot be found, returns an empty dictionary. """ - if self.stan_file is None or cmdstan_version_before(2, 27): + if self.stan_file is None: return {} return compilation.src_info(str(self.stan_file), self._stanc_options) @@ -404,12 +393,6 @@ def optimize( jacobian=jacobian, ) - if jacobian and cmdstan_version_before(2, 32, self.exe_info()): - raise ValueError( - "Jacobian adjustment for optimization is only supported " - "in CmdStan 2.32 and above." - ) - with ( temp_single_json(data) as _data, temp_inits(inits, allow_multiple=False) as _inits, @@ -734,34 +717,23 @@ def sample( if chains == 1: force_one_process_per_chain = True - if ( - force_one_process_per_chain is None - and not cmdstan_version_before(2, 28, info_dict) - and stan_threads == 'true' - ): + if force_one_process_per_chain is None and stan_threads == 'true': one_process_per_chain = False num_threads = parallel_chains * num_threads parallel_procs = 1 if force_one_process_per_chain is False: - if not cmdstan_version_before(2, 28, info_dict): - one_process_per_chain = False - num_threads = parallel_chains * num_threads - parallel_procs = 1 - if stan_threads == 'false': - get_logger().warning( - 'Stan program not compiled for threading, ' - 'process will run chains sequentially. ' - 'For multi-chain parallelization, recompile ' - 'the model with argument ' - '"cpp_options={\'STAN_THREADS\':\'TRUE\'}.' - ) - else: + one_process_per_chain = False + num_threads = parallel_chains * num_threads + parallel_procs = 1 + if stan_threads == 'false': get_logger().warning( - 'Installed version of CmdStan cannot multi-process ' - 'chains, will run %d processes. ' - 'Run "install_cmdstan" to upgrade to latest version.', - chains, + 'Stan program not compiled for threading, ' + 'process will run chains sequentially. ' + 'For multi-chain parallelization, recompile ' + 'the model with argument ' + '"cpp_options={\'STAN_THREADS\':\'TRUE\'}.' ) + os.environ['STAN_NUM_THREADS'] = str(num_threads) if chain_ids is None: @@ -958,7 +930,7 @@ def sample( def generate_quantities( self, data: Mapping[str, Any] | str | os.PathLike | None = None, - previous_fit: Fit | list[str] | None = None, + previous_fit: PrevFit | list[str] | None = None, seed: int | None = None, gq_output_dir: OptionalPath = None, sig_figs: int | None = None, @@ -966,7 +938,7 @@ def generate_quantities( refresh: int | None = None, time_fmt: str = "%Y%m%d%H%M%S", timeout: float | None = None, - ) -> CmdStanGQ[Fit]: + ) -> CmdStanGQ[PrevFit]: """ Run CmdStan's generate_quantities method which runs the generated quantities block of a model given an existing sample. @@ -1032,7 +1004,16 @@ def generate_quantities( :return: CmdStanGQ object """ - if isinstance(previous_fit, (CmdStanMCMC, CmdStanMLE, CmdStanVB)): + if isinstance( + previous_fit, + ( + CmdStanMCMC, + CmdStanMLE, + CmdStanVB, + CmdStanLaplace, + CmdStanPathfinder, + ), + ): fit_object = previous_fit fit_csv_files = previous_fit.runset.csv_files elif isinstance(previous_fit, list): @@ -1042,7 +1023,7 @@ def generate_quantities( ) try: fit_csv_files = previous_fit - fit_object: Fit = from_csv(fit_csv_files) # type: ignore + fit_object: PrevFit = from_csv(fit_csv_files) # type: ignore except ValueError as e: raise ValueError( 'Invalid sample from Stan CSV files, error:\n\t{}\n\t' @@ -1064,11 +1045,6 @@ def generate_quantities( 'to generate additional quantities of interest.' ) elif isinstance(fit_object, CmdStanMLE): - if cmdstan_version_before(2, 31): - raise RuntimeError( - "Method generate_quantities was not " - "available for non-HMC until CmdStan 2.31" - ) chains = 1 chain_ids = [1] if fit_object._save_iterations: @@ -1077,11 +1053,6 @@ def generate_quantities( 'to generate additional quantities of interest.' ) else: # isinstance(fit_object, CmdStanVB) - if cmdstan_version_before(2, 31): - raise RuntimeError( - "Method generate_quantities was not " - "available for non-HMC until CmdStan 2.31" - ) chains = 1 chain_ids = [1] @@ -1492,19 +1463,6 @@ def pathfinder( """ exe_info = self.exe_info() - if cmdstan_version_before(2, 33, exe_info): - raise ValueError( - "Method 'pathfinder' not available for CmdStan versions " - "before 2.33" - ) - - if (not psis_resample or not calculate_lp) and cmdstan_version_before( - 2, 34, exe_info - ): - raise ValueError( - "Arguments 'psis_resample' and 'calculate_lp' are only " - "available for CmdStan versions 2.34 and later" - ) if num_threads is not None: if ( @@ -1613,11 +1571,6 @@ def log_prob( unconstrained parameters of the model. """ - if cmdstan_version_before(2, 31, self.exe_info()): - raise ValueError( - "Method 'log_prob' not available for CmdStan versions " - "before 2.31" - ) with ( temp_single_json(data) as _data, temp_single_json(params) as _params, @@ -1729,11 +1682,7 @@ def laplace_sample( :return: A :class:`CmdStanLaplace` object. """ - if cmdstan_version_before(2, 32, self.exe_info()): - raise ValueError( - "Method 'laplace_sample' not available for CmdStan versions " - "before 2.32" - ) + if opt_args is not None and mode is not None: raise ValueError( "Cannot specify both 'opt_args' and 'mode' arguments" diff --git a/cmdstanpy/stanfit/__init__.py b/cmdstanpy/stanfit/__init__.py index 310e0a4e..9f44d842 100644 --- a/cmdstanpy/stanfit/__init__.py +++ b/cmdstanpy/stanfit/__init__.py @@ -13,7 +13,7 @@ ) from cmdstanpy.utils import check_sampler_csv, get_logger, stancsv -from .gq import CmdStanGQ +from .gq import CmdStanGQ, PrevFit from .laplace import CmdStanLaplace from .mcmc import CmdStanMCMC from .metadata import InferenceMetadata @@ -31,6 +31,7 @@ "CmdStanGQ", "CmdStanLaplace", "CmdStanPathfinder", + "PrevFit", ] diff --git a/cmdstanpy/stanfit/gq.py b/cmdstanpy/stanfit/gq.py index 6a1bfc63..962e1d3b 100644 --- a/cmdstanpy/stanfit/gq.py +++ b/cmdstanpy/stanfit/gq.py @@ -35,16 +35,25 @@ stancsv, ) +from .laplace import CmdStanLaplace from .mcmc import CmdStanMCMC from .metadata import InferenceMetadata from .mle import CmdStanMLE +from .pathfinder import CmdStanPathfinder from .runset import RunSet from .vb import CmdStanVB -Fit = TypeVar('Fit', CmdStanMCMC, CmdStanMLE, CmdStanVB) +PrevFit = TypeVar( + 'PrevFit', + CmdStanMCMC, + CmdStanMLE, + CmdStanVB, + CmdStanLaplace, + CmdStanPathfinder, +) -class CmdStanGQ(Generic[Fit]): +class CmdStanGQ(Generic[PrevFit]): """ Container for outputs from CmdStan generate_quantities run. Created by :meth:`CmdStanModel.generate_quantities`. @@ -53,7 +62,7 @@ class CmdStanGQ(Generic[Fit]): def __init__( self, runset: RunSet, - previous_fit: Fit, + previous_fit: PrevFit, ) -> None: """Initialize object.""" if not runset.method == Method.GENERATE_QUANTITIES: @@ -63,7 +72,7 @@ def __init__( ) self.runset = runset - self.previous_fit: Fit = previous_fit + self.previous_fit: PrevFit = previous_fit self._draws: np.ndarray = np.array(()) self._metadata = self._validate_csv_files() @@ -401,7 +410,12 @@ def draws_pd( @overload def draws_xr( - self: CmdStanGQ[CmdStanMLE] | CmdStanGQ[CmdStanVB], + self: ( + CmdStanGQ[CmdStanMLE] + | CmdStanGQ[CmdStanVB] + | CmdStanGQ[CmdStanLaplace] + | CmdStanGQ[CmdStanPathfinder] + ), vars: str | list[str] | None = None, inc_warmup: bool = False, inc_sample: bool = False, @@ -565,10 +579,7 @@ def stan_variable(self, var: str, **kwargs: bool) -> np.ndarray: + ", ".join(model_var_names | gq_var_names) ) if var not in gq_var_names: - # TODO(2.0) atleast1d may not be needed - return np.atleast_1d( # type: ignore - self.previous_fit.stan_variable(var, **kwargs) - ) + return self.previous_fit.stan_variable(var, **kwargs) # is gq variable self._assemble_generated_quantities() @@ -653,11 +664,14 @@ def _draws_start(self, inc_warmup: bool) -> tuple[int, int]: num_draws = opt_iters else: draw1 = opt_iters - 1 - else: # CmdStanVB: + elif isinstance(p_fit, CmdStanVB): draw1 = 1 # skip mean num_draws = p_fit.variational_sample.shape[0] if inc_warmup: num_draws += 1 + else: + num_draws = p_fit.draws().shape[0] + draw1 = 0 return draw1, num_draws @@ -676,12 +690,14 @@ def _previous_draws(self, inc_warmup: bool) -> np.ndarray: return np.atleast_2d( # type: ignore p_fit.optimized_params_np, )[:, None] - else: # CmdStanVB: + elif isinstance(p_fit, CmdStanVB): if inc_warmup: return np.vstack( [p_fit.variational_params_np, p_fit.variational_sample] )[:, None] return p_fit.variational_sample[:, None] + else: # CmdStanLaplace, CmdStanPathfinder + return p_fit.draws()[:, None, :] def _previous_draws_pd( self, vars: list[str], inc_warmup: bool @@ -700,8 +716,12 @@ def _previous_draws_pd( return p_fit.optimized_iterations_pd[sel] # type: ignore else: return p_fit.optimized_params_pd[sel] - else: # CmdStanVB: + elif isinstance(p_fit, CmdStanVB): return p_fit.variational_sample_pd[sel] + elif isinstance(p_fit, CmdStanLaplace): + return p_fit.draws_pd(vars or None) + else: # CmdStanPathfinder + return pd.DataFrame(p_fit.draws(), columns=p_fit.column_names)[sel] def save_csvfiles(self, dir: str | None = None) -> None: """ @@ -718,11 +738,3 @@ def save_csvfiles(self, dir: str | None = None) -> None: cmdstanpy.from_csv """ self.runset.save_csvfiles(dir) - - # TODO(2.0): remove - @property - def mcmc_sample(self) -> CmdStanMCMC | CmdStanMLE | CmdStanVB: - get_logger().warning( - "Property `mcmc_sample` is deprecated, use `previous_fit` instead" - ) - return self.previous_fit diff --git a/cmdstanpy/stanfit/laplace.py b/cmdstanpy/stanfit/laplace.py index 314cccf6..ad818bfa 100644 --- a/cmdstanpy/stanfit/laplace.py +++ b/cmdstanpy/stanfit/laplace.py @@ -24,7 +24,6 @@ # TODO list: # - docs and example notebook -# - make sure features like standalone GQ are updated/working class CmdStanLaplace: @@ -35,10 +34,10 @@ def __init__(self, runset: RunSet, mode: CmdStanMLE) -> None: 'Wrong runset method, expecting laplace runset, ' 'found method {}'.format(runset.method) ) - self._runset = runset + self.runset = runset self._mode = mode self._draws: np.ndarray = np.array(()) - self._metadata = InferenceMetadata.from_csv(self._runset.csv_files[0]) + self._metadata = InferenceMetadata.from_csv(self.runset.csv_files[0]) def create_inits( self, seed: int | None = None, chains: int = 4 @@ -77,10 +76,10 @@ def _assemble_draws(self) -> None: if self._draws.shape != (0,): return - csv_file = self._runset.csv_files[0] + csv_file = self.runset.csv_files[0] try: *_, draws = stancsv.parse_comments_header_and_draws( - self._runset.csv_files[0] + self.runset.csv_files[0] ) self._draws = stancsv.csv_bytes_list_to_numpy(draws) except Exception as exc: @@ -269,14 +268,14 @@ def __repr__(self) -> str: ['\t' + line for line in repr(self.mode).splitlines()] )[1:] rep = 'CmdStanLaplace: model={} \nmode=({})\n{}'.format( - self._runset.model, + self.runset.model, mode, - self._runset._args.method_args.compose(0, cmd=[]), + self.runset._args.method_args.compose(0, cmd=[]), ) rep = '{}\n csv_files:\n\t{}\n output_files:\n\t{}'.format( rep, - '\n\t'.join(self._runset.csv_files), - '\n\t'.join(self._runset.stdout_files), + '\n\t'.join(self.runset.csv_files), + '\n\t'.join(self.runset.stdout_files), ) return rep @@ -322,4 +321,4 @@ def save_csvfiles(self, dir: str | None = None) -> None: stanfit.RunSet.save_csvfiles cmdstanpy.from_csv """ - self._runset.save_csvfiles(dir) + self.runset.save_csvfiles(dir) diff --git a/cmdstanpy/stanfit/mcmc.py b/cmdstanpy/stanfit/mcmc.py index 7400db9f..e6c6c966 100644 --- a/cmdstanpy/stanfit/mcmc.py +++ b/cmdstanpy/stanfit/mcmc.py @@ -24,7 +24,6 @@ build_xarray_data, check_sampler_csv, cmdstan_path, - cmdstan_version_before, create_named_text_file, do_command, flatten_chains, @@ -223,16 +222,6 @@ def metric_type(self) -> str | None: else None ) - # TODO(2.0): remove - @property - def metric(self) -> np.ndarray | None: - """Deprecated. Use ``.inv_metric`` instead.""" - get_logger().warning( - 'The "metric" property is deprecated, use "inv_metric" instead. ' - 'This will be the same quantity, but with a more accurate name.' - ) - return self.inv_metric - @property def inv_metric(self) -> np.ndarray | None: """ @@ -535,9 +524,7 @@ def summary( dir=_TMPDIR, prefix=tmp_csv_file, suffix='.csv', name_only=True ) csv_str = '--csv_filename={}'.format(tmp_csv_path) - # TODO: remove at some future release - if cmdstan_version_before(2, 24): - csv_str = '--csv_file={}'.format(tmp_csv_path) + cmd = [ cmd_path, percentiles_str, diff --git a/cmdstanpy/stanfit/pathfinder.py b/cmdstanpy/stanfit/pathfinder.py index 8549c78c..1159ad27 100644 --- a/cmdstanpy/stanfit/pathfinder.py +++ b/cmdstanpy/stanfit/pathfinder.py @@ -23,9 +23,9 @@ def __init__(self, runset: RunSet): 'Wrong runset method, expecting Pathfinder runset, ' 'found method {}'.format(runset.method) ) - self._runset = runset + self.runset = runset self._draws: np.ndarray = np.array(()) - self._metadata = InferenceMetadata.from_csv(self._runset.csv_files[0]) + self._metadata = InferenceMetadata.from_csv(self.runset.csv_files[0]) def create_inits( self, seed: int | None = None, chains: int = 4 @@ -62,13 +62,13 @@ def create_inits( def __repr__(self) -> str: rep = 'CmdStanPathfinder: model={}{}'.format( - self._runset.model, - self._runset._args.method_args.compose(0, cmd=[]), + self.runset.model, + self.runset._args.method_args.compose(0, cmd=[]), ) rep = '{}\n csv_files:\n\t{}\n output_files:\n\t{}'.format( rep, - '\n\t'.join(self._runset.csv_files), - '\n\t'.join(self._runset.stdout_files), + '\n\t'.join(self.runset.csv_files), + '\n\t'.join(self.runset.stdout_files), ) return rep @@ -76,10 +76,10 @@ def _assemble_draws(self) -> None: if self._draws.shape != (0,): return - csv_file = self._runset.csv_files[0] + csv_file = self.runset.csv_files[0] try: *_, draws = stancsv.parse_comments_header_and_draws( - self._runset.csv_files[0] + self.runset.csv_files[0] ) self._draws = stancsv.csv_bytes_list_to_numpy(draws) except Exception as exc: @@ -228,4 +228,4 @@ def save_csvfiles(self, dir: str | None = None) -> None: stanfit.RunSet.save_csvfiles cmdstanpy.from_csv """ - self._runset.save_csvfiles(dir) + self.runset.save_csvfiles(dir) diff --git a/cmdstanpy/utils/cmdstan.py b/cmdstanpy/utils/cmdstan.py index 0c27cb6d..77c1894d 100644 --- a/cmdstanpy/utils/cmdstan.py +++ b/cmdstanpy/utils/cmdstan.py @@ -12,6 +12,7 @@ from tqdm.auto import tqdm from cmdstanpy import _DOT_CMDSTAN +from cmdstanpy.utils.command import do_command from .. import progress as progbar from .logging import get_logger @@ -37,8 +38,6 @@ def determine_linux_arch() -> str: arch = "armel" else: arch = "armhf" - elif machine == "mips64": - arch = "mips64el" elif machine == "ppc64el" or machine == "ppc64le": arch = "ppc64el" elif machine == "s390x": @@ -433,6 +432,29 @@ def cxx_toolchain_path( return compiler_path, tool_path +def windows_tbb_path() -> None: + if platform.system() == 'Windows': + try: + do_command(['where.exe', 'tbb.dll'], fd_out=None) + except RuntimeError: + # Add tbb to the $PATH on Windows + libtbb = os.environ.get('STAN_TBB') + if libtbb is None: + libtbb = os.path.join( + cmdstan_path(), 'stan', 'lib', 'stan_math', 'lib', 'tbb' + ) + get_logger().debug("Adding TBB (%s) to PATH", libtbb) + os.environ['PATH'] = ';'.join( + list( + OrderedDict.fromkeys( + [libtbb] + os.environ.get('PATH', '').split(';') + ) + ) + ) + else: + get_logger().debug("TBB already found in load path") + + def install_cmdstan( version: str | None = None, dir: str | None = None, diff --git a/cmdstanpy_tutorial.ipynb b/cmdstanpy_tutorial.ipynb index 38401605..cdf3d40c 100644 --- a/cmdstanpy_tutorial.ipynb +++ b/cmdstanpy_tutorial.ipynb @@ -79,12 +79,6 @@ "or whichever name you used for your environment (following `-n`\n", "above).\n", "\n", - "Note that CmdStan is only available on conda for versions\n", - "2.27.0 and newer. If you require an older version, you must use\n", - "one of the following methods to install it. If you require a\n", - "version of CmdStan *newer* than 2.27.0, but not the latest,\n", - "you can install it in the standard conda way by specifying\n", - "`cmdstan==VERSION` in the install command.\n", "\n", "#### Pip (non-Conda) users \n", "\n", diff --git a/docsrc/installation.rst b/docsrc/installation.rst index 17ab059c..0f528712 100644 --- a/docsrc/installation.rst +++ b/docsrc/installation.rst @@ -1,11 +1,13 @@ Installation ============ -CmdStanPy is a pure-Python3 package which wraps CmdStan, +CmdStanPy is a pure-Python package which wraps CmdStan, the command-line interface to Stan which is written in C++. -Therefore, in addition to Python3, -CmdStanPy requires a modern C++ toolchain in order to build and run Stan models. -There are several ways to install CmdStanPy and the underlying CmdStan components. + +Therefore, CmdStanPy requires a modern C++ toolchain in order to build and run +Stan models, as well as a recent version of CmdStan (released within the year +prior to the release of the CmdStanPy version being used). There are several +ways to install CmdStanPy and the underlying CmdStan components. * You can download CmdStanPy, CmdStan, and the C++ toolchain from conda-forge. @@ -55,12 +57,8 @@ run the command By default, the latest release of CmdStan is installed. -If you require a specific release of CmdStan, CmdStan versions -2.26.1 and *newer* can be installed by specifying +If you require a specific release of CmdStan, versions can be installed by specifying ``cmdstan==VERSION`` in the install command. -Versions before 2.26.1 are not available from conda -but can be downloaded from the CmdStan -`releases `_ page. A Conda environment is a directory that contains a specific collection of Conda packages. To see the locations of your conda environments, use the command @@ -206,14 +204,14 @@ can be used to override these defaults: .. code-block:: bash - install_cmdstan -d my_local_cmdstan -v 2.33.0 + install_cmdstan -d my_local_cmdstan -v 2.36.0 ls -F my_local_cmdstan Alternate Linux Architectures ............................. CmdStan can be installed on Linux for the following non-x86 architectures: -``arm64``, ``armel``, ``armhf``, ``mips64el``, ``ppc64el`` and ``s390x``. +``arm64``, ``armel``, ``armhf``, ``ppc64el`` and ``s390x``. CmdStanPy will do its best to determine which of these is applicable for your machine when running ``install_cmdstan``. If the wrong choice is made, or if you diff --git a/test/conftest.py b/test/conftest.py index a58870b3..bc676ecf 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -1,4 +1,5 @@ """The global configuration for the test suite""" + import os import subprocess from typing import Generator diff --git a/test/test_cmdstan_args.py b/test/test_cmdstan_args.py index 9d340a07..b275fb9f 100644 --- a/test/test_cmdstan_args.py +++ b/test/test_cmdstan_args.py @@ -1,9 +1,7 @@ """CmdStan argument tests""" -import logging import os import platform -from test import check_present from time import time import numpy as np @@ -20,7 +18,6 @@ SamplerArgs, VariationalArgs, ) -from cmdstanpy.utils import cmdstan_version_before HERE = os.path.dirname(os.path.abspath(__file__)) DATAFILES_PATH = os.path.join(HERE, 'data') @@ -586,51 +583,35 @@ def test_args_bad() -> None: ) -def test_args_sig_figs(caplog: pytest.LogCaptureFixture) -> None: +def test_args_sig_figs() -> None: sampler_args = SamplerArgs() cmdstan_path() # sets os.environ['CMDSTAN'] - if cmdstan_version_before(2, 25): - with caplog.at_level(logging.WARNING): - logging.getLogger() - CmdStanArgs( - model_name='bernoulli', - model_exe='bernoulli.exe', - chain_ids=[1, 2, 3, 4], - sig_figs=12, - method_args=sampler_args, - ) - expect = ( - 'Argument "sig_figs" invalid for CmdStan versions < 2.25, using ' - f'version {os.path.basename(cmdstan_path())} in directory ' - f'{os.path.dirname(cmdstan_path())}' + + cmdstan_args = CmdStanArgs( + model_name='bernoulli', + model_exe='bernoulli.exe', + chain_ids=[1, 2, 3, 4], + sig_figs=12, + method_args=sampler_args, + ) + cmd = cmdstan_args.compose_command(idx=0, csv_file='bern-output-1.csv') + assert 'sig_figs=' in ' '.join(cmd) + with pytest.raises(ValueError): + CmdStanArgs( + model_name='bernoulli', + model_exe='bernoulli.exe', + chain_ids=[1, 2, 3, 4], + sig_figs=-1, + method_args=sampler_args, ) - check_present(caplog, ('cmdstanpy', 'WARNING', expect)) - else: - cmdstan_args = CmdStanArgs( + with pytest.raises(ValueError): + CmdStanArgs( model_name='bernoulli', model_exe='bernoulli.exe', chain_ids=[1, 2, 3, 4], - sig_figs=12, + sig_figs=20, method_args=sampler_args, ) - cmd = cmdstan_args.compose_command(idx=0, csv_file='bern-output-1.csv') - assert 'sig_figs=' in ' '.join(cmd) - with pytest.raises(ValueError): - CmdStanArgs( - model_name='bernoulli', - model_exe='bernoulli.exe', - chain_ids=[1, 2, 3, 4], - sig_figs=-1, - method_args=sampler_args, - ) - with pytest.raises(ValueError): - CmdStanArgs( - model_name='bernoulli', - model_exe='bernoulli.exe', - chain_ids=[1, 2, 3, 4], - sig_figs=20, - method_args=sampler_args, - ) def test_args_fitted_params() -> None: diff --git a/test/test_compilation.py b/test/test_compilation.py index 4f010296..731a5c8b 100644 --- a/test/test_compilation.py +++ b/test/test_compilation.py @@ -4,14 +4,12 @@ import io import logging import os -from test import check_present, raises_nested +from test import check_present from typing import Any -from unittest.mock import MagicMock, patch import pytest from cmdstanpy.compilation import CompilerOptions, format_stan_file -from cmdstanpy.utils import cmdstan_version_before HERE = os.path.dirname(os.path.abspath(__file__)) DATAFILES_PATH = os.path.join(HERE, 'data') @@ -227,19 +225,3 @@ def test_model_format_options() -> None: formatted = sys_stdout.getvalue() assert formatted.count('{') == 3 assert formatted.count('(') == 1 - - -@patch( - 'cmdstanpy.utils.cmdstan.cmdstan_version', - MagicMock(return_value=(2, 27)), -) -def test_format_old_version() -> None: - assert cmdstan_version_before(2, 28) - - stan = os.path.join(DATAFILES_PATH, 'format_me.stan') - with raises_nested(RuntimeError, r"--canonicalize"): - format_stan_file(stan, canonicalize='braces') - with raises_nested(RuntimeError, r"--max-line"): - format_stan_file(stan, max_line_length=88) - - format_stan_file(stan, canonicalize=True) diff --git a/test/test_generate_quantities.py b/test/test_generate_quantities.py index c65907b2..82c59b24 100644 --- a/test/test_generate_quantities.py +++ b/test/test_generate_quantities.py @@ -8,7 +8,6 @@ import pickle import shutil from test import check_present, without_import -from unittest.mock import MagicMock, patch import numpy as np import pandas as pd @@ -578,7 +577,7 @@ def test_from_optimization() -> None: # stan_variable theta = bern_gqs.stan_variable(var='theta') - assert theta.shape == (1,) + assert theta.shape == () y_rep = bern_gqs.stan_variable(var='y_rep') assert y_rep.shape == (1, 10) @@ -627,7 +626,7 @@ def test_opt_save_iterations(caplog: pytest.LogCaptureFixture) -> None: # stan_variable theta = bern_gqs.stan_variable(var='theta') - assert theta.shape == (1,) + assert theta.shape == () y_rep = bern_gqs.stan_variable(var='y_rep') assert y_rep.shape == (1, 10) theta = bern_gqs.stan_variable(var='theta', inc_iterations=True) @@ -774,18 +773,12 @@ def test_vb_xarray() -> None: _ = bern_gqs.draws_xr() -@patch( - 'cmdstanpy.utils.cmdstan.cmdstan_version', - MagicMock(return_value=(2, 27)), -) -def test_from_non_hmc_old() -> None: +def test_from_pathfinder() -> None: stan = os.path.join(DATAFILES_PATH, 'bernoulli.stan') bern_model = CmdStanModel(stan_file=stan) jdata = os.path.join(DATAFILES_PATH, 'bernoulli.data.json') - bern_fit_v = bern_model.variational( + bern_fit = bern_model.pathfinder( data=jdata, - show_console=True, - require_converged=False, seed=12345, ) @@ -793,13 +786,65 @@ def test_from_non_hmc_old() -> None: stan = os.path.join(DATAFILES_PATH, 'bernoulli_ppc.stan') model = CmdStanModel(stan_file=stan) - with pytest.raises(RuntimeError, match="2.31"): - model.generate_quantities(data=jdata, previous_fit=bern_fit_v) + bern_gqs = model.generate_quantities(data=jdata, previous_fit=bern_fit) + + assert bern_gqs.runset._args.method == Method.GENERATE_QUANTITIES + assert 'CmdStanGQ: model=bernoulli_ppc' in repr(bern_gqs) + assert 'method=generate_quantities' in repr(bern_gqs) + assert bern_gqs.runset.chains == 1 + assert bern_gqs.runset._retcode(0) == 0 + csv_file = bern_gqs.runset.csv_files[0] + assert os.path.exists(csv_file) + + assert bern_gqs.draws().shape == (1000, 1, 10) + assert bern_gqs.draws(inc_sample=True).shape == (1000, 1, 14) + + # draws_pd() + assert bern_gqs.draws_pd().shape == (1000, 13) + + # stan_variable + theta = bern_gqs.stan_variable(var='theta') + assert theta.shape == (1000,) + y_rep = bern_gqs.stan_variable(var='y_rep') + assert y_rep.shape == (1000, 10) + - bern_fit_opt = bern_model.optimize( +def test_from_laplace() -> None: + stan = os.path.join(DATAFILES_PATH, 'bernoulli.stan') + bern_model = CmdStanModel(stan_file=stan) + jdata = os.path.join(DATAFILES_PATH, 'bernoulli.data.json') + bern_fit = bern_model.laplace_sample( data=jdata, seed=12345, ) - with pytest.raises(RuntimeError, match="2.31"): - model.generate_quantities(data=jdata, previous_fit=bern_fit_opt) + # gq_model + stan = os.path.join(DATAFILES_PATH, 'bernoulli_ppc.stan') + model = CmdStanModel(stan_file=stan) + + bern_gqs = model.generate_quantities(data=jdata, previous_fit=bern_fit) + + assert bern_gqs.runset._args.method == Method.GENERATE_QUANTITIES + assert 'CmdStanGQ: model=bernoulli_ppc' in repr(bern_gqs) + assert 'method=generate_quantities' in repr(bern_gqs) + assert bern_gqs.runset.chains == 1 + assert bern_gqs.runset._retcode(0) == 0 + csv_file = bern_gqs.runset.csv_files[0] + assert os.path.exists(csv_file) + + assert bern_gqs.draws().shape == (1000, 1, 10) + assert bern_gqs.draws(inc_sample=True).shape == (1000, 1, 13) + + # draws_pd() + assert bern_gqs.draws_pd().shape == (1000, 13) + assert ( + bern_gqs.draws_pd(inc_sample=True).shape[1] + == bern_gqs.previous_fit.draws_pd().shape[1] + + bern_gqs.draws_pd().shape[1] + ) + + # stan_variable + theta = bern_gqs.stan_variable(var='theta') + assert theta.shape == (1000,) + y_rep = bern_gqs.stan_variable(var='y_rep') + assert y_rep.shape == (1000, 10) diff --git a/test/test_install_cmdstan.py b/test/test_install_cmdstan.py index 3c798fea..bcddf418 100644 --- a/test/test_install_cmdstan.py +++ b/test/test_install_cmdstan.py @@ -1,4 +1,5 @@ """install_cmdstan test""" + from unittest import mock import pytest diff --git a/test/test_laplace.py b/test/test_laplace.py index 28098a78..82ce9364 100644 --- a/test/test_laplace.py +++ b/test/test_laplace.py @@ -33,7 +33,7 @@ def test_laplace_from_csv() -> None: data={}, seed=1234, ) - fit2 = from_csv(fit._runset.csv_files) + fit2 = from_csv(fit.runset.csv_files) assert isinstance(fit2, cmdstanpy.CmdStanLaplace) assert 'x' in fit2.stan_variables() assert 'y' in fit2.stan_variables() diff --git a/test/test_pathfinder.py b/test/test_pathfinder.py index ebf5e510..2eb81123 100644 --- a/test/test_pathfinder.py +++ b/test/test_pathfinder.py @@ -1,5 +1,5 @@ """ - Tests for the Pathfinder method. +Tests for the Pathfinder method. """ import contextlib diff --git a/test/test_sample.py b/test/test_sample.py index 6571dac4..a3d0f0f2 100644 --- a/test/test_sample.py +++ b/test/test_sample.py @@ -317,19 +317,18 @@ def test_init_types() -> None: force_one_process_per_chain=True, show_progress=False, ) - if not cmdstan_version_before(2, 33): - # https://github.com/stan-dev/cmdstan/pull/1191 - with pytest.raises(RuntimeError): - bern_fit = bern_model.sample( - data=jdata, - chains=2, - seed=12345, - inits=[init_1, init_2], - iter_warmup=100, - iter_sampling=100, - force_one_process_per_chain=False, - show_progress=False, - ) + # https://github.com/stan-dev/cmdstan/pull/1191 + with pytest.raises(RuntimeError): + bern_fit = bern_model.sample( + data=jdata, + chains=2, + seed=12345, + inits=[init_1, init_2], + iter_warmup=100, + iter_sampling=100, + force_one_process_per_chain=False, + show_progress=False, + ) def test_bernoulli_bad() -> None: @@ -471,34 +470,33 @@ def test_num_threads_msgs(caplog: pytest.LogCaptureFixture) -> None: def test_multi_proc_threads(caplog: pytest.LogCaptureFixture) -> None: - # 2.28 compile with cpp_options={'STAN_THREADS':'true'} - if not cmdstan_version_before(2, 28): - logistic_stan = os.path.join(DATAFILES_PATH, 'logistic.stan') - logistic_model = CmdStanModel( - stan_file=logistic_stan, - cpp_options={'STAN_THREADS': 'TRUE'}, - force_compile=True, - ) - info_dict = logistic_model.exe_info() - assert info_dict is not None - assert 'STAN_THREADS' in info_dict - assert info_dict['STAN_THREADS'] == 'true' - logistic_data = os.path.join(DATAFILES_PATH, 'logistic.data.R') - with caplog.at_level(logging.DEBUG): - logging.getLogger() - logistic_model.sample( - data=logistic_data, - chains=4, - parallel_chains=4, - threads_per_chain=5, - iter_sampling=200, - iter_warmup=200, - show_progress=False, - ) - check_present( - caplog, ('cmdstanpy', 'DEBUG', 'running CmdStan, num_threads: 20') + logistic_stan = os.path.join(DATAFILES_PATH, 'logistic.stan') + logistic_model = CmdStanModel( + stan_file=logistic_stan, + cpp_options={'STAN_THREADS': 'TRUE'}, + force_compile=True, + ) + info_dict = logistic_model.exe_info() + assert info_dict is not None + assert 'STAN_THREADS' in info_dict + assert info_dict['STAN_THREADS'] == 'true' + + logistic_data = os.path.join(DATAFILES_PATH, 'logistic.data.R') + with caplog.at_level(logging.DEBUG): + logging.getLogger() + logistic_model.sample( + data=logistic_data, + chains=4, + parallel_chains=4, + threads_per_chain=5, + iter_sampling=200, + iter_warmup=200, + show_progress=False, ) + check_present( + caplog, ('cmdstanpy', 'DEBUG', 'running CmdStan, num_threads: 20') + ) def test_multi_proc_err_msgs() -> None: @@ -666,11 +664,10 @@ def test_sample_no_params() -> None: def test_index_bounds_error() -> None: - if not cmdstan_version_before(2, 27): - oob_stan = os.path.join(DATAFILES_PATH, 'out_of_bounds.stan') - oob_model = CmdStanModel(stan_file=oob_stan) - with pytest.raises(RuntimeError): - oob_model.sample() + oob_stan = os.path.join(DATAFILES_PATH, 'out_of_bounds.stan') + oob_model = CmdStanModel(stan_file=oob_stan) + with pytest.raises(RuntimeError): + oob_model.sample() def test_show_console(stanfile: str = 'bernoulli.stan') -> None: @@ -1652,46 +1649,45 @@ def test_validate() -> None: def test_validate_sample_sig_figs(stanfile: str = 'bernoulli.stan') -> None: - if not cmdstan_version_before(2, 25): - stan = os.path.join(DATAFILES_PATH, stanfile) - bern_model = CmdStanModel(stan_file=stan) + stan = os.path.join(DATAFILES_PATH, stanfile) + bern_model = CmdStanModel(stan_file=stan) - jdata = os.path.join(DATAFILES_PATH, 'bernoulli.data.json') - bern_fit = bern_model.sample( - data=jdata, - chains=1, - seed=12345, - iter_sampling=100, - ) - bern_draws = bern_fit.draws() - theta = format(bern_draws[99, 0, 7], '.18g') - assert not theta.startswith('0.21238045821757600') + jdata = os.path.join(DATAFILES_PATH, 'bernoulli.data.json') + bern_fit = bern_model.sample( + data=jdata, + chains=1, + seed=12345, + iter_sampling=100, + ) + bern_draws = bern_fit.draws() + theta = format(bern_draws[99, 0, 7], '.18g') + assert not theta.startswith('0.21238045821757600') - bern_fit_17 = bern_model.sample( + bern_fit_17 = bern_model.sample( + data=jdata, + chains=1, + seed=12345, + iter_sampling=100, + sig_figs=17, + ) + assert bern_fit_17.draws().size + + with pytest.raises(ValueError): + bern_model.sample( data=jdata, chains=1, seed=12345, iter_sampling=100, - sig_figs=17, + sig_figs=27, ) - assert bern_fit_17.draws().size - with pytest.raises(ValueError): bern_model.sample( data=jdata, chains=1, seed=12345, iter_sampling=100, - sig_figs=27, + sig_figs=-1, ) - with pytest.raises(ValueError): - bern_model.sample( - data=jdata, - chains=1, - seed=12345, - iter_sampling=100, - sig_figs=-1, - ) def test_validate_summary_sig_figs() -> None: @@ -1711,14 +1707,13 @@ def test_validate_summary_sig_figs() -> None: beta1_default = format(sum_default.iloc[1, 0], '.18g') assert beta1_default.startswith('1.3') - if not cmdstan_version_before(2, 25): - sum_17 = fit.summary(sig_figs=17) - beta1_17 = format(sum_17.iloc[1, 0], '.18g') - assert beta1_17.startswith('1.345767078273') + sum_17 = fit.summary(sig_figs=17) + beta1_17 = format(sum_17.iloc[1, 0], '.18g') + assert beta1_17.startswith('1.345767078273') - sum_10 = fit.summary(sig_figs=10) - beta1_10 = format(sum_10.iloc[1, 0], '.18g') - assert beta1_10.startswith('1.34576707') + sum_10 = fit.summary(sig_figs=10) + beta1_10 = format(sum_10.iloc[1, 0], '.18g') + assert beta1_10.startswith('1.34576707') with pytest.raises(ValueError): fit.summary(sig_figs=20)