diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 450c3ef7..d2e6037d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -44,7 +44,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: diff --git a/cmdstanpy/__init__.py b/cmdstanpy/__init__.py index a83f9d9e..f6d3ab55 100644 --- a/cmdstanpy/__init__.py +++ b/cmdstanpy/__init__.py @@ -37,13 +37,13 @@ def _cleanup_tmpdir() -> None: from .utils import ( cmdstan_path, cmdstan_version, + disable_logging, + enable_logging, install_cmdstan, set_cmdstan_path, set_make_env, show_versions, write_stan_json, - enable_logging, - disable_logging, ) __all__ = [ diff --git a/cmdstanpy/cmdstan_args.py b/cmdstanpy/cmdstan_args.py index 07040d6d..80441bae 100644 --- a/cmdstanpy/cmdstan_args.py +++ b/cmdstanpy/cmdstan_args.py @@ -1,10 +1,11 @@ """ CmdStan arguments """ + import os from enum import Enum, auto from time import time -from typing import Any, Dict, List, Mapping, Optional, Union +from typing import Any, Mapping, Optional, Union import numpy as np from numpy.random import default_rng @@ -65,9 +66,9 @@ def __init__( thin: Optional[int] = None, max_treedepth: Optional[int] = None, metric: Union[ - str, Dict[str, Any], List[str], List[Dict[str, Any]], None + str, dict[str, Any], list[str], list[dict[str, Any]], None ] = None, - step_size: Union[float, List[float], None] = None, + step_size: Union[float, list[float], None] = None, adapt_engaged: bool = True, adapt_delta: Optional[float] = None, adapt_init_phase: Optional[int] = None, @@ -84,7 +85,7 @@ def __init__( self.max_treedepth = max_treedepth self.metric = metric self.metric_type: Optional[str] = None - self.metric_file: Union[str, List[str], None] = None + self.metric_file: Union[str, list[str], None] = None self.step_size = step_size self.adapt_engaged = adapt_engaged self.adapt_delta = adapt_delta @@ -161,8 +162,9 @@ def validate(self, chains: Optional[int]) -> None: ): if self.step_size <= 0: raise ValueError( - 'Argument "step_size" must be > 0, ' - 'found {}.'.format(self.step_size) + 'Argument "step_size" must be > 0, found {}.'.format( + self.step_size + ) ) else: if len(self.step_size) != chains: @@ -217,9 +219,9 @@ def validate(self, chains: Optional[int]) -> None: ) ) if all(isinstance(elem, dict) for elem in self.metric): - metric_files: List[str] = [] + metric_files: list[str] = [] for i, metric in enumerate(self.metric): - metric_dict: Dict[str, Any] = metric # type: ignore + metric_dict: dict[str, Any] = metric # type: ignore if 'inv_metric' not in metric_dict: raise ValueError( 'Entry "inv_metric" not found in metric dict ' @@ -343,7 +345,7 @@ def validate(self, chains: Optional[int]) -> None: 'When fixed_param=True, cannot specify adaptation parameters.' ) - def compose(self, idx: int, cmd: List[str]) -> List[str]: + def compose(self, idx: int, cmd: list[str]) -> list[str]: """ Compose CmdStan command for method-specific non-default arguments. 
""" @@ -467,7 +469,7 @@ def validate(self, _chains: Optional[int] = None) -> None: positive_float(self.tol_param, 'tol_param') positive_int(self.history_size, 'history_size') - def compose(self, _idx: int, cmd: List[str]) -> List[str]: + def compose(self, _idx: int, cmd: list[str]) -> list[str]: """compose command string for CmdStan for non-default arg values.""" cmd.append('method=optimize') if self.algorithm: @@ -511,7 +513,7 @@ def validate(self, _chains: Optional[int] = None) -> None: raise ValueError(f'Invalid path for mode file: {self.mode}') positive_int(self.draws, 'draws') - def compose(self, _idx: int, cmd: List[str]) -> List[str]: + def compose(self, _idx: int, cmd: list[str]) -> list[str]: """compose command string for CmdStan for non-default arg values.""" cmd.append('method=laplace') cmd.append(f'mode={self.mode}') @@ -579,7 +581,7 @@ def validate(self, _chains: Optional[int] = None) -> None: positive_int(self.num_draws, 'num_draws') positive_int(self.num_elbo_draws, 'num_elbo_draws') - def compose(self, _idx: int, cmd: List[str]) -> List[str]: + def compose(self, _idx: int, cmd: list[str]) -> list[str]: """compose command string for CmdStan for non-default arg values.""" cmd.append('method=pathfinder') @@ -624,12 +626,13 @@ def compose(self, _idx: int, cmd: List[str]) -> List[str]: class GenerateQuantitiesArgs: """Arguments needed for generate_quantities method.""" - def __init__(self, csv_files: List[str]) -> None: + def __init__(self, csv_files: list[str]) -> None: """Initialize object.""" self.sample_csv_files = csv_files def validate( - self, chains: Optional[int] = None # pylint: disable=unused-argument + self, + chains: Optional[int] = None, # pylint: disable=unused-argument ) -> None: """ Check arguments correctness and consistency. @@ -642,7 +645,7 @@ def validate( 'Invalid path for sample csv file: {}'.format(csv) ) - def compose(self, idx: int, cmd: List[str]) -> List[str]: + def compose(self, idx: int, cmd: list[str]) -> list[str]: """ Compose CmdStan command for method-specific non-default arguments. """ @@ -681,7 +684,8 @@ def __init__( self.output_samples = output_samples def validate( - self, chains: Optional[int] = None # pylint: disable=unused-argument + self, + chains: Optional[int] = None, # pylint: disable=unused-argument ) -> None: """ Check arguments correctness and consistency. @@ -705,7 +709,7 @@ def validate( positive_int(self.output_samples, 'output_samples') # pylint: disable=unused-argument - def compose(self, idx: int, cmd: List[str]) -> List[str]: + def compose(self, idx: int, cmd: list[str]) -> list[str]: """ Compose CmdStan command for method-specific non-default arguments. """ @@ -747,7 +751,7 @@ def __init__( self, model_name: str, model_exe: OptionalPath, - chain_ids: Optional[List[int]], + chain_ids: Optional[list[int]], method_args: Union[ SamplerArgs, OptimizeArgs, @@ -757,8 +761,8 @@ def __init__( PathfinderArgs, ], data: Union[Mapping[str, Any], str, None] = None, - seed: Union[int, List[int], None] = None, - inits: Union[int, float, str, List[str], None] = None, + seed: Union[int, list[int], None] = None, + inits: Union[int, float, str, list[str], None] = None, output_dir: OptionalPath = None, sig_figs: Optional[int] = None, save_latent_dynamics: bool = False, @@ -959,11 +963,11 @@ def compose_command( *, diagnostic_file: Optional[str] = None, profile_file: Optional[str] = None, - ) -> List[str]: + ) -> list[str]: """ Compose CmdStan command for non-default arguments. 
""" - cmd: List[str] = [] + cmd: list[str] = [] if idx is not None and self.chain_ids is not None: if idx < 0 or idx > len(self.chain_ids) - 1: raise ValueError( diff --git a/cmdstanpy/compilation.py b/cmdstanpy/compilation.py index 8db33316..ee4a5665 100644 --- a/cmdstanpy/compilation.py +++ b/cmdstanpy/compilation.py @@ -11,7 +11,7 @@ from copy import copy from datetime import datetime from pathlib import Path -from typing import Any, Dict, Iterable, List, Optional, Union +from typing import Any, Iterable, Optional, Union from cmdstanpy.utils import get_logger from cmdstanpy.utils.cmdstan import ( @@ -81,8 +81,8 @@ class CompilerOptions: def __init__( self, *, - stanc_options: Optional[Dict[str, Any]] = None, - cpp_options: Optional[Dict[str, Any]] = None, + stanc_options: Optional[dict[str, Any]] = None, + cpp_options: Optional[dict[str, Any]] = None, user_header: OptionalPath = None, ) -> None: """Initialize object.""" @@ -116,12 +116,12 @@ def is_empty(self) -> bool: ) @property - def stanc_options(self) -> Dict[str, Union[bool, int, str, Iterable[str]]]: + def stanc_options(self) -> dict[str, Union[bool, int, str, Iterable[str]]]: """Stanc compiler options.""" return self._stanc_options @property - def cpp_options(self) -> Dict[str, Union[bool, int]]: + def cpp_options(self) -> dict[str, Union[bool, int]]: """C++ compiler options.""" return self._cpp_options @@ -165,8 +165,8 @@ def validate_stanc_opts(self) -> None: del self._stanc_options[deprecated] else: get_logger().warning( - 'compiler option "%s" is deprecated and ' - 'should not be used', + 'compiler option "%s" is deprecated and should ' + 'not be used', deprecated, ) for key, val in self._stanc_options.items(): @@ -225,8 +225,8 @@ def validate_cpp_opts(self) -> None: val = self._cpp_options[key] if not isinstance(val, int) or val < 0: raise ValueError( - f'{key} must be a non-negative integer value,' - f' found {val}.' + f'{key} must be a non-negative integer ' + f'value, found {val}.' ) def validate_user_header(self) -> None: @@ -298,7 +298,7 @@ def add_include_path(self, path: str) -> None: elif path not in self._stanc_options['include-paths']: self._stanc_options['include-paths'].append(path) - def compose_stanc(self, filename_in_msg: Optional[str]) -> List[str]: + def compose_stanc(self, filename_in_msg: Optional[str]) -> list[str]: opts = [] if filename_in_msg is not None: @@ -322,7 +322,7 @@ def compose_stanc(self, filename_in_msg: Optional[str]) -> List[str]: opts.append(f'--{key}') return opts - def compose(self, filename_in_msg: Optional[str] = None) -> List[str]: + def compose(self, filename_in_msg: Optional[str] = None) -> list[str]: """ Format makefile options as list of strings. @@ -344,7 +344,7 @@ def compose(self, filename_in_msg: Optional[str] = None) -> List[str]: def src_info( stan_file: str, compiler_options: CompilerOptions -) -> Dict[str, Any]: +) -> dict[str, Any]: """ Get source info for Stan program file. @@ -363,15 +363,15 @@ def src_info( f"Failed to get source info for Stan model " f"'{stan_file}'. 
Console:\n{proc.stderr}" ) - result: Dict[str, Any] = json.loads(proc.stdout) + result: dict[str, Any] = json.loads(proc.stdout) return result def compile_stan_file( src: Union[str, Path], force: bool = False, - stanc_options: Optional[Dict[str, Any]] = None, - cpp_options: Optional[Dict[str, Any]] = None, + stanc_options: Optional[dict[str, Any]] = None, + cpp_options: Optional[dict[str, Any]] = None, user_header: OptionalPath = None, ) -> str: """ @@ -480,7 +480,7 @@ def compile_stan_file( "If the issue persists please open a bug report" ) raise ValueError( - f"Failed to compile Stan model '{src}'. " f"Console:\n{console}" + f"Failed to compile Stan model '{src}'. Console:\n{console}" ) return str(exe_target) @@ -492,7 +492,7 @@ def format_stan_file( canonicalize: Union[bool, str, Iterable[str]] = False, max_line_length: int = 78, backup: bool = True, - stanc_options: Optional[Dict[str, Any]] = None, + stanc_options: Optional[dict[str, Any]] = None, ) -> None: """ Run stanc's auto-formatter on the model code. Either saves directly diff --git a/cmdstanpy/install_cmdstan.py b/cmdstanpy/install_cmdstan.py index 0ee829e4..12db9cbc 100644 --- a/cmdstanpy/install_cmdstan.py +++ b/cmdstanpy/install_cmdstan.py @@ -17,6 +17,7 @@ --cores: int, number of cores to use when building, defaults to 1 -c, --compiler : flag, add C++ compiler to path (Windows only) """ + import argparse import json import os @@ -30,7 +31,7 @@ from collections import OrderedDict from pathlib import Path from time import sleep -from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Union +from typing import TYPE_CHECKING, Any, Callable, Optional, Union from tqdm.auto import tqdm @@ -85,7 +86,7 @@ def is_windows() -> bool: EXTENSION = '.exe' if is_windows() else '' -def get_headers() -> Dict[str, str]: +def get_headers() -> dict[str, str]: """Create headers dictionary.""" headers = {} GITHUB_PAT = os.environ.get("GITHUB_PAT") # pylint:disable=invalid-name @@ -287,13 +288,13 @@ def build(verbose: bool = False, progress: bool = True, cores: int = 1) -> None: raise CmdStanInstallError(f'Command "make build" failed\n{str(e)}') if not os.path.exists(os.path.join('bin', 'stansummary' + EXTENSION)): raise CmdStanInstallError( - f'bin/stansummary{EXTENSION} not found' - ', please rebuild or report a bug!' + f'bin/stansummary{EXTENSION} not found, please rebuild or ' + 'report a bug!' ) if not os.path.exists(os.path.join('bin', 'diagnose' + EXTENSION)): raise CmdStanInstallError( - f'bin/stansummary{EXTENSION} not found' - ', please rebuild or report a bug!' + f'bin/stansummary{EXTENSION} not found, please rebuild or ' + 'report a bug!' 
) if is_windows(): @@ -417,8 +418,8 @@ def install_version( ) if overwrite and os.path.exists('.'): print( - 'Overwrite requested, remove existing build of version ' - '{}'.format(cmdstan_version) + 'Overwrite requested, remove existing build ' + 'of version {}'.format(cmdstan_version) ) clean_all(verbose) print('Rebuilding version {}'.format(cmdstan_version)) @@ -496,10 +497,8 @@ def retrieve_version(version: str, progress: bool = True) -> None: break except urllib.error.HTTPError as e: raise CmdStanRetrieveError( - 'HTTPError: {}\n' - 'Version {} not available from github.com.'.format( - e.code, version - ) + 'HTTPError: {}\nVersion {} not available from ' + 'github.com.'.format(e.code, version) ) from e except urllib.error.URLError as e: print( @@ -645,7 +644,7 @@ def run_install(args: Union[InteractiveSettings, InstallationSettings]) -> None: compile_example(args.verbose) -def parse_cmdline_args() -> Dict[str, Any]: +def parse_cmdline_args() -> dict[str, Any]: parser = argparse.ArgumentParser("install_cmdstan") parser.add_argument( '--interactive', diff --git a/cmdstanpy/install_cxx_toolchain.py b/cmdstanpy/install_cxx_toolchain.py index 19d9ca2a..4c056ead 100644 --- a/cmdstanpy/install_cxx_toolchain.py +++ b/cmdstanpy/install_cxx_toolchain.py @@ -12,6 +12,7 @@ -m --no-make : don't install mingw32-make (Windows RTools 4.0 only) --progress : flag, when specified show progress bar for RTools download """ + import argparse import os import platform @@ -21,7 +22,7 @@ import urllib.request from collections import OrderedDict from time import sleep -from typing import Any, Dict, List +from typing import Any from cmdstanpy import _DOT_CMDSTAN from cmdstanpy.utils import pushd, validate_dir, wrap_url_progress_hook @@ -44,7 +45,7 @@ def usage() -> None: ) -def get_config(dir: str, silent: bool) -> List[str]: +def get_config(dir: str, silent: bool) -> list[str]: """Assemble config info.""" config = [] if platform.system() == 'Windows': @@ -260,7 +261,7 @@ def get_toolchain_version(name: str, version: str) -> str: return toolchain_folder -def run_rtools_install(args: Dict[str, Any]) -> None: +def run_rtools_install(args: dict[str, Any]) -> None: """Main.""" if platform.system() not in {'Windows'}: raise NotImplementedError( @@ -333,7 +334,7 @@ def run_rtools_install(args: Dict[str, Any]) -> None: install_mingw32_make(toolchain_folder, verbose) -def parse_cmdline_args() -> Dict[str, Any]: +def parse_cmdline_args() -> dict[str, Any]: parser = argparse.ArgumentParser() parser.add_argument('--version', '-v', help="version, defaults to latest") parser.add_argument( diff --git a/cmdstanpy/model.py b/cmdstanpy/model.py index 116e11b4..893c352f 100644 --- a/cmdstanpy/model.py +++ b/cmdstanpy/model.py @@ -16,9 +16,7 @@ from typing import ( Any, Callable, - Dict, Iterable, - List, Literal, Mapping, Optional, @@ -115,8 +113,8 @@ def __init__( stan_file: OptionalPath = None, exe_file: OptionalPath = None, force_compile: bool = False, - stanc_options: Optional[Dict[str, Any]] = None, - cpp_options: Optional[Dict[str, Any]] = None, + stanc_options: Optional[dict[str, Any]] = None, + cpp_options: Optional[dict[str, Any]] = None, user_header: OptionalPath = None, *, compile: Union[bool, Literal['force'], None] = None, @@ -277,14 +275,14 @@ def exe_file(self) -> OptionalPath: """Full path to Stan exe file.""" return self._exe_file - def exe_info(self) -> Dict[str, str]: + def exe_info(self) -> dict[str, str]: """ Run model with option 'info'. Parse output statements, which all have form 'key = value' into a Dict. 
If exe file compiled with CmdStan < 2.27, option 'info' isn't available and the method returns an empty dictionary. """ - result: Dict[str, str] = {} + result: dict[str, str] = {} if self.exe_file is None: return result try: @@ -301,7 +299,7 @@ def exe_info(self) -> Dict[str, str]: get_logger().debug(e) return result - def src_info(self) -> Dict[str, Any]: + def src_info(self) -> dict[str, Any]: """ Run stanc with option '--info'. @@ -360,12 +358,12 @@ def format( ) @property - def stanc_options(self) -> Dict[str, Union[bool, int, str]]: + def stanc_options(self) -> dict[str, Union[bool, int, str]]: """Options to stanc compilers.""" return self._compiler_options._stanc_options @property - def cpp_options(self) -> Dict[str, Union[bool, int]]: + def cpp_options(self) -> dict[str, Union[bool, int]]: """Options to C++ compilers.""" return self._compiler_options._cpp_options @@ -393,8 +391,8 @@ def code(self) -> Optional[str]: def compile( self, force: bool = False, - stanc_options: Optional[Dict[str, Any]] = None, - cpp_options: Optional[Dict[str, Any]] = None, + stanc_options: Optional[dict[str, Any]] = None, + cpp_options: Optional[dict[str, Any]] = None, user_header: OptionalPath = None, override_options: bool = False, *, @@ -618,9 +616,10 @@ def optimize( "in CmdStan 2.32 and above." ) - with temp_single_json(data) as _data, temp_inits( - inits, allow_multiple=False - ) as _inits: + with ( + temp_single_json(data) as _data, + temp_inits(inits, allow_multiple=False) as _inits, + ): args = CmdStanArgs( self._name, self._exe_file, @@ -662,14 +661,14 @@ def sample( chains: Optional[int] = None, parallel_chains: Optional[int] = None, threads_per_chain: Optional[int] = None, - seed: Union[int, List[int], None] = None, - chain_ids: Union[int, List[int], None] = None, + seed: Union[int, list[int], None] = None, + chain_ids: Union[int, list[int], None] = None, inits: Union[ Mapping[str, Any], float, str, - List[str], - List[Mapping[str, Any]], + list[str], + list[Mapping[str, Any]], None, ] = None, iter_warmup: Optional[int] = None, @@ -678,9 +677,9 @@ def sample( thin: Optional[int] = None, max_treedepth: Optional[int] = None, metric: Union[ - str, Dict[str, Any], List[str], List[Dict[str, Any]], None + str, dict[str, Any], list[str], list[dict[str, Any]], None ] = None, - step_size: Union[float, List[float], None] = None, + step_size: Union[float, list[float], None] = None, adapt_engaged: bool = True, adapt_delta: Optional[float] = None, adapt_init_phase: Optional[int] = None, @@ -921,8 +920,8 @@ def sample( parallel_chains = chains elif parallel_chains < 1: raise ValueError( - 'Argument parallel_chains must be a positive integer, ' - 'found {}.'.format(parallel_chains) + 'Argument parallel_chains must be a positive ' + 'integer, found {}.'.format(parallel_chains) ) if threads_per_chain is None: threads_per_chain = 1 @@ -977,8 +976,8 @@ def sample( if isinstance(chain_ids, int): if chain_ids < 1: raise ValueError( - 'Chain_id must be a positive integer value,' - ' found {}.'.format(chain_ids) + 'Chain_id must be a positive integer value, ' + 'found {}.'.format(chain_ids) ) chain_ids = [i + chain_ids for i in range(chains)] else: @@ -1020,10 +1019,11 @@ def sample( fixed_param=fixed_param, ) - with temp_single_json(data) as _data, temp_inits( - inits, id=chain_ids[0] - ) as _inits: - cmdstan_inits: Union[str, List[str], int, float, None] + with ( + temp_single_json(data) as _data, + temp_inits(inits, id=chain_ids[0]) as _inits, + ): + cmdstan_inits: Union[str, list[str], int, float, None] if 
one_process_per_chain and isinstance(inits, list): # legacy cmdstan_inits = [ f"{_inits[:-5]}_{i}.json" for i in chain_ids # type: ignore @@ -1141,7 +1141,7 @@ def sample( def generate_quantities( self, data: Union[Mapping[str, Any], str, os.PathLike, None] = None, - previous_fit: Union[Fit, List[str], None] = None, + previous_fit: Union[Fit, list[str], None] = None, seed: Optional[int] = None, gq_output_dir: OptionalPath = None, sig_figs: Optional[int] = None, @@ -1150,7 +1150,7 @@ def generate_quantities( time_fmt: str = "%Y%m%d%H%M%S", timeout: Optional[float] = None, *, - mcmc_sample: Union[CmdStanMCMC, List[str], None] = None, + mcmc_sample: Union[CmdStanMCMC, list[str], None] = None, ) -> CmdStanGQ[Fit]: """ Run CmdStan's generate_quantities method which runs the generated @@ -1481,9 +1481,10 @@ def variational( output_samples=draws, ) - with temp_single_json(data) as _data, temp_inits( - inits, allow_multiple=False - ) as _inits: + with ( + temp_single_json(data) as _data, + temp_inits(inits, allow_multiple=False) as _inits, + ): args = CmdStanArgs( self._name, self._exe_file, @@ -1571,7 +1572,7 @@ def pathfinder( calculate_lp: bool = True, # arguments standard to all methods seed: Optional[int] = None, - inits: Union[Dict[str, float], float, str, os.PathLike, None] = None, + inits: Union[dict[str, float], float, str, os.PathLike, None] = None, output_dir: OptionalPath = None, sig_figs: Optional[int] = None, save_profile: bool = False, @@ -1782,7 +1783,7 @@ def pathfinder( def log_prob( self, - params: Union[Dict[str, Any], str, os.PathLike], + params: Union[dict[str, Any], str, os.PathLike], data: Union[Mapping[str, Any], str, os.PathLike, None] = None, *, jacobian: bool = True, @@ -1824,9 +1825,10 @@ def log_prob( "Method 'log_prob' not available for CmdStan versions " "before 2.31" ) - with temp_single_json(data) as _data, temp_single_json( - params - ) as _params: + with ( + temp_single_json(data) as _data, + temp_single_json(params) as _params, + ): cmd = [ str(self.exe_file), "log_prob", @@ -1877,7 +1879,7 @@ def laplace_sample( refresh: Optional[int] = None, time_fmt: str = "%Y%m%d%H%M%S", timeout: Optional[float] = None, - opt_args: Optional[Dict[str, Any]] = None, + opt_args: Optional[dict[str, Any]] = None, ) -> CmdStanLaplace: """ Run a Laplace approximation around the posterior mode. @@ -2120,7 +2122,7 @@ def _timer_target() -> None: @staticmethod @progbar.wrap_callback def _wrap_sampler_progress_hook( - chain_ids: List[int], + chain_ids: list[int], total: int, ) -> Optional[Callable[[str, int], None]]: """ @@ -2130,7 +2132,7 @@ def _wrap_sampler_progress_hook( For the latter, manage array of pbars, update accordingly. 
""" chain_pat = re.compile(r'(Chain \[(\d+)\] )?Iteration:\s+(\d+)') - pbars: Dict[int, tqdm] = { + pbars: dict[int, tqdm] = { chain_id: tqdm( total=total, desc=f'chain {chain_id}', @@ -2161,7 +2163,7 @@ def progress_hook(line: str, idx: int) -> None: def diagnose( self, - inits: Union[Dict[str, Any], str, os.PathLike, None] = None, + inits: Union[dict[str, Any], str, os.PathLike, None] = None, data: Union[Mapping[str, Any], str, os.PathLike, None] = None, *, epsilon: Optional[float] = None, diff --git a/cmdstanpy/progress.py b/cmdstanpy/progress.py index 0f26a88f..92f57b5a 100644 --- a/cmdstanpy/progress.py +++ b/cmdstanpy/progress.py @@ -1,6 +1,7 @@ """ Record tqdm progress bar fail during session """ + import functools import logging diff --git a/cmdstanpy/stanfit/__init__.py b/cmdstanpy/stanfit/__init__.py index 2ed527c3..bd13495b 100644 --- a/cmdstanpy/stanfit/__init__.py +++ b/cmdstanpy/stanfit/__init__.py @@ -2,7 +2,7 @@ import glob import os -from typing import List, Optional, Union +from typing import Optional, Union from cmdstanpy.cmdstan_args import ( CmdStanArgs, @@ -36,7 +36,7 @@ def from_csv( - path: Union[str, List[str], os.PathLike, None] = None, + path: Union[str, list[str], os.PathLike, None] = None, method: Optional[str] = None, ) -> Union[ CmdStanMCMC, CmdStanMLE, CmdStanVB, CmdStanPathfinder, CmdStanLaplace, None @@ -78,8 +78,8 @@ def from_csv( if splits[0] is not None: if not (os.path.exists(splits[0]) and os.path.isdir(splits[0])): raise ValueError( - 'Invalid path specification, {} ' - ' unknown directory: {}'.format(path, splits[0]) + 'Invalid path specification, {} unknown ' + 'directory: {}'.format(path, splits[0]) ) csvfiles = glob.glob(path) elif isinstance(path, (str, os.PathLike)): @@ -99,8 +99,7 @@ def from_csv( for file in csvfiles: if not (os.path.exists(file) and os.path.splitext(file)[1] == ".csv"): raise ValueError( - 'Bad CSV file path spec,' - ' includes non-csv file: {}'.format(file) + 'Bad CSV file path spec, includes non-csv file: {}'.format(file) ) try: @@ -177,8 +176,9 @@ def from_csv( elif config_dict['method'] == 'optimize': if 'algorithm' not in config_dict: raise ValueError( - "Cannot find optimization algorithm" - " in file {}.".format(csvfiles[0]) + "Cannot find optimization algorithm in file {}.".format( + csvfiles[0] + ) ) algorithm: str = config_dict['algorithm'] # type: ignore save_iterations = config_dict['save_iterations'] == 1 @@ -203,8 +203,9 @@ def from_csv( elif config_dict['method'] == 'variational': if 'algorithm' not in config_dict: raise ValueError( - "Cannot find variational algorithm" - " in file {}.".format(csvfiles[0]) + "Cannot find variational algorithm in file {}.".format( + csvfiles[0] + ) ) variational_args = VariationalArgs( algorithm=config_dict['algorithm'], # type: ignore diff --git a/cmdstanpy/stanfit/gq.py b/cmdstanpy/stanfit/gq.py index 40658ced..40f17893 100644 --- a/cmdstanpy/stanfit/gq.py +++ b/cmdstanpy/stanfit/gq.py @@ -6,14 +6,11 @@ from collections import Counter from typing import ( Any, - Dict, Generic, Hashable, - List, MutableMapping, NoReturn, Optional, - Tuple, TypeVar, Union, overload, @@ -141,12 +138,12 @@ def chains(self) -> int: return self.runset.chains @property - def chain_ids(self) -> List[int]: + def chain_ids(self) -> list[int]: """Chain ids.""" return self.runset.chain_ids @property - def column_names(self) -> Tuple[str, ...]: + def column_names(self) -> tuple[str, ...]: """ Names of generated quantities of interest. 
""" @@ -230,7 +227,7 @@ def draws( for item, count in Counter(cols_1 + cols_2).items() if count > 1 ] - drop_cols: List[int] = [] + drop_cols: list[int] = [] for dup in dups: drop_cols.extend( self.previous_fit._metadata.stan_vars[dup].columns() @@ -260,7 +257,7 @@ def draws( def draws_pd( self, - vars: Union[List[str], str, None] = None, + vars: Union[list[str], str, None] = None, inc_warmup: bool = False, inc_sample: bool = False, ) -> pd.DataFrame: @@ -318,8 +315,8 @@ def draws_pd( all_columns = ['chain__', 'iter__', 'draw__'] + list(self.column_names) - gq_cols: List[str] = [] - mcmc_vars: List[str] = [] + gq_cols: list[str] = [] + mcmc_vars: list[str] = [] if vars is not None: for var in vars_list: if var in self._metadata.stan_vars: @@ -405,7 +402,7 @@ def draws_pd( @overload def draws_xr( self: Union["CmdStanGQ[CmdStanMLE]", "CmdStanGQ[CmdStanVB]"], - vars: Union[str, List[str], None] = None, + vars: Union[str, list[str], None] = None, inc_warmup: bool = False, inc_sample: bool = False, ) -> NoReturn: @@ -414,7 +411,7 @@ def draws_xr( @overload def draws_xr( self: "CmdStanGQ[CmdStanMCMC]", - vars: Union[str, List[str], None] = None, + vars: Union[str, list[str], None] = None, inc_warmup: bool = False, inc_sample: bool = False, ) -> "xr.Dataset": @@ -422,7 +419,7 @@ def draws_xr( def draws_xr( self, - vars: Union[str, List[str], None] = None, + vars: Union[str, list[str], None] = None, inc_warmup: bool = False, inc_sample: bool = False, ) -> "xr.Dataset": @@ -586,7 +583,7 @@ def stan_variable(self, var: str, **kwargs: bool) -> np.ndarray: out: np.ndarray = self._metadata.stan_vars[var].extract_reshape(draws) return out - def stan_variables(self, **kwargs: bool) -> Dict[str, np.ndarray]: + def stan_variables(self, **kwargs: bool) -> dict[str, np.ndarray]: """ Return a dictionary mapping Stan program variables names to the corresponding numpy.ndarray containing the inferred values. @@ -639,7 +636,7 @@ def _assemble_generated_quantities(self) -> None: ) from exc self._draws = gq_sample - def _draws_start(self, inc_warmup: bool) -> Tuple[int, int]: + def _draws_start(self, inc_warmup: bool) -> tuple[int, int]: draw1 = 0 p_fit = self.previous_fit if isinstance(p_fit, CmdStanMCMC): @@ -689,10 +686,10 @@ def _previous_draws(self, inc_warmup: bool) -> np.ndarray: return p_fit.variational_sample[:, None] def _previous_draws_pd( - self, vars: List[str], inc_warmup: bool + self, vars: list[str], inc_warmup: bool ) -> pd.DataFrame: if vars: - sel: Union[List[str], slice] = vars + sel: Union[list[str], slice] = vars else: sel = slice(None, None) diff --git a/cmdstanpy/stanfit/laplace.py b/cmdstanpy/stanfit/laplace.py index bad0c9a6..7e36d4e7 100644 --- a/cmdstanpy/stanfit/laplace.py +++ b/cmdstanpy/stanfit/laplace.py @@ -2,16 +2,7 @@ Container for the result of running a laplace approximation. """ -from typing import ( - Any, - Dict, - Hashable, - List, - MutableMapping, - Optional, - Tuple, - Union, -) +from typing import Any, Hashable, MutableMapping, Optional, Union import numpy as np import pandas as pd @@ -51,7 +42,7 @@ def __init__(self, runset: RunSet, mode: CmdStanMLE) -> None: def create_inits( self, seed: Optional[int] = None, chains: int = 4 - ) -> Union[List[Dict[str, np.ndarray]], Dict[str, np.ndarray]]: + ) -> Union[list[dict[str, np.ndarray]], dict[str, np.ndarray]]: """ Create initial values for the parameters of the model by randomly selecting draws from the Laplace approximation. 
@@ -130,7 +121,7 @@ def stan_variable(self, var: str) -> np.ndarray: + ", ".join(self._metadata.stan_vars.keys()) ) - def stan_variables(self) -> Dict[str, np.ndarray]: + def stan_variables(self) -> dict[str, np.ndarray]: """ Return a dictionary mapping Stan program variables names to the corresponding numpy.ndarray containing the inferred values. @@ -152,7 +143,7 @@ def stan_variables(self) -> Dict[str, np.ndarray]: result[name] = self.stan_variable(name) return result - def method_variables(self) -> Dict[str, np.ndarray]: + def method_variables(self) -> dict[str, np.ndarray]: """ Returns a dictionary of all sampler variables, i.e., all output column names ending in `__`. Assumes that all variables @@ -177,7 +168,7 @@ def draws(self) -> np.ndarray: def draws_pd( self, - vars: Union[List[str], str, None] = None, + vars: Union[list[str], str, None] = None, ) -> pd.DataFrame: if vars is not None: if isinstance(vars, str): @@ -206,7 +197,7 @@ def draws_pd( def draws_xr( self, - vars: Union[str, List[str], None] = None, + vars: Union[str, list[str], None] = None, ) -> "xr.Dataset": """ Returns the sampler draws as a xarray Dataset. @@ -308,7 +299,7 @@ def __getstate__(self) -> dict: return self.__dict__ @property - def column_names(self) -> Tuple[str, ...]: + def column_names(self) -> tuple[str, ...]: """ Names of all outputs from the sampler, comprising sampler parameters and all components of all model parameters, transformed parameters, diff --git a/cmdstanpy/stanfit/mcmc.py b/cmdstanpy/stanfit/mcmc.py index 0d0f3e0b..a2d615e5 100644 --- a/cmdstanpy/stanfit/mcmc.py +++ b/cmdstanpy/stanfit/mcmc.py @@ -5,17 +5,7 @@ import math import os from io import StringIO -from typing import ( - Any, - Dict, - Hashable, - List, - MutableMapping, - Optional, - Sequence, - Tuple, - Union, -) +from typing import Any, Hashable, MutableMapping, Optional, Sequence, Union import numpy as np import pandas as pd @@ -98,7 +88,7 @@ def __init__( self._max_treedepths: np.ndarray = np.zeros( self.runset.chains, dtype=int ) - self._chain_time: List[Dict[str, float]] = [] + self._chain_time: list[dict[str, float]] = [] # info from CSV header and initial and final comment blocks config = self._validate_csv_files() @@ -108,7 +98,7 @@ def __init__( def create_inits( self, seed: Optional[int] = None, chains: int = 4 - ) -> Union[List[Dict[str, np.ndarray]], Dict[str, np.ndarray]]: + ) -> Union[list[dict[str, np.ndarray]], dict[str, np.ndarray]]: """ Create initial values for the parameters of the model by randomly selecting draws from the MCMC samples. If the samples @@ -184,7 +174,7 @@ def chains(self) -> int: return self.runset.chains @property - def chain_ids(self) -> List[int]: + def chain_ids(self) -> list[int]: """Chain ids.""" return self.runset.chain_ids @@ -211,7 +201,7 @@ def metadata(self) -> InferenceMetadata: return self._metadata @property - def column_names(self) -> Tuple[str, ...]: + def column_names(self) -> tuple[str, ...]: """ Names of all outputs from the sampler, comprising sampler parameters and all components of all model parameters, transformed parameters, @@ -283,7 +273,7 @@ def max_treedepths(self) -> Optional[np.ndarray]: return self._max_treedepths if not self._is_fixed_param else None @property - def time(self) -> List[Dict[str, float]]: + def time(self) -> list[dict[str, float]]: """ List of per-chain time info scraped from CSV file. Each chain has dict with keys "warmup", "sampling", "total". 
@@ -332,7 +322,7 @@ def draws( return flatten_chains(self._draws[start_idx:, :, :]) return self._draws[start_idx:, :, :] - def _validate_csv_files(self) -> Dict[str, Any]: + def _validate_csv_files(self) -> dict[str, Any]: """ Checks that Stan CSV output files for all chains are consistent and returns dict containing config and column names. @@ -407,13 +397,13 @@ def _check_sampler_diagnostics(self) -> None: diagnostics.append( f'Chain {i + 1} had {self._divergences[i]} ' 'divergent transitions ' - f'({((self._divergences[i]/ct_iters)*100):.1f}%)' + f'({((self._divergences[i] / ct_iters) * 100):.1f}%)' ) if self._max_treedepths[i] > 0: diagnostics.append( f'Chain {i + 1} had {self._max_treedepths[i]} ' 'iterations at max treedepth ' - f'({((self._max_treedepths[i]/ct_iters)*100):.1f}%)' + f'({((self._max_treedepths[i] / ct_iters) * 100):.1f}%)' ) diagnostics.append( 'Use the "diagnose()" method on the CmdStanMCMC object' @@ -588,7 +578,7 @@ def diagnose(self) -> Optional[str]: def draws_pd( self, - vars: Union[List[str], str, None] = None, + vars: Union[list[str], str, None] = None, inc_warmup: bool = False, ) -> pd.DataFrame: """ @@ -666,7 +656,7 @@ def draws_pd( )[cols] def draws_xr( - self, vars: Union[str, List[str], None] = None, inc_warmup: bool = False + self, vars: Union[str, list[str], None] = None, inc_warmup: bool = False ) -> "xr.Dataset": """ Returns the sampler draws as a xarray Dataset. @@ -791,7 +781,7 @@ def stan_variable( + ", ".join(self._metadata.stan_vars.keys()) ) - def stan_variables(self) -> Dict[str, np.ndarray]: + def stan_variables(self) -> dict[str, np.ndarray]: """ Return a dictionary mapping Stan program variables names to the corresponding numpy.ndarray containing the inferred values. @@ -810,7 +800,7 @@ def stan_variables(self) -> Dict[str, np.ndarray]: result[name] = self.stan_variable(name) return result - def method_variables(self) -> Dict[str, np.ndarray]: + def method_variables(self) -> dict[str, np.ndarray]: """ Returns a dictionary of all sampler variables, i.e., all output column names ending in `__`. Assumes that all variables diff --git a/cmdstanpy/stanfit/metadata.py b/cmdstanpy/stanfit/metadata.py index 61725116..224b7c59 100644 --- a/cmdstanpy/stanfit/metadata.py +++ b/cmdstanpy/stanfit/metadata.py @@ -2,7 +2,7 @@ import copy import os -from typing import Any, Dict, Iterator, Tuple, Union +from typing import Any, Iterator, Union import stanio @@ -17,7 +17,7 @@ class InferenceMetadata: """ def __init__( - self, config: Dict[str, Union[str, int, float, Tuple[str, ...]]] + self, config: dict[str, Union[str, int, float, tuple[str, ...]]] ) -> None: """Initialize object from CSV headers""" self._cmdstan_config = config @@ -48,11 +48,11 @@ def from_csv( def __repr__(self) -> str: return 'Metadata:\n{}\n'.format(self._cmdstan_config) - def __getitem__(self, key: str) -> Union[str, int, float, Tuple[str, ...]]: + def __getitem__(self, key: str) -> Union[str, int, float, tuple[str, ...]]: return self._cmdstan_config[key] @property - def cmdstan_config(self) -> Dict[str, Any]: + def cmdstan_config(self) -> dict[str, Any]: """ Returns a dictionary containing a set of name, value pairs parsed out of the Stan CSV file header. 
These include the @@ -62,19 +62,19 @@ def cmdstan_config(self) -> Dict[str, Any]: return copy.deepcopy(self._cmdstan_config) @property - def column_names(self) -> Tuple[str, ...]: + def column_names(self) -> tuple[str, ...]: col_names = self['column_names'] return col_names # type: ignore @property - def method_vars(self) -> Dict[str, stanio.Variable]: + def method_vars(self) -> dict[str, stanio.Variable]: """ Method variable names always end in `__`, e.g. `lp__`. """ return self._method_vars @property - def stan_vars(self) -> Dict[str, stanio.Variable]: + def stan_vars(self) -> dict[str, stanio.Variable]: """ These are the user-defined variables in the Stan program. """ diff --git a/cmdstanpy/stanfit/mle.py b/cmdstanpy/stanfit/mle.py index 8f28fc3d..3fb817fb 100644 --- a/cmdstanpy/stanfit/mle.py +++ b/cmdstanpy/stanfit/mle.py @@ -1,7 +1,7 @@ """Container for the result of running optimization""" from collections import OrderedDict -from typing import Dict, Optional, Tuple, Union +from typing import Optional, Union import numpy as np import pandas as pd @@ -59,7 +59,7 @@ def __init__(self, runset: RunSet) -> None: def create_inits( self, seed: Optional[int] = None, chains: int = 4 - ) -> Dict[str, np.ndarray]: + ) -> dict[str, np.ndarray]: """ Create initial values for the parameters of the model from the MLE. @@ -106,7 +106,7 @@ def __getattr__(self, attr: str) -> Union[np.ndarray, float]: raise AttributeError(*e.args) @property - def column_names(self) -> Tuple[str, ...]: + def column_names(self) -> tuple[str, ...]: """ Names of estimated quantities, includes joint log probability, and all parameters, transformed parameters, and generated quantities. @@ -189,7 +189,7 @@ def optimized_iterations_pd(self) -> Optional[pd.DataFrame]: return pd.DataFrame(self._all_iters, columns=self.column_names) @property - def optimized_params_dict(self) -> Dict[str, np.float64]: + def optimized_params_dict(self) -> dict[str, np.float64]: """ Returns all estimates from the optimizer, including `lp__` as a Python Dict. Only returns estimate from final iteration. @@ -273,7 +273,7 @@ def stan_variable( def stan_variables( self, inc_iterations: bool = False - ) -> Dict[str, Union[np.ndarray, float]]: + ) -> dict[str, Union[np.ndarray, float]]: """ Return a dictionary mapping Stan program variables names to the corresponding numpy.ndarray containing the inferred values. diff --git a/cmdstanpy/stanfit/pathfinder.py b/cmdstanpy/stanfit/pathfinder.py index bbedc146..5047c68e 100644 --- a/cmdstanpy/stanfit/pathfinder.py +++ b/cmdstanpy/stanfit/pathfinder.py @@ -2,7 +2,7 @@ Container for the result of running Pathfinder. """ -from typing import Dict, List, Optional, Tuple, Union +from typing import Optional, Union import numpy as np @@ -31,7 +31,7 @@ def __init__(self, runset: RunSet): def create_inits( self, seed: Optional[int] = None, chains: int = 4 - ) -> Union[List[Dict[str, np.ndarray]], Dict[str, np.ndarray]]: + ) -> Union[list[dict[str, np.ndarray]], dict[str, np.ndarray]]: """ Create initial values for the parameters of the model by randomly selecting draws from the Pathfinder approximation. @@ -123,7 +123,7 @@ def stan_variable(self, var: str) -> np.ndarray: + ", ".join(self._metadata.stan_vars.keys()) ) - def stan_variables(self) -> Dict[str, np.ndarray]: + def stan_variables(self) -> dict[str, np.ndarray]: """ Return a dictionary mapping Stan program variables names to the corresponding numpy.ndarray containing the inferred values. 
@@ -142,7 +142,7 @@ def stan_variables(self) -> Dict[str, np.ndarray]: result[name] = self.stan_variable(name) return result - def method_variables(self) -> Dict[str, np.ndarray]: + def method_variables(self) -> dict[str, np.ndarray]: """ Returns a dictionary of all sampler variables, i.e., all output column names ending in `__`. Assumes that all variables @@ -193,7 +193,7 @@ def metadata(self) -> InferenceMetadata: return self._metadata @property - def column_names(self) -> Tuple[str, ...]: + def column_names(self) -> tuple[str, ...]: """ Names of all outputs from the sampler, comprising sampler parameters and all components of all model parameters, transformed parameters, diff --git a/cmdstanpy/stanfit/runset.py b/cmdstanpy/stanfit/runset.py index de11a461..a96c0ea8 100644 --- a/cmdstanpy/stanfit/runset.py +++ b/cmdstanpy/stanfit/runset.py @@ -9,7 +9,7 @@ import tempfile from datetime import datetime from time import time -from typing import List, Optional +from typing import Optional from cmdstanpy import _TMPDIR from cmdstanpy.cmdstan_args import CmdStanArgs, Method @@ -31,7 +31,7 @@ def __init__( args: CmdStanArgs, chains: int = 1, *, - chain_ids: Optional[List[int]] = None, + chain_ids: Optional[list[int]] = None, time_fmt: str = "%Y%m%d%H%M%S", one_process_per_chain: bool = True, ) -> None: @@ -79,7 +79,7 @@ def __init__( ) # per-chain output files - self._csv_files: List[str] = [''] * chains + self._csv_files: list[str] = [''] * chains self._diagnostic_files = [''] * chains # optional if chains == 1: @@ -148,11 +148,11 @@ def chains(self) -> int: return self._chains @property - def chain_ids(self) -> List[int]: + def chain_ids(self) -> list[int]: """Chain ids.""" return self._chain_ids - def cmd(self, idx: int) -> List[str]: + def cmd(self, idx: int) -> list[str]: """ Assemble CmdStan invocation. When running parallel chains from single process (2.28 and up), @@ -182,12 +182,12 @@ def cmd(self, idx: int) -> List[str]: ) @property - def csv_files(self) -> List[str]: + def csv_files(self) -> list[str]: """List of paths to CmdStan output files.""" return self._csv_files @property - def stdout_files(self) -> List[str]: + def stdout_files(self) -> list[str]: """ List of paths to transcript of CmdStan messages sent to the console. Transcripts include config information, progress, and error messages. 
@@ -202,12 +202,12 @@ def _check_retcodes(self) -> bool: return True @property - def diagnostic_files(self) -> List[str]: + def diagnostic_files(self) -> list[str]: """List of paths to CmdStan hamiltonian diagnostic files.""" return self._diagnostic_files @property - def profile_files(self) -> List[str]: + def profile_files(self) -> list[str]: """List of paths to CmdStan profiler files.""" return self._profile_files @@ -302,6 +302,6 @@ def save_csvfiles(self, dir: Optional[str] = None) -> None: def raise_for_timeouts(self) -> None: if any(self._timeout_flags): raise TimeoutError( - f"{sum(self._timeout_flags)} of {self.num_procs} processes " - "timed out" + f"{sum(self._timeout_flags)} of {self.num_procs} " + "processes timed out" ) diff --git a/cmdstanpy/stanfit/vb.py b/cmdstanpy/stanfit/vb.py index 2c8f3e20..a7c4d47b 100644 --- a/cmdstanpy/stanfit/vb.py +++ b/cmdstanpy/stanfit/vb.py @@ -1,7 +1,7 @@ """Container for the results of running autodiff variational inference""" from collections import OrderedDict -from typing import Dict, List, Optional, Tuple, Union +from typing import Optional, Union import numpy as np import pandas as pd @@ -55,7 +55,7 @@ def __init__(self, runset: RunSet) -> None: def create_inits( self, seed: Optional[int] = None, chains: int = 4 - ) -> Union[List[Dict[str, np.ndarray]], Dict[str, np.ndarray]]: + ) -> Union[list[dict[str, np.ndarray]], dict[str, np.ndarray]]: """ Create initial values for the parameters of the model by randomly selecting draws from the variational approximation @@ -120,7 +120,7 @@ def columns(self) -> int: return len(self.column_names) @property - def column_names(self) -> Tuple[str, ...]: + def column_names(self) -> tuple[str, ...]: """ Names of information items returned by sampler for each draw. Includes approximation information and names of model parameters @@ -150,7 +150,7 @@ def variational_params_pd(self) -> pd.DataFrame: return pd.DataFrame([self._variational_mean], columns=self.column_names) @property - def variational_params_dict(self) -> Dict[str, np.ndarray]: + def variational_params_dict(self) -> dict[str, np.ndarray]: """Returns inferred parameter means as Dict.""" return OrderedDict(zip(self.column_names, self._variational_mean)) @@ -240,7 +240,7 @@ def stan_variable( def stan_variables( self, *, mean: Optional[bool] = None - ) -> Dict[str, Union[np.ndarray, float]]: + ) -> dict[str, Union[np.ndarray, float]]: """ Return a dictionary mapping Stan program variables names to the corresponding numpy.ndarray containing the inferred values. diff --git a/cmdstanpy/utils/cmdstan.py b/cmdstanpy/utils/cmdstan.py index 6a5c7805..2c1951f8 100644 --- a/cmdstanpy/utils/cmdstan.py +++ b/cmdstanpy/utils/cmdstan.py @@ -7,7 +7,7 @@ import subprocess import sys from collections import OrderedDict -from typing import Callable, Dict, Optional, Tuple, Union +from typing import Callable, Optional, Union from tqdm.auto import tqdm @@ -198,7 +198,7 @@ def cmdstan_path() -> str: return os.path.normpath(cmdstan) -def cmdstan_version() -> Optional[Tuple[int, ...]]: +def cmdstan_version() -> Optional[tuple[int, ...]]: """ Parses version string out of CmdStan makefile variable CMDSTAN_VERSION, returns Tuple(Major, minor). @@ -242,7 +242,7 @@ def cmdstan_version() -> Optional[Tuple[int, ...]]: def cmdstan_version_before( - major: int, minor: int, info: Optional[Dict[str, str]] = None + major: int, minor: int, info: Optional[dict[str, str]] = None ) -> bool: """ Check that CmdStan version is less than Major.minor version. 
@@ -274,7 +274,7 @@ def cmdstan_version_before( def cxx_toolchain_path( version: Optional[str] = None, install_dir: Optional[str] = None -) -> Tuple[str, ...]: +) -> tuple[str, ...]: """ Validate, then activate C++ toolchain directory path. """ diff --git a/cmdstanpy/utils/command.py b/cmdstanpy/utils/command.py index ca95983e..99380625 100644 --- a/cmdstanpy/utils/command.py +++ b/cmdstanpy/utils/command.py @@ -1,17 +1,18 @@ """ Run commands and handle returncodes """ + import os import subprocess import sys -from typing import Callable, List, Optional, TextIO +from typing import Callable, Optional, TextIO from .filesystem import pushd from .logging import get_logger def do_command( - cmd: List[str], + cmd: list[str], cwd: Optional[str] = None, *, fd_out: Optional[TextIO] = sys.stdout, diff --git a/cmdstanpy/utils/data_munging.py b/cmdstanpy/utils/data_munging.py index 495d40a5..f25d3130 100644 --- a/cmdstanpy/utils/data_munging.py +++ b/cmdstanpy/utils/data_munging.py @@ -1,7 +1,8 @@ """ Common functions for reshaping numpy arrays """ -from typing import Hashable, MutableMapping, Tuple + +from typing import Hashable, MutableMapping import numpy as np import stanio @@ -27,7 +28,7 @@ def flatten_chains(draws_array: np.ndarray) -> np.ndarray: def build_xarray_data( - data: MutableMapping[Hashable, Tuple[Tuple[str, ...], np.ndarray]], + data: MutableMapping[Hashable, tuple[tuple[str, ...], np.ndarray]], var: stanio.Variable, drawset: np.ndarray, ) -> None: @@ -35,7 +36,7 @@ def build_xarray_data( Adds Stan variable name, labels, and values to a dictionary that will be used to construct an xarray DataSet. """ - var_dims: Tuple[str, ...] = ('draw', 'chain') + var_dims: tuple[str, ...] = ('draw', 'chain') var_dims += tuple(f"{var.name}_dim_{i}" for i in range(len(var.dimensions))) data[var.name] = ( diff --git a/cmdstanpy/utils/filesystem.py b/cmdstanpy/utils/filesystem.py index 233898e1..e8b361a7 100644 --- a/cmdstanpy/utils/filesystem.py +++ b/cmdstanpy/utils/filesystem.py @@ -1,13 +1,14 @@ """ Utilities for interacting with the filesystem on multiple platforms """ + import contextlib import os import platform import re import shutil import tempfile -from typing import Any, Iterator, List, Mapping, Optional, Tuple, Union +from typing import Any, Iterator, Mapping, Optional, Union from cmdstanpy import _TMPDIR @@ -104,7 +105,7 @@ def pushd(new_dir: str) -> Iterator[None]: def _temp_single_json( - data: Union[str, os.PathLike, Mapping[str, Any], None] + data: Union[str, os.PathLike, Mapping[str, Any], None], ) -> Iterator[Optional[str]]: """Context manager for json files.""" if data is None: @@ -128,7 +129,7 @@ def _temp_single_json( def _temp_multiinput( - input: Union[str, os.PathLike, Mapping[str, Any], List[Any], None], + input: Union[str, os.PathLike, Mapping[str, Any], list[Any], None], base: int = 1, ) -> Iterator[Optional[str]]: if isinstance(input, list): @@ -141,7 +142,7 @@ def _temp_multiinput( dir=_TMPDIR, prefix='', suffix='.json', name_only=True ) new_files = [ - os.path.splitext(mother_file)[0] + f'_{i+base}.json' + os.path.splitext(mother_file)[0] + f'_{i + base}.json' for i in range(len(input)) ] for init, file in zip(input, new_files): @@ -167,7 +168,7 @@ def _temp_multiinput( @contextlib.contextmanager def temp_inits( inits: Union[ - str, os.PathLike, Mapping[str, Any], float, int, List[Any], None + str, os.PathLike, Mapping[str, Any], float, int, list[Any], None ], *, allow_multiple: bool = True, @@ -228,7 +229,7 @@ def __init__(self, file_path: str): else: self._path = 
file_path - def __enter__(self) -> Tuple[str, bool]: + def __enter__(self) -> tuple[str, bool]: return self._path, self._tmpdir is not None def __exit__(self, exc_type, exc_val, exc_tb) -> None: # type: ignore diff --git a/cmdstanpy/utils/json.py b/cmdstanpy/utils/json.py index ce1674e6..a2f94403 100644 --- a/cmdstanpy/utils/json.py +++ b/cmdstanpy/utils/json.py @@ -1,6 +1,7 @@ """ Delegated to stanio - https://github.com/WardBrian/stanio """ + from stanio import write_stan_json __all__ = ['write_stan_json'] diff --git a/cmdstanpy/utils/stancsv.py b/cmdstanpy/utils/stancsv.py index f68260fe..32f01d5f 100644 --- a/cmdstanpy/utils/stancsv.py +++ b/cmdstanpy/utils/stancsv.py @@ -8,7 +8,7 @@ import os import re import warnings -from typing import Any, Dict, Iterator, List, Optional, Tuple, Union +from typing import Any, Iterator, Optional, Union import numpy as np import numpy.typing as npt @@ -18,7 +18,7 @@ def parse_comments_header_and_draws( stan_csv: Union[str, os.PathLike, Iterator[bytes]], -) -> Tuple[List[bytes], Optional[str], List[bytes]]: +) -> tuple[list[bytes], Optional[str], list[bytes]]: """Parses lines of a Stan CSV file into comment lines, the header line, and draws lines. @@ -27,9 +27,9 @@ def parse_comments_header_and_draws( def partition_csv( lines: Iterator[bytes], - ) -> Tuple[List[bytes], Optional[str], List[bytes]]: - comment_lines: List[bytes] = [] - draws_lines: List[bytes] = [] + ) -> tuple[list[bytes], Optional[str], list[bytes]]: + comment_lines: list[bytes] = [] + draws_lines: list[bytes] = [] header = None for line in lines: if line.startswith(b"#"): # is comment line @@ -48,8 +48,8 @@ def partition_csv( def filter_csv_bytes_by_columns( - csv_bytes_list: List[bytes], indexes_to_keep: List[int] -) -> List[bytes]: + csv_bytes_list: list[bytes], indexes_to_keep: list[int] +) -> list[bytes]: """Given the list of bytes representing the lines of a CSV file and the indexes of columns to keep, will return a new list of bytes containing only those columns in the index order provided. Assumes @@ -62,7 +62,7 @@ def filter_csv_bytes_by_columns( def csv_bytes_list_to_numpy( - csv_bytes_list: List[bytes], + csv_bytes_list: list[bytes], ) -> npt.NDArray[np.float64]: """Efficiently converts a list of bytes representing whose concatenation represents a CSV file into a numpy array. @@ -104,8 +104,8 @@ def csv_bytes_list_to_numpy( def parse_hmc_adaptation_lines( - comment_lines: List[bytes], -) -> Tuple[Optional[float], Optional[npt.NDArray[np.float64]]]: + comment_lines: list[bytes], +) -> tuple[Optional[float], Optional[npt.NDArray[np.float64]]]: """Extracts step size/mass matrix information from the Stan CSV comment lines by parsing the adaptation section. If the diag_e metric is used, the returned mass matrix will be a 1D array of the diagnoal elements, @@ -142,8 +142,8 @@ def parse_hmc_adaptation_lines( def extract_key_val_pairs( - comment_lines: List[bytes], remove_default_text: bool = True -) -> Iterator[Tuple[str, str]]: + comment_lines: list[bytes], remove_default_text: bool = True +) -> Iterator[tuple[str, str]]: """Yields cleaned key = val pairs from stan csv comments. 
Removes '(Default)' text from values if remove_default_text is True.""" cleaned_lines = ( @@ -162,11 +162,11 @@ def extract_key_val_pairs( def parse_config( - comment_lines: List[bytes], -) -> Dict[str, Union[str, int, float]]: + comment_lines: list[bytes], +) -> dict[str, Union[str, int, float]]: """Extracts the key=value config settings from Stan CSV comment lines and returns a dictionary.""" - out: Dict[str, Union[str, int, float]] = {} + out: dict[str, Union[str, int, float]] = {} for key, val in extract_key_val_pairs(comment_lines): if key == 'file': if not val.endswith('csv'): @@ -188,25 +188,25 @@ def parse_config( return out -def parse_header(header: str) -> Tuple[str, ...]: +def parse_header(header: str) -> tuple[str, ...]: """Returns munged variable names from a Stan csv header line""" return tuple(munge_varname(name) for name in header.split(",")) def construct_config_header_dict( - comment_lines: List[bytes], header: Optional[str] -) -> Dict[str, Union[str, int, float, Tuple[str, ...]]]: + comment_lines: list[bytes], header: Optional[str] +) -> dict[str, Union[str, int, float, tuple[str, ...]]]: """Extracts config and header info from comment/draws lines parsed from a Stan CSV file.""" config = parse_config(comment_lines) - out: Dict[str, Union[str, int, float, Tuple[str, ...]]] = {**config} + out: dict[str, Union[str, int, float, tuple[str, ...]]] = {**config} if header: out["raw_header"] = header out["column_names"] = parse_header(header) return out -def parse_variational_eta(comment_lines: List[bytes]) -> float: +def parse_variational_eta(comment_lines: list[bytes]) -> float: """Extracts the variational eta parameter from stancsv comment lines""" for i, line in enumerate(comment_lines): if line.startswith(b"# Stepsize adaptation") and ( @@ -224,8 +224,8 @@ def parse_variational_eta(comment_lines: List[bytes]) -> float: def extract_max_treedepth_and_divergence_counts( - header: str, draws_lines: List[bytes], max_treedepth: int, warmup_draws: int -) -> Tuple[int, int]: + header: str, draws_lines: list[bytes], max_treedepth: int, warmup_draws: int +) -> tuple[int, int]: """Extracts the max treedepth and divergence counts from the header and draw lines of the MCMC stan csv output.""" if len(draws_lines) <= 1: # Empty draws @@ -265,12 +265,12 @@ def is_sneaky_fixed_param(header: str) -> bool: def count_warmup_and_sampling_draws( stan_csv: Union[str, os.PathLike, Iterator[bytes]], -) -> Tuple[int, int]: +) -> tuple[int, int]: """Scans through a Stan CSV file to count the number of lines in the warmup/sampling blocks to determine counts for warmup and sampling draws. """ - def determine_draw_counts(lines: Iterator[bytes]) -> Tuple[int, int]: + def determine_draw_counts(lines: Iterator[bytes]) -> tuple[int, int]: is_fixed_param = False header_line_idx = None adaptation_block_idx = None @@ -325,7 +325,7 @@ def determine_draw_counts(lines: Iterator[bytes]) -> Tuple[int, int]: def raise_on_inconsistent_draws_shape( - header: str, draw_lines: List[bytes] + header: str, draw_lines: list[bytes] ) -> None: """Throws a ValueError if any draws are found to have an inconsistent shape, i.e. 
too many/few columns compared to the header""" @@ -341,12 +341,12 @@ def column_count(ln: bytes) -> int: for i, draw in enumerate(draw_lines, start=1): if (draw_size := column_count(draw)) != num_cols: raise ValueError( - f"line {i}: bad draw, expecting {num_cols} items," - f" found {draw_size}" + f"line {i}: bad draw, expecting {num_cols} items, " + f"found {draw_size}" ) -def raise_on_invalid_adaptation_block(comment_lines: List[bytes]) -> None: +def raise_on_invalid_adaptation_block(comment_lines: list[bytes]) -> None: """Throws ValueErrors if the parsed adaptation block is invalid, e.g. the metric information is not present, consistent with the rest of the file, or the step size info cannot be processed.""" @@ -373,8 +373,7 @@ def column_count(ln: bytes) -> int: num, line = next(ln_iter) if not line.startswith(b"# Step size"): raise ValueError( - f"line {num}: expecting step size, " - f"found:\n\t \"{line.decode()}\"" + f"line {num}: expecting step size, found:\n\t \"{line.decode()}\"" ) _, step_size = line.split(b" = ") try: @@ -409,12 +408,12 @@ def column_count(ln: bytes) -> int: def parse_timing_lines( - comment_lines: List[bytes], -) -> Dict[str, float]: + comment_lines: list[bytes], +) -> dict[str, float]: """Parse the timing lines into a dictionary with key corresponding to the phase, e.g. Warm-up, Sampling, Total, and value the elapsed seconds """ - out: Dict[str, float] = {} + out: dict[str, float] = {} cleaned_lines = (ln.lstrip(b"# ") for ln in comment_lines) in_timing_block = False @@ -438,7 +437,7 @@ def check_sampler_csv( iter_warmup: int = _CMDSTAN_WARMUP, save_warmup: bool = False, thin: int = _CMDSTAN_THIN, -) -> Dict[str, Any]: +) -> dict[str, Any]: """Capture essential config, shape from stan_csv file.""" meta = parse_sampler_metadata_from_csv(path) if thin > _CMDSTAN_THIN: @@ -462,8 +461,8 @@ def check_sampler_csv( if save_warmup: if not ('save_warmup' in meta and meta['save_warmup'] == 1): raise ValueError( - f'Bad Stan CSV file {path}, ' - 'config error, expected save_warmup = 1' + f'Bad Stan CSV file {path}, config error, expected ' + 'save_warmup = 1' ) if meta['draws_warmup'] != draws_warmup: raise ValueError( @@ -475,7 +474,7 @@ def check_sampler_csv( def parse_sampler_metadata_from_csv( path: Union[str, os.PathLike], -) -> Dict[str, Union[int, float, str, Tuple[str, ...], Dict[str, float]]]: +) -> dict[str, Union[int, float, str, tuple[str, ...], dict[str, float]]]: """Parses sampling metadata from a given Stan CSV path for a sample run""" try: comments, header, draws = parse_comments_header_and_draws(path) @@ -505,14 +504,14 @@ def parse_sampler_metadata_from_csv( "Sampling": "sampling", "Total": "total", } - addtl: Dict[str, Union[int, Dict[str, float]]] = { + addtl: dict[str, Union[int, dict[str, float]]] = { "draws_warmup": num_warmup, "draws_sampling": num_sampling, "ct_divergences": divs, "ct_max_treedepth": max_tree_hits, "time": {key_renames[k]: v for k, v in timings.items()}, } - return {**config, **addtl} + return config | addtl def munge_varname(name: str) -> str: @@ -531,7 +530,7 @@ def munge_varname(name: str) -> str: return '.'.join(tuple_parts) -def read_metric(path: str) -> List[int]: +def read_metric(path: str) -> list[int]: """ Read metric file in JSON or Rdump format. Return dimensions of entry "inv_metric". 
@@ -544,20 +543,18 @@ def read_metric(path: str) -> List[int]: return list(dims_np.shape) else: raise ValueError( - 'metric file {}, bad or missing' - ' entry "inv_metric"'.format(path) + 'metric file {}, bad or missing entry "inv_metric"'.format(path) ) else: dims = list(read_rdump_metric(path)) if dims is None: raise ValueError( - 'metric file {}, bad or missing' - ' entry "inv_metric"'.format(path) + 'metric file {}, bad or missing entry "inv_metric"'.format(path) ) return dims -def read_rdump_metric(path: str) -> List[int]: +def read_rdump_metric(path: str) -> list[int]: """ Find dimensions of variable named 'inv_metric' in Rdump data file. """ @@ -572,7 +569,7 @@ def read_rdump_metric(path: str) -> List[int]: return list(metric_dict['inv_metric'].shape) -def rload(fname: str) -> Optional[Dict[str, Union[int, float, np.ndarray]]]: +def rload(fname: str) -> Optional[dict[str, Union[int, float, np.ndarray]]]: """Parse data and parameter variable values from an R dump format file. This parser only supports the subset of R dump data as described in the "Dump Data Format" section of the CmdStan manual, i.e., diff --git a/pyproject.toml b/pyproject.toml index c5a31161..ebc67b6a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ description = "Python interface to CmdStan" readme = "README.md" license = { text = "BSD-3-Clause" } authors = [{ name = "Stan Dev Team" }] -requires-python = ">=3.8" +requires-python = ">=3.9" dependencies = ["pandas", "numpy>=1.21", "tqdm", "stanio>=0.4.0,<2.0.0"] dynamic = ["version"] classifiers = [ @@ -88,6 +88,9 @@ warn_redundant_casts = true strict_equality = true disallow_untyped_calls = true +[tool.ruff.format] +quote-style = "preserve" + [[tool.mypy.overrides]] module = ['tqdm.auto', 'pandas'] ignore_missing_imports = true
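Illustrative sketch only, not part of the patch above: it shows the two Python 3.9 idioms the diff adopts, namely PEP 585 built-in generics in annotations (list[str], dict[str, Any]) replacing typing.List/typing.Dict, and the PEP 584 dict union operator (as in "return config | addtl" in parse_sampler_metadata_from_csv). The summarize helper and its names below are hypothetical, used only to demonstrate the idioms under the new requires-python = ">=3.9" floor.

from typing import Optional, Union  # typing.List / typing.Dict no longer needed on 3.9+

# PEP 585: built-in collection types are usable directly as generics in annotations.
def summarize(config: dict[str, Union[int, float, str]],
              keys: Optional[list[str]] = None) -> dict[str, str]:
    """Render selected config entries as strings (hypothetical helper)."""
    selected = keys if keys is not None else list(config)
    return {k: str(config[k]) for k in selected if k in config}

# PEP 584: dicts merge with '|'; the right-hand operand wins on duplicate keys.
base = {"draws_warmup": 1000, "draws_sampling": 1000}
extra = {"ct_divergences": 0}
merged = base | extra

print(summarize(merged))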