Commit 08b6b01

Fix more typing problems
1 parent 97f0f79 commit 08b6b01

9 files changed: +70, -45 lines

pymc/aesaraf.py

Lines changed: 3 additions & 1 deletion
@@ -941,7 +941,9 @@ def local_check_parameter_to_ninf_switch(fgraph, node):
     )


-def compile_pymc(inputs, outputs, mode=None, **kwargs):
+def compile_pymc(
+    inputs, outputs, mode=None, **kwargs
+) -> Callable[..., Union[np.ndarray, List[np.ndarray]]]:
     """Use ``aesara.function`` with specialized pymc rewrites always enabled.

     Included rewrites
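
The added return annotation documents that compile_pymc hands back a callable whose result is either a single array (one output) or a list of arrays (several outputs). A minimal standalone sketch of how such an annotation reads, using a toy stand-in rather than aesara.function (names here are illustrative only):

from typing import Callable, List, Union

import numpy as np


def make_scaler(factors: List[float]) -> Callable[..., Union[np.ndarray, List[np.ndarray]]]:
    """Toy stand-in: one output -> a single array, several outputs -> a list."""

    def fn(x: float) -> Union[np.ndarray, List[np.ndarray]]:
        outs = [np.full(3, f * x) for f in factors]
        return outs[0] if len(outs) == 1 else outs

    return fn


single = make_scaler([2.0])(1.5)        # a single ndarray of shape (3,)
several = make_scaler([1.0, 2.0])(1.5)  # a list of two ndarrays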

pymc/backends/arviz.py

Lines changed: 3 additions & 2 deletions
@@ -7,6 +7,7 @@
     Any,
     Dict,
     Iterable,
+    Mapping,
     Optional,
     Tuple,
     Union,
@@ -532,8 +533,8 @@ def to_inference_data(self):
 def to_inference_data(
     trace: Optional["MultiTrace"] = None,
     *,
-    prior: Optional[Dict[str, Any]] = None,
-    posterior_predictive: Optional[Dict[str, Any]] = None,
+    prior: Optional[Mapping[str, Any]] = None,
+    posterior_predictive: Optional[Mapping[str, Any]] = None,
     log_likelihood: Union[bool, Iterable[str]] = True,
     coords: Optional[CoordSpec] = None,
     dims: Optional[DimSpec] = None,
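
Widening the parameter type from Dict to Mapping is a standard typing loosening: the function only reads from prior and posterior_predictive, so any read-only dict-like object is acceptable, not just dict instances, and the annotation signals that the argument will not be mutated. A small generic sketch (not PyMC code):

from types import MappingProxyType
from typing import Any, Mapping, Optional


def summarize(prior: Optional[Mapping[str, Any]] = None) -> int:
    # Read-only access, so any mapping works here.
    return 0 if prior is None else len(prior)


summarize({"mu": [0.1, 0.2]})                  # a plain dict
summarize(MappingProxyType({"sigma": [1.0]}))  # a read-only mapping view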

pymc/func_utils.py

Lines changed: 2 additions & 1 deletion
@@ -13,7 +13,7 @@
 # limitations under the License.
 import warnings

-from typing import Dict, Optional
+from typing import Callable, Dict, Optional, Union

 import aesara.tensor as aet
 import numpy as np
@@ -129,6 +129,7 @@ def find_constrained_prior(
     cdf_error = (pm.math.exp(logcdf_upper) - pm.math.exp(logcdf_lower)) - mass
     cdf_error_fn = pm.aesaraf.compile_pymc([dist_params], cdf_error, allow_input_downcast=True)

+    jac: Union[str, Callable]
     try:
         aesara_jac = pm.gradient(cdf_error, [dist_params])
         jac = pm.aesaraf.compile_pymc([dist_params], aesara_jac, allow_input_downcast=True)
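
The new jac: Union[str, Callable] line declares the variable's type before the try/except assigns it, so a later assignment of either a string or a callable still type-checks; without it, mypy pins the variable to whatever the first assignment infers. A standalone sketch of the pattern (illustrative names, not the PyMC implementation):

import math
from typing import Callable, Union


def pick_jacobian(use_gradient: bool) -> Union[str, Callable]:
    # Declared up front so mypy accepts an assignment of either type below.
    jac: Union[str, Callable]
    try:
        if not use_gradient:
            raise NotImplementedError("no symbolic gradient available")
        jac = math.cos  # a callable, standing in for a compiled gradient function
    except NotImplementedError:
        jac = "2-point"  # a string option, as accepted by e.g. scipy.optimize.minimize
    return jac


print(pick_jacobian(True), pick_jacobian(False))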

pymc/model.py

Lines changed: 3 additions & 2 deletions
@@ -1443,8 +1443,9 @@ def add_random_variable(self, var, dims: Optional[Tuple[Union[str, None], ...]]
         if dims is not None:
             if isinstance(dims, str):
                 dims = (dims,)
-            if any(dim not in self.coords and dim is not None for dim in dims):
-                raise ValueError(f"Dimension {dim} is not specified in `coords`.")
+            for dim in dims:
+                if dim not in self.coords and dim is not None:
+                    raise ValueError(f"Dimension {dim} is not specified in `coords`.")
             if any(var.name == dim for dim in dims):
                 raise ValueError(f"Variable `{var.name}` has the same name as its dimension label.")
         self._RV_dims[var.name] = dims
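
This one is a behavior fix as well as a typing fix: in the old form, dim was bound only inside the any(...) generator, so the f-string in the raise referenced a name that is not defined at that point (or, at best, a stale binding), while the explicit loop keeps the offending dimension in scope for the error message. A standalone illustration with made-up coords and dims values:

coords = {"city": ["a", "b"]}
dims = ("city", "year")

try:
    # Old style: the generator's `dim` does not leak into the enclosing scope.
    if any(dim not in coords and dim is not None for dim in dims):
        raise ValueError(f"Dimension {dim} is not specified in `coords`.")
except NameError as err:
    print(err)  # name 'dim' is not defined

# New style: the loop variable is available when the message is built.
for dim in dims:
    if dim not in coords and dim is not None:
        print(f"Dimension {dim} is not specified in `coords`.")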

pymc/sampling.py

Lines changed: 46 additions & 33 deletions
@@ -24,6 +24,8 @@
 from collections import defaultdict
 from copy import copy
 from typing import (
+    Any,
+    Callable,
     Dict,
     Iterable,
     Iterator,
@@ -811,12 +813,16 @@ def _sample(

     trace = copy(trace)

-    sampling = _iter_sample(draws, step, start, trace, chain, tune, model, random_seed, callback)
+    sampling_gen = _iter_sample(
+        draws, step, start, trace, chain, tune, model, random_seed, callback
+    )
     _pbar_data = {"chain": chain, "divergences": 0}
     _desc = "Sampling chain {chain:d}, {divergences:,d} divergences"
     if progressbar:
-        sampling = progress_bar(sampling, total=draws, display=progressbar)
+        sampling = progress_bar(sampling_gen, total=draws, display=progressbar)
         sampling.comment = _desc.format(**_pbar_data)
+    else:
+        sampling = sampling_gen
     try:
         strace = None
         for it, (strace, diverging) in enumerate(sampling):
@@ -826,6 +832,8 @@ def _sample(
                 sampling.comment = _desc.format(**_pbar_data)
     except KeyboardInterrupt:
         pass
+    if strace is None:
+        raise Exception("KeyboardInterrupt happened before the base trace was created.")
     return strace


@@ -1494,10 +1502,12 @@ def _choose_chains(traces: Sequence[BaseTrace], tune: int) -> Tuple[List[BaseTra
     idxs = np.argsort(lengths)
     l_sort = np.array(lengths)[idxs]

-    use_until = np.argmax(l_sort * np.arange(1, l_sort.shape[0] + 1)[::-1])
+    use_until = cast(int, np.argmax(l_sort * np.arange(1, l_sort.shape[0] + 1)[::-1]))
     final_length = l_sort[use_until]

-    return [traces[idx] for idx in idxs[use_until:]], final_length + tune
+    take_idx = cast(Sequence[int], idxs[use_until:])
+    sliced_traces = [traces[idx] for idx in take_idx]
+    return sliced_traces, final_length + tune


 def stop_tuning(step):
@@ -1590,30 +1600,30 @@ def sample_posterior_predictive(
     """

     _trace: Union[MultiTrace, PointList]
+    nchain: int
     if isinstance(trace, InferenceData):
         _trace = dataset_to_point_list(trace.posterior)
+        nchain, len_trace = chains_and_samples(trace)
     elif isinstance(trace, xarray.Dataset):
         _trace = dataset_to_point_list(trace)
-    else:
+        nchain, len_trace = chains_and_samples(trace)
+    elif isinstance(trace, MultiTrace):
         _trace = trace
+        nchain = _trace.nchains
+        len_trace = len(_trace)
+    elif isinstance(trace, list) and all(isinstance(x, dict) for x in trace):
+        _trace = trace
+        nchain = 1
+        len_trace = len(_trace)
+    else:
+        raise TypeError(f"Unsupported type for `trace` argument: {type(trace)}.")

     if keep_size is None:
         # This will allow users to set return_inferencedata=False and
         # automatically get the old behaviour instead of needing to
         # set both return_inferencedata and keep_size to False
         keep_size = return_inferencedata

-    nchain: int
-    len_trace: int
-    if isinstance(trace, (InferenceData, xarray.Dataset)):
-        nchain, len_trace = chains_and_samples(trace)
-    else:
-        len_trace = len(_trace)
-        try:
-            nchain = _trace.nchains
-        except AttributeError:
-            nchain = 1
-
     if keep_size and samples is not None:
         raise IncorrectArgumentsError(
             "Should not specify both keep_size and samples arguments. "
@@ -1625,7 +1635,7 @@ def sample_posterior_predictive(
     if samples is None:
         if isinstance(_trace, MultiTrace):
             samples = sum(len(v) for v in _trace._straces.values())
-        elif isinstance(_trace, list) and all(isinstance(x, dict) for x in _trace):
+        elif isinstance(_trace, list):
            # this is a list of points
             samples = len(_trace)
         else:
@@ -1693,6 +1703,7 @@ def sample_posterior_predictive(
         else:
             inputs, input_names = [], []
     else:
+        assert isinstance(_trace, MultiTrace)
         output_names = [v.name for v in vars_to_sample if v.name is not None]
         input_names = [
             n
@@ -1715,7 +1726,7 @@ def sample_posterior_predictive(

     ppc_trace_t = _DefaultTrace(samples)
     try:
-        if hasattr(_trace, "_straces"):
+        if isinstance(_trace, MultiTrace):
             # trace dict is unordered, but we want to return ppc samples in
             # a predictable ordering, so sort the chain indices
             chain_idx_mapping = sorted(_trace._straces.keys())
@@ -1750,7 +1761,7 @@ def sample_posterior_predictive(

     if not return_inferencedata:
         return ppc_trace
-    ikwargs = dict(model=model)
+    ikwargs: Dict[str, Any] = dict(model=model)
     if idata_kwargs:
         ikwargs.update(idata_kwargs)
     if predictions:
@@ -1881,8 +1892,8 @@ def sample_posterior_predictive_w(
         indices = np.random.randint(0, nchain * len_trace, j)
         if nchain > 1:
             chain_idx, point_idx = np.divmod(indices, len_trace)
-            for idx in zip(chain_idx, point_idx):
-                trace.append(tr._straces[idx[0]].point(idx[1]))
+            for cidx, pidx in zip(chain_idx, point_idx):
+                trace.append(tr._straces[cidx].point(pidx))
         else:
             for idx in indices:
                 trace.append(tr[idx])
@@ -1892,12 +1903,12 @@ def sample_posterior_predictive_w(

     lengths = list({np.atleast_1d(observed).shape for observed in obs})

+    size: List[Optional[Tuple[int, ...]]] = []
     if len(lengths) == 1:
-        size = [None for i in variables]
+        size = [None] * len(variables)
     elif len(lengths) > 2:
         raise ValueError("Observed variables could not be broadcast together")
     else:
-        size = []
         x = np.zeros(shape=lengths[0])
         y = np.zeros(shape=lengths[1])
         b = np.broadcast(x, y)
@@ -1919,7 +1930,7 @@ def sample_posterior_predictive_w(
         indices = progress_bar(indices, total=samples, display=progressbar)

     try:
-        ppc = defaultdict(list)
+        ppcl: Dict[str, list] = defaultdict(list)
         for idx in indices:
             param = trace[idx]
             var = variables[idx]
@@ -1932,13 +1943,13 @@ def sample_posterior_predictive_w(
     except KeyboardInterrupt:
         pass
     else:
-        ppc = {k: np.asarray(v) for k, v in ppc.items()}
+        ppcd = {k: np.asarray(v) for k, v in ppcl.items()}
         if not return_inferencedata:
-            return ppc
-        ikwargs = dict(model=models)
+            return ppcd
+        ikwargs: Dict[str, Any] = dict(model=models)
         if idata_kwargs:
             ikwargs.update(idata_kwargs)
-        return pm.to_inference_data(posterior_predictive=ppc, **ikwargs)
+        return pm.to_inference_data(posterior_predictive=ppcd, **ikwargs)


 def sample_prior_predictive(
@@ -2044,7 +2055,7 @@ def sample_prior_predictive(

     if not return_inferencedata:
         return prior
-    ikwargs = dict(model=model)
+    ikwargs: Dict[str, Any] = dict(model=model)
     if idata_kwargs:
         ikwargs.update(idata_kwargs)
     return pm.to_inference_data(prior=prior, **ikwargs)
@@ -2106,10 +2117,11 @@ def draw(

     # Single variable output
     if not isinstance(vars, (list, tuple)):
-        drawn_values = (draw_fn() for _ in range(draws))
-        return np.stack(drawn_values)
+        cast(Callable[[], np.ndarray], draw_fn)
+        return np.stack([draw_fn() for _ in range(draws)])

     # Multiple variable output
+    cast(Callable[[], List[np.ndarray]], draw_fn)
     drawn_values = zip(*(draw_fn() for _ in range(draws)))
     return [np.stack(v) for v in drawn_values]

@@ -2120,7 +2132,7 @@ def _init_jitter(
     seeds: Sequence[int],
     jitter: bool,
     jitter_max_retries: int,
-) -> PointType:
+) -> List[PointType]:
     """Apply a uniform jitter in [-1, 1] to the test value as starting point in each chain.

     ``model.check_start_vals`` is used to test whether the jittered starting
@@ -2144,7 +2156,7 @@ def _init_jitter(
     ipfns = make_initial_point_fns_per_chain(
         model=model,
         overrides=initvals,
-        jitter_rvs=set(model.free_RVs) if jitter else {},
+        jitter_rvs=set(model.free_RVs) if jitter else set(),
         chains=len(seeds),
     )

@@ -2282,6 +2294,7 @@ def init_nuts(

     apoints = [DictToArrayBijection.map(point) for point in initial_points]
     apoints_data = [apoint.data for apoint in apoints]
+    potential: quadpotential.QuadPotential

     if init == "adapt_diag":
         mean = np.mean(apoints_data, axis=0)
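
Several of the hunks above lean on typing.cast, which does nothing at runtime (it simply returns its argument) and exists only to tell the checker something it cannot infer, e.g. that np.argmax yields an integer index and that slicing the argsort result gives a sequence of ints. A self-contained sketch of the _choose_chains-style usage, with made-up lengths:

from typing import Sequence, cast

import numpy as np

lengths = [10, 30, 20]
idxs = np.argsort(lengths)

# As far as the checker is concerned, np.argmax returns a NumPy integer scalar
# and fancy indexing returns a loosely typed array; cast() records what we know
# without changing the values.
use_until = cast(int, np.argmax(np.array(lengths)[idxs]))
take_idx = cast(Sequence[int], idxs[use_until:])
print(use_until, list(take_idx))  # 2 [1]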

pymc/smc/smc.py

Lines changed: 4 additions & 4 deletions
@@ -14,7 +14,7 @@
 import abc

 from abc import ABC
-from typing import Dict
+from typing import Dict, cast

 import aesara.tensor as at
 import numpy as np
@@ -173,15 +173,15 @@ def __init__(
         self.resampling_indexes = None
         self.weights = np.ones(self.draws) / self.draws

-    def initialize_population(self) -> Dict[str, NDArray]:
+    def initialize_population(self) -> Dict[str, np.ndarray]:
         """Create an initial population from the prior distribution"""
-
-        return sample_prior_predictive(
+        result = sample_prior_predictive(
             self.draws,
             var_names=[v.name for v in self.model.unobserved_value_vars],
             model=self.model,
             return_inferencedata=False,
         )
+        return cast(Dict[str, np.ndarray], result)

     def _initialize_kernel(self):
         """Create variables and logp function necessary to run kernel

pymc/step_methods/arraystep.py

Lines changed: 1 addition & 1 deletion
@@ -50,7 +50,7 @@ class Competence(IntEnum):
 class BlockedStep(ABC):

     generates_stats = False
-    stats_dtypes: List[Dict[str, np.dtype]] = []
+    stats_dtypes: List[Dict[str, type]] = []
     vars: List[Variable] = []

     def __new__(cls, *args, **kwargs):
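
The stats_dtypes entries are typically NumPy scalar classes or built-in types (np.float64, np.int64, bool), and those are instances of type, not of np.dtype, which is what the corrected annotation reflects. A quick check:

import numpy as np

print(isinstance(np.float64, type))               # True: a scalar class
print(isinstance(np.dtype("float64"), type))      # False: a dtype instance
print(isinstance(np.dtype("float64"), np.dtype))  # True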

pymc/tests/test_sampling.py

Lines changed: 1 addition & 1 deletion
@@ -635,7 +635,7 @@ def test_exceptions(self, caplog):

         # test wrong type argument
         bad_trace = {"mu": stats.norm.rvs(size=1000)}
-        with pytest.raises(TypeError):
+        with pytest.raises(TypeError, match="type for `trace`"):
            ppc = pm.sample_posterior_predictive(bad_trace)

     def test_vector_observed(self):
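
pytest.raises(..., match=...) applies re.search to the string form of the raised exception, so matching on the "type for `trace`" fragment ties the test to the new TypeError branch rather than to a TypeError raised for some other reason. A minimal standalone example of the same pattern:

import pytest


def test_match_is_searched_in_the_message():
    # `match` is treated as a regular expression and searched in str(exc).
    with pytest.raises(TypeError, match="type for `trace`"):
        raise TypeError("Unsupported type for `trace` argument: <class 'dict'>.")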

scripts/run_mypy.py

Lines changed: 7 additions & 0 deletions
@@ -50,15 +50,19 @@
 pymc/ode/utils.py
 pymc/parallel_sampling.py
 pymc/plots/__init__.py
+pymc/sampling.py
 pymc/smc/__init__.py
 pymc/smc/sample_smc.py
+pymc/smc/smc.py
 pymc/stats/__init__.py
 pymc/step_methods/__init__.py
 pymc/step_methods/compound.py
 pymc/step_methods/elliptical_slice.py
 pymc/step_methods/hmc/__init__.py
 pymc/step_methods/hmc/base_hmc.py
+pymc/step_methods/hmc/hmc.py
 pymc/step_methods/hmc/integration.py
+pymc/step_methods/hmc/nuts.py
 pymc/step_methods/hmc/quadpotential.py
 pymc/step_methods/slicer.py
 pymc/step_methods/step_sizes.py
@@ -159,6 +163,9 @@ def check_no_unexpected_results(mypy_lines: Iterator[str]):
        print(f"{len(unexpected_passing)} files unexpectedly passed the type checks:")
        print("\n".join(sorted(map(str, unexpected_passing))))
        print("This is good news! Go to scripts/run-mypy.py and add them to the list.")
+        if all_files.issubset(passing):
+            print("WOW! All files are passing the mypy type checks!")
+            print("scripts\\run_mypy.py may no longer be needed.")
        sys.exit(1)
    return
