diff --git a/pymc/backends/arviz.py b/pymc/backends/arviz.py index 63f8370523..44ccc0b9e4 100644 --- a/pymc/backends/arviz.py +++ b/pymc/backends/arviz.py @@ -58,10 +58,10 @@ Var = Any -def dict_to_dataset_drop_incompatible_coords(vars_dict, *args, dims, coords, **kwargs): +def dict_to_dataset_drop_incompatible_coords(vars_dict, *args, dims=None, coords=None, **kwargs): safe_coords = coords - if not RAISE_ON_INCOMPATIBLE_COORD_LENGTHS: + if dims and coords is not None and not RAISE_ON_INCOMPATIBLE_COORD_LENGTHS: coords_lengths = {k: len(v) for k, v in coords.items()} for var_name, var in vars_dict.items(): # Iterate in reversed because of chain/draw batch dimensions @@ -70,9 +70,8 @@ def dict_to_dataset_drop_incompatible_coords(vars_dict, *args, dims, coords, **k if (coord_length is not None) and (coord_length != dim_length): warnings.warn( f"Incompatible coordinate length of {coord_length} for dimension '{dim}' of variable '{var_name}'.\n" - "This usually happens when a sliced or concatenated variable is wrapped as a `pymc.dims.Deterministic`." - "The originate coordinates for this dim will not be included in the returned dataset for any of the variables. " - "Instead they will default to `np.arange(var_length)` and the shorter variables will be right-padded with nan.\n" + "The original coordinates for this dim will not be included in the returned dataset for any of the variables. " + "Instead they will default to `np.arange`, possibly right-padded with nan.\n" "To make this warning into an error set `pymc.backends.arviz.RAISE_ON_INCOMPATIBLE_COORD_LENGTHS` to `True`", UserWarning, ) @@ -303,14 +302,14 @@ def posterior_to_xarray(self): self.posterior_trace.get_values(var_name, combine=False, squeeze=False) ) return ( - dict_to_dataset( + dict_to_dataset_drop_incompatible_coords( data, library=pymc, coords=self.coords, dims=self.dims, attrs=self.attrs, ), - dict_to_dataset( + dict_to_dataset_drop_incompatible_coords( data_warmup, library=pymc, coords=self.coords, @@ -345,14 +344,14 @@ def sample_stats_to_xarray(self): ) return ( - dict_to_dataset( + dict_to_dataset_drop_incompatible_coords( data, library=pymc, dims=None, coords=self.coords, attrs=self.attrs, ), - dict_to_dataset( + dict_to_dataset_drop_incompatible_coords( data_warmup, library=pymc, dims=None, @@ -366,7 +365,7 @@ def posterior_predictive_to_xarray(self): """Convert posterior_predictive samples to xarray.""" data = self.posterior_predictive dims = {var_name: self.sample_dims + self.dims.get(var_name, []) for var_name in data} - return dict_to_dataset( + return dict_to_dataset_drop_incompatible_coords( data, library=pymc, coords=self.coords, dims=dims, default_dims=self.sample_dims ) @@ -375,7 +374,7 @@ def predictions_to_xarray(self): """Convert predictions (out of sample predictions) to xarray.""" data = self.predictions dims = {var_name: self.sample_dims + self.dims.get(var_name, []) for var_name in data} - return dict_to_dataset( + return dict_to_dataset_drop_incompatible_coords( data, library=pymc, coords=self.coords, dims=dims, default_dims=self.sample_dims ) @@ -412,7 +411,7 @@ def observed_data_to_xarray(self): """Convert observed data to xarray.""" if self.predictions: return None - return dict_to_dataset( + return dict_to_dataset_drop_incompatible_coords( self.observations, library=pymc, coords=self.coords, @@ -427,7 +426,7 @@ def constant_data_to_xarray(self): if not constant_data: return None - xarray_dataset = dict_to_dataset( + xarray_dataset = dict_to_dataset_drop_incompatible_coords( constant_data, library=pymc, coords=self.coords, @@ -705,7 +704,7 @@ def apply_function_over_dataset( ) ) - return dict_to_dataset( + return dict_to_dataset_drop_incompatible_coords( out_trace, library=pymc, dims=dims, diff --git a/pymc/sampling/mcmc.py b/pymc/sampling/mcmc.py index de341c68cd..508a574535 100644 --- a/pymc/sampling/mcmc.py +++ b/pymc/sampling/mcmc.py @@ -33,7 +33,7 @@ import numpy as np import pytensor.gradient as tg -from arviz import InferenceData, dict_to_dataset +from arviz import InferenceData from arviz.data.base import make_attrs from pytensor.graph.basic import Variable from rich.theme import Theme @@ -45,6 +45,7 @@ from pymc.backends import RunType, TraceOrBackend, init_traces from pymc.backends.arviz import ( coords_and_dims_for_inferencedata, + dict_to_dataset_drop_incompatible_coords, find_constants, find_observations, ) @@ -355,14 +356,14 @@ def _sample_external_nuts( # Temporary work-around. Revert once https://github.com/pymc-devs/nutpie/issues/74 is fixed # gather observed and constant data as nutpie.sample() has no access to the PyMC model coords, dims = coords_and_dims_for_inferencedata(model) - constant_data = dict_to_dataset( + constant_data = dict_to_dataset_drop_incompatible_coords( find_constants(model), library=pm, coords=coords, dims=dims, default_dims=[], ) - observed_data = dict_to_dataset( + observed_data = dict_to_dataset_drop_incompatible_coords( find_observations(model), library=pm, coords=coords, diff --git a/pymc/smc/sampling.py b/pymc/smc/sampling.py index 5afd398281..8834d1d226 100644 --- a/pymc/smc/sampling.py +++ b/pymc/smc/sampling.py @@ -33,7 +33,7 @@ import pymc -from pymc.backends.arviz import dict_to_dataset, to_inference_data +from pymc.backends.arviz import dict_to_dataset_drop_incompatible_coords, to_inference_data from pymc.backends.base import MultiTrace from pymc.distributions.custom import CustomDistRV, CustomSymbolicDistRV from pymc.distributions.distribution import _support_point @@ -264,7 +264,7 @@ def _save_sample_stats( else: sample_stats_dict[stat] = np.array(value) - sample_stats = dict_to_dataset( + sample_stats = dict_to_dataset_drop_incompatible_coords( sample_stats_dict, attrs=sample_settings_dict, library=pymc, diff --git a/tests/backends/test_arviz.py b/tests/backends/test_arviz.py index 85c1d9915c..dfbad7da17 100644 --- a/tests/backends/test_arviz.py +++ b/tests/backends/test_arviz.py @@ -213,15 +213,16 @@ def test_predictions_to_idata_new(self, data, eight_schools_params): def test_posterior_predictive_keep_size(self, data, chains, draws, eight_schools_params): with data.model: - posterior_predictive = pm.sample_posterior_predictive( - data.obj, return_inferencedata=False - ) - inference_data = to_inference_data( - trace=data.obj, - posterior_predictive=posterior_predictive, - coords={"school": np.arange(eight_schools_params["J"])}, - dims={"theta": ["school"], "eta": ["school"]}, - ) + with pytest.warns(UserWarning, match="Incompatible coordinate length"): + posterior_predictive = pm.sample_posterior_predictive( + data.obj, return_inferencedata=False + ) + inference_data = to_inference_data( + trace=data.obj, + posterior_predictive=posterior_predictive, + coords={"school": np.arange(eight_schools_params["J"])}, + dims={"theta": ["school"], "eta": ["school"]}, + ) shape = inference_data.posterior_predictive.obs.shape assert np.all( @@ -236,7 +237,8 @@ def test_posterior_predictive_thinned(self, data): warnings.filterwarnings("ignore", ".*number of samples.*", UserWarning) idata = pm.sample(tune=5, draws=draws, chains=2, return_inferencedata=True) thinned_idata = idata.sel(draw=slice(None, None, thin_by)) - idata.extend(pm.sample_posterior_predictive(thinned_idata)) + with pytest.warns(UserWarning, match="Incompatible coordinate length"): + idata.extend(pm.sample_posterior_predictive(thinned_idata)) test_dict = { "posterior": ["mu", "tau", "eta", "theta"], "sample_stats": ["diverging", "lp", "~log_likelihood"], @@ -851,7 +853,16 @@ def test_zero_size(self): assert pl[0]["x"].dtype == np.float64 -def test_incompatible_coordinate_lengths(): +@pytest.mark.parametrize( + "sampling_method", + ( + lambda: pm.sample_prior_predictive(draws=1).prior, + lambda: pm.sample( + chains=1, draws=1, tune=0, compute_convergence_checks=False, progressbar=False + ).posterior, + ), +) +def test_incompatible_coordinate_lengths(sampling_method): with pm.Model(coords={"a": [-1, -2, -3]}) as m: x = pm.Normal("x", dims="a") y = pm.Deterministic("y", x[1:], dims=("a",)) @@ -862,7 +873,7 @@ def test_incompatible_coordinate_lengths(): "Incompatible coordinate length of 3 for dimension 'a' of variable 'y'" ), ): - prior = pm.sample_prior_predictive(draws=1).prior.squeeze(("chain", "draw")) + prior = sampling_method().squeeze(("chain", "draw")) assert prior.x.dims == prior.y.dims == ("a",) assert prior.x.shape == prior.y.shape == (3,) assert np.isnan(prior.y.values[-1]) @@ -870,6 +881,6 @@ def test_incompatible_coordinate_lengths(): pm.backends.arviz.RAISE_ON_INCOMPATIBLE_COORD_LENGTHS = True with pytest.raises(ValueError): - pm.sample_prior_predictive(draws=1) + sampling_method() pm.backends.arviz.RAISE_ON_INCOMPATIBLE_COORD_LENGTHS = False diff --git a/tests/distributions/test_timeseries.py b/tests/distributions/test_timeseries.py index e9d3e76159..63424db211 100644 --- a/tests/distributions/test_timeseries.py +++ b/tests/distributions/test_timeseries.py @@ -951,7 +951,8 @@ def _gen_sde_path(sde, pars, dt, n, x0): with model: trace = sample(chains=1, random_seed=numpy_rng) - ppc = sample_posterior_predictive(trace, model=model, random_seed=numpy_rng) + with pytest.warns(UserWarning, match="Incompatible coordinate length"): + ppc = sample_posterior_predictive(trace, model=model, random_seed=numpy_rng) p95 = [2.5, 97.5] lo, hi = np.percentile(trace.posterior["lamh"], p95, axis=[0, 1]) diff --git a/tests/sampling/test_forward.py b/tests/sampling/test_forward.py index 3dd30e14f7..43f003ffde 100644 --- a/tests/sampling/test_forward.py +++ b/tests/sampling/test_forward.py @@ -1134,8 +1134,10 @@ def test_observed_data_needed_in_pp(self): with m: pm.set_data({"x_data": new_x_data}, coords=new_coords) + pm.backends.arviz.RAISE_ON_INCOMPATIBLE_COORD_LENGTHS = True with pytest.raises(ValueError, match="conflicting sizes for dimension 'trial'"): pm.sample_posterior_predictive(fake_idata, predictions=True, progressbar=False) + pm.backends.arviz.RAISE_ON_INCOMPATIBLE_COORD_LENGTHS = False new_y_data = np.random.normal(size=(2,)) with m: