Skip to content

Commit 1ffbc2c

Browse files
committed
Make coords and data always mutable
1 parent 9f15330 commit 1ffbc2c

21 files changed

+318
-440
lines changed

pymc/data.py

Lines changed: 16 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -262,29 +262,25 @@ def ConstantData(
262262
*,
263263
dims: Optional[Sequence[str]] = None,
264264
coords: Optional[dict[str, Union[Sequence, np.ndarray]]] = None,
265-
export_index_as_coords=False,
266265
infer_dims_and_coords=False,
267266
**kwargs,
268267
) -> TensorConstant:
269-
"""Alias for ``pm.Data(..., mutable=False)``.
268+
"""Alias for ``pm.Data``.
270269
271270
Registers the ``value`` as a :class:`~pytensor.tensor.TensorConstant` with the model.
272271
For more information, please reference :class:`pymc.Data`.
273272
"""
274-
if export_index_as_coords:
275-
infer_dims_and_coords = export_index_as_coords
276-
warnings.warn(
277-
"Deprecation warning: 'export_index_as_coords; is deprecated and will be removed in future versions. Please use 'infer_dims_and_coords' instead.",
278-
DeprecationWarning,
279-
)
273+
warnings.warn(
274+
"ConstantData is deprecated. All Data variables are now mutable. Use Data instead.",
275+
FutureWarning,
276+
)
280277

281278
var = Data(
282279
name,
283280
value,
284281
dims=dims,
285282
coords=coords,
286283
infer_dims_and_coords=infer_dims_and_coords,
287-
mutable=False,
288284
**kwargs,
289285
)
290286
return cast(TensorConstant, var)
@@ -296,29 +292,25 @@ def MutableData(
296292
*,
297293
dims: Optional[Sequence[str]] = None,
298294
coords: Optional[dict[str, Union[Sequence, np.ndarray]]] = None,
299-
export_index_as_coords=False,
300295
infer_dims_and_coords=False,
301296
**kwargs,
302297
) -> SharedVariable:
303-
"""Alias for ``pm.Data(..., mutable=True)``.
298+
"""Alias for ``pm.Data``.
304299
305300
Registers the ``value`` as a :class:`~pytensor.compile.sharedvalue.SharedVariable`
306301
with the model. For more information, please reference :class:`pymc.Data`.
307302
"""
308-
if export_index_as_coords:
309-
infer_dims_and_coords = export_index_as_coords
310-
warnings.warn(
311-
"Deprecation warning: 'export_index_as_coords; is deprecated and will be removed in future versions. Please use 'infer_dims_and_coords' instead.",
312-
DeprecationWarning,
313-
)
303+
warnings.warn(
304+
"MutableData is deprecated. All Data variables are now mutable. Use Data instead.",
305+
FutureWarning,
306+
)
314307

315308
var = Data(
316309
name,
317310
value,
318311
dims=dims,
319312
coords=coords,
320313
infer_dims_and_coords=infer_dims_and_coords,
321-
mutable=True,
322314
**kwargs,
323315
)
324316
return cast(SharedVariable, var)
@@ -330,7 +322,6 @@ def Data(
330322
*,
331323
dims: Optional[Sequence[str]] = None,
332324
coords: Optional[dict[str, Union[Sequence, np.ndarray]]] = None,
333-
export_index_as_coords=False,
334325
infer_dims_and_coords=False,
335326
mutable: Optional[bool] = None,
336327
**kwargs,
@@ -373,15 +364,6 @@ def Data(
373364
infer_dims_and_coords : bool, default=False
374365
If True, the ``Data`` container will try to infer what the coordinates
375366
and dimension names should be if there is an index in ``value``.
376-
mutable : bool, optional
377-
Switches between creating a :class:`~pytensor.compile.sharedvalue.SharedVariable`
378-
(``mutable=True``) vs. creating a :class:`~pytensor.tensor.TensorConstant`
379-
(``mutable=False``).
380-
Consider using :class:`pymc.ConstantData` or :class:`pymc.MutableData` as less
381-
verbose alternatives to ``pm.Data(..., mutable=...)``.
382-
If this parameter is not specified, the value it takes will depend on the
383-
version of the package. Since ``v4.1.0`` the default value is
384-
``mutable=False``, with previous versions having ``mutable=True``.
385367
**kwargs : dict, optional
386368
Extra arguments passed to :func:`pytensor.shared`.
387369
@@ -394,7 +376,7 @@ def Data(
394376
>>> observed_data = [mu + np.random.randn(20) for mu in true_mu]
395377
396378
>>> with pm.Model() as model:
397-
... data = pm.MutableData('data', observed_data[0])
379+
... data = pm.Data('data', observed_data[0])
398380
... mu = pm.Normal('mu', 0, 10)
399381
... pm.Normal('y', mu=mu, sigma=1, observed=data)
400382
@@ -430,19 +412,12 @@ def Data(
430412
"Pass them directly to `observed` if you want to trigger auto-imputation"
431413
)
432414

433-
if mutable is None:
415+
if mutable is not None:
434416
warnings.warn(
435-
"The `mutable` kwarg was not specified. Before v4.1.0 it defaulted to `pm.Data(mutable=True)`,"
436-
" which is equivalent to using `pm.MutableData()`."
437-
" In v4.1.0 the default changed to `pm.Data(mutable=False)`, equivalent to `pm.ConstantData`."
438-
" Use `pm.ConstantData`/`pm.MutableData` or pass `pm.Data(..., mutable=False/True)` to avoid this warning.",
439-
UserWarning,
417+
"Data is now always mutable. Specifying the `mutable` kwarg will raise an error in a future release",
418+
FutureWarning,
440419
)
441-
mutable = False
442-
if mutable:
443-
x = pytensor.shared(arr, name, **kwargs)
444-
else:
445-
x = pt.as_tensor_variable(arr, name, **kwargs)
420+
x = pytensor.shared(arr, name, **kwargs)
446421

447422
if isinstance(dims, str):
448423
dims = (dims,)
@@ -453,24 +428,11 @@ def Data(
453428
expected=x.ndim,
454429
)
455430

456-
# Optionally infer coords and dims from the input value.
457-
if export_index_as_coords:
458-
infer_dims_and_coords = export_index_as_coords
459-
warnings.warn(
460-
"Deprecation warning: 'export_index_as_coords; is deprecated and will be removed in future versions. Please use 'infer_dims_and_coords' instead.",
461-
DeprecationWarning,
462-
)
463-
464431
if infer_dims_and_coords:
465432
coords, dims = determine_coords(model, value, dims)
466433

467434
if dims:
468-
if not mutable:
469-
# Use the dimension lengths from the before it was tensorified.
470-
# These can still be tensors, but in many cases they are numeric.
471-
xshape = np.shape(arr)
472-
else:
473-
xshape = x.shape
435+
xshape = x.shape
474436
# Register new dimension lengths
475437
for d, dname in enumerate(dims):
476438
if dname not in model.dim_lengths:
@@ -479,7 +441,6 @@ def Data(
479441
# Note: Coordinate values can't be taken from
480442
# the value, because it could be N-dimensional.
481443
values=coords.get(dname, None),
482-
mutable=mutable,
483444
length=xshape[d],
484445
)
485446

pymc/gp/hsgp_approx.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,7 @@ def prior_linearized(self, Xs: TensorLike):
252252
eigenfunctions `phi`, and the square root of the power spectral density.
253253
254254
Correct results when using `prior_linearized` in tandem with `pm.set_data` and
255-
`pm.MutableData` require two conditions. First, one must specify `L` instead of `c` when
255+
`pm.Data` require two conditions. First, one must specify `L` instead of `c` when
256256
the GP is constructed. If not, a RuntimeError is raised. Second, the `Xs` needs to be
257257
zero-centered, so its mean must be subtracted. An example is given below.
258258
@@ -290,7 +290,7 @@ def prior_linearized(self, Xs: TensorLike):
290290
# First calculate the mean, then make X a shared variable, then subtract the mean.
291291
# When X is mutated later, the correct mean will be subtracted.
292292
X_mean = np.mean(X, axis=0)
293-
X = pm.MutableData("X", X)
293+
X = pm.Data("X", X)
294294
Xs = X - X_mean
295295
296296
# Pass the zero-subtracted Xs in to the GP

pymc/model/core.py

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -515,6 +515,12 @@ def __init__(
515515
self.name = self._validate_name(name)
516516
self.check_bounds = check_bounds
517517

518+
if coords_mutable is not None:
519+
warnings.warn(
520+
"All coords are now mutable by default. coords_mutable will be removed in a future release.",
521+
FutureWarning,
522+
)
523+
518524
if self.parent is not None:
519525
self.named_vars = treedict(parent=self.parent.named_vars)
520526
self.named_vars_to_dims = treedict(parent=self.parent.named_vars_to_dims)
@@ -951,7 +957,7 @@ def add_coord(
951957
self,
952958
name: str,
953959
values: Optional[Sequence] = None,
954-
mutable: bool = False,
960+
mutable: Optional[bool] = None,
955961
*,
956962
length: Optional[Union[int, Variable]] = None,
957963
):
@@ -972,6 +978,12 @@ def add_coord(
972978
A scalar of the dimensions length.
973979
Defaults to ``pytensor.tensor.constant(len(values))``.
974980
"""
981+
if mutable is not None:
982+
warnings.warn(
983+
"Coords are now always mutable. Specifying `mutable` will raise an error in a future release",
984+
FutureWarning,
985+
)
986+
975987
if name in {"draw", "chain", "__sample__"}:
976988
raise ValueError(
977989
"Dimensions can not be named `draw`, `chain` or `__sample__`, "
@@ -995,10 +1007,7 @@ def add_coord(
9951007
if length is None:
9961008
length = len(values)
9971009
if not isinstance(length, Variable):
998-
if mutable:
999-
length = pytensor.shared(length, name=name)
1000-
else:
1001-
length = pytensor.tensor.constant(length)
1010+
length = pytensor.shared(length, name=name)
10021011
assert length.type.ndim == 0
10031012
self._dim_lengths[name] = length
10041013
self._coords[name] = values
@@ -1029,8 +1038,6 @@ def set_dim(self, name: str, new_length: int, coord_values: Optional[Sequence] =
10291038
coord_values : array_like, optional
10301039
Optional sequence of coordinate values.
10311040
"""
1032-
if not isinstance(self.dim_lengths[name], SharedVariable):
1033-
raise ValueError(f"The dimension '{name}' is immutable.")
10341041
if coord_values is None and self.coords.get(name, None) is not None:
10351042
raise ValueError(
10361043
f"'{name}' has coord values. Pass `set_dim(..., coord_values=...)` to update them."
@@ -1079,7 +1086,7 @@ def set_data(
10791086
):
10801087
"""Changes the values of a data variable in the model.
10811088
1082-
In contrast to pm.MutableData().set_value, this method can also
1089+
In contrast to pm.Data().set_value, this method can also
10831090
update the corresponding coordinates.
10841091
10851092
Parameters
@@ -1097,7 +1104,7 @@ def set_data(
10971104
if not isinstance(shared_object, SharedVariable):
10981105
raise TypeError(
10991106
f"The variable `{name}` must be a `SharedVariable`"
1100-
" (created through `pm.MutableData()` or `pm.Data(mutable=True)`) to allow updating. "
1107+
" (created through `pm.Data()` or `pm.Data(mutable=True)`) to allow updating. "
11011108
f"The current type is: {type(shared_object)}"
11021109
)
11031110

@@ -1114,15 +1121,16 @@ def set_data(
11141121

11151122
for d, dname in enumerate(dims):
11161123
length_tensor = self.dim_lengths[dname]
1117-
old_length = length_tensor.eval()
1124+
with pytensor.config.change_flags(cxx=""):
1125+
old_length = length_tensor.eval()
11181126
new_length = values.shape[d]
11191127
original_coords = self.coords.get(dname, None)
11201128
new_coords = coords.get(dname, None)
11211129

11221130
length_changed = new_length != old_length
11231131

11241132
# Reject resizing if we already know that it would create shape problems.
1125-
# NOTE: If there are multiple pm.MutableData containers sharing this dim, but the user only
1133+
# NOTE: If there are multiple pm.Data containers sharing this dim, but the user only
11261134
# changes the values for one of them, they will run into shape problems nonetheless.
11271135
if length_changed:
11281136
if original_coords is not None:
@@ -1984,8 +1992,8 @@ def set_data(new_data, model=None, *, coords=None):
19841992
import pymc as pm
19851993
19861994
with pm.Model() as model:
1987-
x = pm.MutableData('x', [1., 2., 3.])
1988-
y = pm.MutableData('y', [1., 2., 3.])
1995+
x = pm.Data('x', [1., 2., 3.])
1996+
y = pm.Data('y', [1., 2., 3.])
19891997
beta = pm.Normal('beta', 0, 1)
19901998
obs = pm.Normal('obs', x * beta, 1, observed=y, shape=x.shape)
19911999
idata = pm.sample()
@@ -2014,7 +2022,7 @@ def set_data(new_data, model=None, *, coords=None):
20142022
data = rng.normal(loc=1.0, scale=2.0, size=100)
20152023
20162024
with pm.Model() as model:
2017-
y = pm.MutableData('y', data)
2025+
y = pm.Data('y', data)
20182026
theta = pm.Normal('theta', mu=0.0, sigma=10.0)
20192027
obs = pm.Normal('obs', theta, 2.0, observed=y, shape=y.shape)
20202028
idata = pm.sample()

pymc/model/fgraph.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -182,8 +182,7 @@ def fgraph_from_model(
182182
for named_val in named_value_vars:
183183
idx = value_vars.index(named_val)
184184
value_vars[idx] = named_val
185-
# Other variables that are in named_vars but are not any of the categories above
186-
# E.g., MutableData, ConstantData, _dim_lengths
185+
# Other variables that are in named_vars but are not any of the categories above (e.g., Data)
187186
# We use the same trick as deterministics!
188187
accounted_for = set(free_rvs + observed_rvs + potentials + old_deterministics + old_value_vars)
189188
other_named_vars = [
@@ -200,8 +199,8 @@ def fgraph_from_model(
200199

201200
# Replace the following shared variables in the model:
202201
# 1. RNGs
203-
# 2. MutableData (could increase memory usage significantly)
204-
# 3. Mutable coords dim lengths
202+
# 2. Data (could increase memory usage significantly)
203+
# 3. Symbolic coords dim lengths
205204
shared_vars_to_copy = find_rng_nodes(model_vars)
206205
shared_vars_to_copy += [v for v in model.dim_lengths.values() if isinstance(v, SharedVariable)]
207206
shared_vars_to_copy += [v for v in model.named_vars.values() if isinstance(v, SharedVariable)]

pymc/model_graph.py

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,13 @@
1919
from typing import Optional
2020

2121
from pytensor import function
22-
from pytensor.compile.sharedvalue import SharedVariable
2322
from pytensor.graph import Apply
2423
from pytensor.graph.basic import ancestors, walk
2524
from pytensor.scalar.basic import Cast
2625
from pytensor.tensor.elemwise import Elemwise
2726
from pytensor.tensor.random.op import RandomVariable
2827
from pytensor.tensor.shape import Shape
29-
from pytensor.tensor.variable import TensorConstant, TensorVariable
28+
from pytensor.tensor.variable import TensorVariable
3029

3130
import pymc as pm
3231

@@ -162,14 +161,6 @@ def _make_node(self, var_name, graph, *, nx=False, cluster=False, formatting: st
162161
shape = "octagon"
163162
style = "filled"
164163
label = f"{var_name}\n~\nPotential"
165-
elif isinstance(v, TensorConstant):
166-
shape = "box"
167-
style = "rounded, filled"
168-
label = f"{var_name}\n~\nConstantData"
169-
elif isinstance(v, SharedVariable):
170-
shape = "box"
171-
style = "rounded, filled"
172-
label = f"{var_name}\n~\nMutableData"
173164
elif v in self.model.basic_RVs:
174165
shape = "ellipse"
175166
if v in self.model.observed_RVs:
@@ -180,10 +171,14 @@ def _make_node(self, var_name, graph, *, nx=False, cluster=False, formatting: st
180171
if symbol.endswith("RV"):
181172
symbol = symbol[:-2]
182173
label = f"{var_name}\n~\n{symbol}"
183-
else:
174+
elif v in self.model.deterministics:
184175
shape = "box"
185176
style = None
186177
label = f"{var_name}\n~\nDeterministic"
178+
else:
179+
shape = "box"
180+
style = "rounded, filled"
181+
label = f"{var_name}\n~\nCData"
187182

188183
kwargs = {
189184
"shape": shape,

pymc/sampling/forward.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ def compile_forward_sampling_function(
115115
116116
Concretely, this function can be used to compile a function to sample from the
117117
posterior predictive distribution of a model that has variables that are conditioned
118-
on ``MutableData`` instances. The variables that depend on the mutable data that have changed
118+
on ``Data`` instances. The variables that depend on the mutable data that have changed
119119
will be considered volatile, and as such, they won't be included as inputs into the compiled
120120
function. This means that if they have values stored in the posterior, these values will be
121121
ignored and new values will be computed (in the case of deterministics and potentials) or
@@ -147,8 +147,8 @@ def compile_forward_sampling_function(
147147
in the compiled function. The types of the key and value should match or an error will be
148148
raised during compilation.
149149
constant_data : Optional[Dict[str, numpy.ndarray]]
150-
A dictionary that maps the names of ``MutableData`` or ``ConstantData`` instances to their
151-
corresponding values at inference time. If a model was created with ``MutableData``, these
150+
A dictionary that maps the names of ``Data`` instances to their
151+
corresponding values at inference time. If a model was created with ``Data``, these
152152
are stored as ``SharedVariable`` with the name of the data variable and a value equal to
153153
the initial data. At inference time, this information is stored in ``InferenceData``
154154
objects under the ``constant_data`` group, which allows us to check whether a

0 commit comments

Comments
 (0)