Skip to content

Commit 512e0a7

Browse files
alcrene (Alexandre René), michaelosthege
authored
Add a Null backend (#112)
* [mcbackend] Add NullBackend A null storage for draws from a chain: gobbles up draws without storing them Possible use cases include - Online computations: Draws are used and discarded immediately, allowing for much larger sample spaces. - Profiling: To use as a baseline, to measure compute time & memory before allocating memory for draws. Comparing with another backend would then show how much overhead it adds. * [NullBackend] Clean up docstrings * [NullBackend] Add tests; fix issue with preallocate=0 Tests were copied from test_backend_numpy and the parts checking the `_samples` array removed. Fixed issue: - Reset default preallocation to 1_000, like with NumPyBackend: it is still used for the stats array, so it makes sense to use a reasonable default. - Preallocate = 0 no longer switches the allocation to object arrays, in contrast to NumPyBackend - IMO this is a bug in NumPyBackend: `grow_append` cannot know if ``preallocate = 0`` was used; it only looks at the `rigid` value to determine how to append. - Without this change, `grow_append` will always fail when we use `preallocate = 0` with multivariate statistics. * Avoid code duplication --------- Co-authored-by: Alexandre René <[email protected]> Co-authored-by: Michael Osthege <[email protected]>
1 parent b9704f3 commit 512e0a7

File tree

4 files changed

+360
-18
lines changed

4 files changed

+360
-18
lines changed

mcbackend/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
A framework agnostic implementation for storage of MCMC draws.
33
"""
44

5+
from .backends.null import NullBackend
56
from .backends.numpy import NumPyBackend
67
from .core import Backend, Chain, Run
78
from .meta import ChainMeta, Coordinate, DataVariable, ExtendedValue, RunMeta, Variable
@@ -16,6 +17,7 @@
1617
__version__ = "0.5.2"
1718
__all__ = [
1819
"NumPyBackend",
20+
"NullBackend",
1921
"Backend",
2022
"Chain",
2123
"Run",

mcbackend/backends/null.py

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
"""
2+
This backend simply discards draws. They are not stored in memory.
3+
This can be used in situations where we want to run an MCMC but not permanently
4+
store its output.
5+
"""
6+
7+
# Code-wise, a NullChain is essentially just a NumPyChain without the underlying data array.
8+
9+
from typing import Dict, List, Mapping, Optional, Sequence, Tuple
10+
11+
import numpy
12+
13+
from ..core import Backend, Chain, Run
14+
from ..meta import ChainMeta, RunMeta
15+
from .numpy import grow_append, prepare_storage
16+
17+
18+
class NullChain(Chain):
    """A chain that drops every draw and keeps only sampler statistics.

    Typical uses:

    - Online computations: each draw is consumed as soon as it is produced and
      then dropped, which permits much larger sample spaces.
    - Profiling: acts as a baseline for compute time and memory. Comparing it
      with another backend shows how much overhead that backend adds.

    Because draws are never stored, only part of the `Chain` interface works:

    - Supported: `__len__`, `append`, `get_stats`, `get_stats_at`
    - Not supported: `get_draws`, `get_draws_at` (both raise `RuntimeError`)

    .. Todo:: Option to also discard sampling stats?
    .. Todo:: Allow retrieving the most recent draw?
    """

    def __init__(self, cmeta: ChainMeta, rmeta: RunMeta, *, preallocate: int) -> None:
        """Set up a chain that swallows draws without storing them.

        Parameters
        ----------
        cmeta : ChainMeta
            Metadata of the chain.
        rmeta : RunMeta
            Metadata of the MCMC run.
        preallocate : int
            Number of entries to reserve up front in the storage arrays of the
            sampler stats. No storage is created for the draws themselves.
            Stats that ``prepare_storage`` does not classify as rigid are
            kept in object arrays instead of typed arrays.
        """
        # Number of draws appended so far; also the next write position for stats.
        self._draw_idx = 0

        # Only sampler stats get backing arrays; model variables get none.
        stat_storage = prepare_storage(rmeta.sample_stats, preallocate)
        self._stats, self._stat_is_rigid = stat_storage

        super().__init__(cmeta, rmeta)

    def append(  # pylint: disable=duplicate-code
        self, draw: Mapping[str, numpy.ndarray], stats: Optional[Mapping[str, numpy.ndarray]] = None
    ):
        """Count one more draw, recording ``stats`` if any were passed.

        ``draw`` is accepted for interface compatibility and then ignored.
        """
        position = self._draw_idx
        if stats:
            grow_append(self._stats, stats, self._stat_is_rigid, position)
        # The counter advances even when no stats were supplied.
        self._draw_idx = position + 1

    def __len__(self) -> int:
        """Return how many draws have been appended."""
        return self._draw_idx

    def get_draws(self, var_name: str, slc: slice = slice(None)) -> numpy.ndarray:
        """Unsupported: always raises ``RuntimeError`` because draws are discarded."""
        raise RuntimeError("NullChain does not save draws.")

    def get_draws_at(self, idx: int, var_names: Sequence[str]) -> Dict[str, numpy.ndarray]:
        """Unsupported: always raises ``RuntimeError`` because draws are discarded."""
        raise RuntimeError("NullChain does not save draws.")

    def get_stats(  # pylint: disable=duplicate-code
        self, stat_name: str, slc: slice = slice(None)
    ) -> numpy.ndarray:
        """Return the recorded values of one sampler stat, optionally sliced.

        The storage array is first cut to the number of appended draws, then
        ``slc`` is applied to that.
        """
        recorded = self._stats[stat_name][: self._draw_idx]
        selected = recorded[slc]
        is_text = self.sample_stats[stat_name].dtype == "str"
        if is_text:
            # Stats declared with dtype "str" are re-packed into a NumPy str array.
            return numpy.array(selected.tolist(), dtype=str)
        return selected

    def get_stats_at(self, idx: int, stat_names: Sequence[str]) -> Dict[str, numpy.ndarray]:
        """Return the values of the named stats at storage index ``idx``."""
        picked: Dict[str, numpy.ndarray] = {}
        for name in stat_names:
            picked[name] = numpy.asarray(self._stats[name][idx])
        return picked
89+
90+
91+
class NullRun(Run):
    """An MCMC run whose chains discard their samples right away."""

    def __init__(self, meta: RunMeta, *, preallocate: int) -> None:
        """Remember the chain settings and start with an empty chain list.

        Parameters
        ----------
        meta : RunMeta
            Metadata of the MCMC run.
        preallocate : int
            Forwarded unchanged to every `NullChain` this run creates.
        """
        # Keyword arguments handed to each NullChain constructed by init_chain.
        self._settings = dict(preallocate=preallocate)
        self._chains: List[NullChain] = []
        super().__init__(meta)

    def init_chain(self, chain_number: int) -> NullChain:
        """Create a `NullChain` for ``chain_number``, register it, and return it."""
        new_chain = NullChain(
            ChainMeta(self.meta.rid, chain_number),
            self.meta,
            **self._settings,
        )
        self._chains.append(new_chain)
        return new_chain

    def get_chains(self) -> Tuple[NullChain, ...]:
        """Return all chains created so far, in creation order."""
        return tuple(self._chains)
107+
108+
109+
class NullBackend(Backend):
    """A backend whose runs discard samples immediately."""

    def __init__(self, preallocate: int = 1_000) -> None:
        """Store the settings handed to every run created by this backend.

        Parameters
        ----------
        preallocate : int
            Forwarded unchanged to each `NullRun` (default 1000).
        """
        # Keyword arguments passed through to NullRun in init_run.
        self._settings = dict(preallocate=preallocate)
        super().__init__()

    def init_run(self, meta: RunMeta) -> NullRun:
        """Create a `NullRun` for ``meta`` using this backend's settings."""
        run = NullRun(meta, **self._settings)
        return run

mcbackend/backends/numpy.py

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,12 @@
33
"""
44

55
import math
6-
from typing import Dict, List, Mapping, Optional, Sequence, Tuple
6+
from typing import Dict, Iterable, List, Mapping, Optional, Sequence, Tuple
77

88
import numpy
99

1010
from ..core import Backend, Chain, Run, is_rigid
11-
from ..meta import ChainMeta, RunMeta
11+
from ..meta import ChainMeta, RunMeta, Variable
1212

1313

1414
def grow_append(
@@ -34,6 +34,22 @@ def grow_append(
3434
return
3535

3636

37+
def prepare_storage(
    variables: Iterable[Variable], preallocate: int
) -> Tuple[Dict[str, numpy.ndarray], Dict[str, bool]]:
    """Allocate one storage array per variable and record which ones are rigid.

    A variable counts as rigid when ``is_rigid`` accepts its shape, its number
    of dimensions is defined, and its dtype is not ``"str"``.

    Parameters
    ----------
    variables : Iterable[Variable]
        Variables (or sampler stats) to create storage for.
    preallocate : int
        Length of the leading (draw) axis of every allocated array.

    Returns
    -------
    storage : dict
        Maps variable name to its array: an uninitialized array of shape
        ``(preallocate, *shape)`` and the variable's dtype for rigid variables,
        otherwise a 1-D object array of ``preallocate`` ``None`` entries.
    rigid_dict : dict
        Maps variable name to whether the variable was treated as rigid.
    """
    arrays: Dict[str, numpy.ndarray] = {}
    rigidity: Dict[str, bool] = {}
    for variable in variables:
        fixed = is_rigid(variable.shape) and not variable.undefined_ndim and variable.dtype != "str"
        rigidity[variable.name] = fixed
        if not fixed:
            # Size per draw is unknown up front, so fall back to an object array.
            arrays[variable.name] = numpy.array([None] * preallocate, dtype=object)
            continue
        arrays[variable.name] = numpy.empty((preallocate, *variable.shape), variable.dtype)
    return arrays, rigidity
51+
52+
3753
class NumPyChain(Chain):
3854
"""Stores value draws in NumPy arrays and can pre-allocate memory."""
3955

@@ -54,25 +70,11 @@ def __init__(self, cmeta: ChainMeta, rmeta: RunMeta, *, preallocate: int) -> Non
5470
where the correct amount of memory cannot be pre-allocated.
5571
In these cases object arrays are used.
5672
"""
57-
self._var_is_rigid: Dict[str, bool] = {}
58-
self._samples: Dict[str, numpy.ndarray] = {}
59-
self._stat_is_rigid: Dict[str, bool] = {}
60-
self._stats: Dict[str, numpy.ndarray] = {}
6173
self._draw_idx = 0
6274

6375
# Create storage ndarrays for each model variable and sampler stat.
64-
for target_dict, rigid_dict, variables in [
65-
(self._samples, self._var_is_rigid, rmeta.variables),
66-
(self._stats, self._stat_is_rigid, rmeta.sample_stats),
67-
]:
68-
for var in variables:
69-
rigid = is_rigid(var.shape) and not var.undefined_ndim and var.dtype != "str"
70-
rigid_dict[var.name] = rigid
71-
if rigid:
72-
reserve = (preallocate, *var.shape)
73-
target_dict[var.name] = numpy.empty(reserve, var.dtype)
74-
else:
75-
target_dict[var.name] = numpy.array([None] * preallocate, dtype=object)
76+
self._samples, self._var_is_rigid = prepare_storage(rmeta.variables, preallocate)
77+
self._stats, self._stat_is_rigid = prepare_storage(rmeta.sample_stats, preallocate)
7678

7779
super().__init__(cmeta, rmeta)
7880

0 commit comments

Comments
 (0)