
Commit 82e7345

Remove nnpe from adapter
1 parent 85b884b commit 82e7345

9 files changed: +94 additions, -161 deletions
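
In practical terms, this commit turns NNPE from an adapter transform (the removed `Adapter.nnpe()` method below) into a standalone augmentation under `bayesflow.augmentations`, which is either called directly on arrays or passed to a dataset via its `augmentations` argument (see the updated docstring example in nnpe.py). A minimal usage sketch of the new interface, based on that docstring and the new tests; the scale values here are illustrative, not the class defaults:

import numpy as np
import bayesflow as bf

# NNPE is now constructed directly from the augmentations module
nnpe_aug = bf.augmentations.NNPE(spike_scale=0.01, slab_scale=0.2, per_dimension=True, seed=42)

# It is a plain callable: returns an array of the same shape with spike-and-slab noise added
x = np.random.standard_normal(size=(32, 10, 2))
x_noisy = nnpe_aug(x)
assert x_noisy.shape == x.shape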

bayesflow/__init__.py

Lines changed: 1 addition & 0 deletions

@@ -1,6 +1,7 @@
 from . import (
     approximators,
     adapters,
+    augmentations,
     datasets,
     diagnostics,
     distributions,

bayesflow/adapters/adapter.py

Lines changed: 0 additions & 38 deletions

@@ -18,7 +18,6 @@
     Keep,
     Log,
     MapTransform,
-    NNPE,
     NumpyTransform,
     OneHot,
     Rename,

@@ -695,43 +694,6 @@ def map_dtype(self, keys: str | Sequence[str], to_dtype: str):
         self.transforms.append(transform)
         return self

-    def nnpe(
-        self,
-        keys: str | Sequence[str],
-        *,
-        spike_scale: float | None = None,
-        slab_scale: float | None = None,
-        per_dimension: bool = True,
-        seed: int | None = None,
-    ):
-        """Append an :py:class:`~transforms.NNPE` transform to the adapter.
-
-        Parameters
-        ----------
-        keys : str or Sequence of str
-            The names of the variables to transform.
-        spike_scale : float or np.ndarray or None, default=None
-            The scale of the spike (Normal) distribution. Automatically determined if None.
-        slab_scale : float or np.ndarray or None, default=None
-            The scale of the slab (Cauchy) distribution. Automatically determined if None.
-        per_dimension : bool, default=True
-            If true, noise is applied per dimension of the last axis of the input data.
-            If false, noise is applied globally.
-        seed : int or None
-            The seed for the random number generator. If None, a random seed is used.
-        """
-        if isinstance(keys, str):
-            keys = [keys]
-
-        transform = MapTransform(
-            {
-                key: NNPE(spike_scale=spike_scale, slab_scale=slab_scale, per_dimension=per_dimension, seed=seed)
-                for key in keys
-            }
-        )
-        self.transforms.append(transform)
-        return self
-
     def one_hot(self, keys: str | Sequence[str], num_classes: int):
         """Append a :py:class:`~transforms.OneHot` transform to the adapter.

bayesflow/adapters/transforms/__init__.py

Lines changed: 0 additions & 1 deletion

@@ -12,7 +12,6 @@
 from .keep import Keep
 from .log import Log
 from .map_transform import MapTransform
-from .nnpe import NNPE
 from .numpy_transform import NumpyTransform
 from .one_hot import OneHot
 from .rename import Rename

bayesflow/augmentations/__init__.py

Lines changed: 1 addition & 0 deletions

@@ -0,0 +1 @@
+from .nnpe import NNPE

bayesflow/adapters/transforms/nnpe.py renamed to bayesflow/augmentations/nnpe.py

Lines changed: 38 additions & 50 deletions

@@ -1,12 +1,7 @@
 import numpy as np

-from bayesflow.utils.serialization import serializable, serialize

-from .elementwise_transform import ElementwiseTransform
-
-
-@serializable("bayesflow.adapters")
-class NNPE(ElementwiseTransform):
+class NNPE:
     """Implements noisy neural posterior estimation (NNPE) as described in [1], which adds noise following a
     spike-and-slab distribution to the training data as a mild form of data augmentation to robustify against noisy
     real-world data (see [1, 2] for benchmarks). Adds the options of automatic noise scale determination and

@@ -34,12 +29,18 @@ class NNPE(ElementwiseTransform):
         automatic scale determination occurs globally. The original implementation in [1] uses global application
         (i.e., per_dimension=False), whereas dimensionwise is recommended if the data dimensions are heterogeneous.
     seed : int or None
-        The seed for the random number generator. If None, a random seed is used. Used instead of np.random.Generator
-        here to enable easy serialization.
+        The seed for the random number generator. If None, a random seed is used.

     Examples
     --------
-    >>> adapter = bf.Adapter().nnpe(["x"])
+    >>> nnpe_aug = bf.augmentations.NNPE(spike_scale=0.01, slab_scale=0.2, per_dimension=True, seed=42)
+    >>> dataset = bf.datasets.OnlineDataset(
+    ...     simulator=my_sim,
+    ...     batch_size=64,
+    ...     num_batches=100,
+    ...     adapter=None,
+    ...     augmentations={"data": nnpe_aug},
+    ... )
     """

     DEFAULT_SPIKE = 0.01

@@ -54,12 +55,40 @@ def __init__(
         seed: int | None = None,
     ):
         super().__init__()
+
         self.spike_scale = spike_scale
         self.slab_scale = slab_scale
         self.per_dimension = per_dimension
         self.seed = seed
         self.rng = np.random.default_rng(seed)

+    def __call__(self, data: np.ndarray, **kwargs) -> np.ndarray:
+        """
+        Add spike‐and‐slab noise to `data` using automatic scale determination if not provided.
+        See “Notes” section of the class docstring for details).
+
+        Parameters
+        ----------
+        data : np.ndarray
+            Input array to be perturbed.
+        **kwargs
+            Unused keyword arguments.
+        """
+
+        # Check data validity
+        if not np.all(np.isfinite(data)):
+            raise ValueError("NNPE.forward: `data` contains NaN or infinite values.")
+
+        spike_scale = self._resolve_scale("spike_scale", self.spike_scale, self.DEFAULT_SPIKE, data)
+        slab_scale = self._resolve_scale("slab_scale", self.slab_scale, self.DEFAULT_SLAB, data)
+
+        # Apply spike-and-slab noise
+        mixture_mask = self.rng.binomial(n=1, p=0.5, size=data.shape).astype(bool)
+        noise_spike = self.rng.standard_normal(size=data.shape) * spike_scale
+        noise_slab = self.rng.standard_cauchy(size=data.shape) * slab_scale
+        noise = np.where(mixture_mask, noise_slab, noise_spike)
+        return data + noise
+
     def _resolve_scale(
         self,
         name: str,

@@ -118,44 +147,3 @@ def _resolve_scale(
         if arr.ndim != 0:
             raise ValueError(f"{name}: expected scalar, got array of shape {arr.shape}")
         return arr
-
-    def forward(self, data: np.ndarray, **kwargs) -> np.ndarray:
-        """
-        Add spike‐and‐slab noise to `data` using automatic scale determination if not provided.
-        See “Notes” section of the class docstring for details).
-
-        Parameters
-        ----------
-        data : np.ndarray
-            Input array to be perturbed.
-        **kwargs
-            Unused keyword arguments.
-        """
-
-        # Check data validity
-        if not np.all(np.isfinite(data)):
-            raise ValueError("NNPE.forward: `data` contains NaN or infinite values.")
-
-        spike_scale = self._resolve_scale("spike_scale", self.spike_scale, self.DEFAULT_SPIKE, data)
-        slab_scale = self._resolve_scale("slab_scale", self.slab_scale, self.DEFAULT_SLAB, data)
-
-        # Apply spike-and-slab noise
-        mixture_mask = self.rng.binomial(n=1, p=0.5, size=data.shape).astype(bool)
-        noise_spike = self.rng.standard_normal(size=data.shape) * spike_scale
-        noise_slab = self.rng.standard_cauchy(size=data.shape) * slab_scale
-        noise = np.where(mixture_mask, noise_slab, noise_spike)
-        return data + noise
-
-    def inverse(self, data: np.ndarray, **kwargs) -> np.ndarray:
-        # Non-invertible transform
-        return data
-
-    def get_config(self) -> dict:
-        return serialize(
-            {
-                "spike_scale": self.spike_scale,
-                "slab_scale": self.slab_scale,
-                "per_dimension": self.per_dimension,
-                "seed": self.seed,
-            }
-        )
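
For reference, the noise mechanism that the added `__call__` implements: each entry independently receives heavy-tailed Cauchy ("slab") noise with probability 0.5 and Normal ("spike") noise otherwise. A self-contained sketch of that logic with fixed, illustrative scales (the automatic scale determination performed by `_resolve_scale` when a scale is None is omitted here):

import numpy as np

def spike_and_slab_noise(data, spike_scale=0.01, slab_scale=0.25, seed=None):
    # Mirrors the noise logic in NNPE.__call__, but with user-supplied scales only
    rng = np.random.default_rng(seed)
    mixture_mask = rng.binomial(n=1, p=0.5, size=data.shape).astype(bool)
    noise_spike = rng.standard_normal(size=data.shape) * spike_scale  # Normal "spike"
    noise_slab = rng.standard_cauchy(size=data.shape) * slab_scale    # Cauchy "slab"
    return data + np.where(mixture_mask, noise_slab, noise_spike)

print(spike_and_slab_noise(np.zeros((2, 3)), seed=42))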

tests/test_adapters/test_adapters.py

Lines changed: 0 additions & 72 deletions

@@ -317,75 +317,3 @@ def test_nan_to_num():
     out = transform.forward(arr)
     np.testing.assert_array_equal(out["test-2d"], np.array([[1.0, 0.5], [0.5, 4.0]]))
     np.testing.assert_array_equal(out["new_mask_test-2d"], np.array([[1, 0], [0, 1]]))
-
-
-def test_nnpe(random_data):
-    # NNPE cannot be integrated into the adapter fixture and its tests since it modifies the input data
-    # and therefore breaks existing allclose checks
-    import numpy as np
-    from bayesflow.adapters import Adapter
-
-    # Test basic case with global noise application
-    ad = Adapter().nnpe("x1", spike_scale=1.0, slab_scale=1.0, per_dimension=False, seed=42)
-    result_training = ad(random_data)
-    result_inversed = ad(random_data, inverse=True)
-    serialized = serialize(ad)
-    deserialized = deserialize(serialized)
-    reserialized = serialize(deserialized)
-
-    assert keras.tree.lists_to_tuples(serialized) == keras.tree.lists_to_tuples(reserialized)
-
-    # check that only x1 is changed
-    assert "x1" in result_training
-    assert not np.allclose(result_training["x1"], random_data["x1"])
-
-    # all other keys are untouched
-    for k, v in random_data.items():
-        if k == "x1":
-            continue
-        assert np.allclose(result_training[k], v)
-
-    # check that the validation and inference data as well as inversed results are unchanged
-    for k, v in random_data.items():
-        assert np.allclose(result_inversed[k], v)
-
-    # Test both scales and seed are None case (automatic scale determination) with dimensionwise noise application
-    ad_auto = Adapter().nnpe("y1", slab_scale=None, spike_scale=None, per_dimension=True, seed=None)
-    result_training_auto = ad_auto(random_data)
-    assert not np.allclose(result_training_auto["y1"], random_data["y1"])
-    for k, v in random_data.items():
-        if k == "y1":
-            continue
-        assert np.allclose(result_training_auto[k], v)
-
-    serialized_auto = serialize(ad_auto)
-    deserialized_auto = deserialize(serialized_auto)
-    reserialized_auto = serialize(deserialized_auto)
-    assert keras.tree.lists_to_tuples(serialized_auto) == keras.tree.lists_to_tuples(serialize(reserialized_auto))
-
-    # Test dimensionwise versus global noise application (per_dimension=True vs per_dimension=False)
-    # Create data with second dimension having higher variance
-    data_shape = (32, 16, 1)
-    rng = np.random.default_rng(42)
-    zero = np.ones(shape=data_shape)
-    high = rng.normal(0, 100.0, size=data_shape)
-    var_data = {"x": np.concatenate([zero, high], axis=-1)}
-
-    # Apply dimensionwise and global adapters with automatic slab_scale scale determination
-    ad_partial_global = Adapter().nnpe("x", spike_scale=0, slab_scale=None, per_dimension=False, seed=42)
-    ad_partial_dim = Adapter().nnpe("x", spike_scale=[0, 1], slab_scale=None, per_dimension=True, seed=42)
-    res_dim = ad_partial_dim(var_data)
-    res_glob = ad_partial_global(var_data)
-
-    # Compute standard deviations of noise per last axis dimension
-    noise_dim = res_dim["x"] - var_data["x"]
-    noise_glob = res_glob["x"] - var_data["x"]
-    std_dim = np.std(noise_dim, axis=(0, 1))
-    std_glob = np.std(noise_glob, axis=(0, 1))
-
-    # Dimensionwise should assign zero noise, global some noise to zero-variance dimension
-    assert std_dim[0] == 0
-    assert std_glob[0] > 0
-    # Both should assign noise to high-variance dimension
-    assert std_dim[1] > 0
-    assert std_glob[1] > 0

tests/test_augmentations/__init__.py

Whitespace-only changes.

tests/test_augmentations/conftest.py

Lines changed: 10 additions & 0 deletions

@@ -0,0 +1,10 @@
+import numpy as np
+import pytest
+
+
+@pytest.fixture()
+def random_data():
+    return {
+        "x1": np.random.standard_normal(size=(4, 1)),
+        "x2": np.random.standard_normal(size=(8, 10, 1)),
+    }

Lines changed: 44 additions & 0 deletions

@@ -0,0 +1,44 @@
+def test_nnpe(random_data):
+    import numpy as np
+    from bayesflow.augmentations import NNPE
+
+    # Test basic case with global noise application
+    aug = NNPE(spike_scale=1.0, slab_scale=1.0, per_dimension=False, seed=42)
+    result = aug(random_data["x1"])
+
+    # Check that result is the same shape as x1, but changed
+    assert result.shape == random_data["x1"].shape
+    assert not np.allclose(result, random_data["x1"])
+
+    # Test both scales and seed are None case (automatic scale determination) with dimensionwise noise application
+    aug_auto = NNPE(slab_scale=None, spike_scale=None, per_dimension=True, seed=None)
+    result_auto = aug_auto(random_data["x2"])
+    assert result_auto.shape == random_data["x2"].shape
+    assert not np.allclose(result_auto, random_data["x2"])
+
+    # Test dimensionwise versus global noise application (per_dimension=True vs per_dimension=False)
+    # Create data with second dimension having higher variance
+    data_shape = (32, 16, 1)
+    rng = np.random.default_rng(42)
+    zero = np.ones(shape=data_shape)
+    high = rng.normal(0, 100.0, size=data_shape)
+    var_data = np.concatenate([zero, high], axis=-1)
+
+    # Apply dimensionwise and global adapters with automatic slab_scale scale determination
+    aug_partial_global = NNPE(spike_scale=0, slab_scale=None, per_dimension=False, seed=42)
+    aug_partial_dim = NNPE(spike_scale=[0, 1], slab_scale=None, per_dimension=True, seed=42)
+    result_dim = aug_partial_dim(var_data)
+    result_glob = aug_partial_global(var_data)
+
+    # Compute standard deviations of noise per last axis dimension
+    noise_dim = result_dim - var_data
+    noise_glob = result_glob - var_data
+    std_dim = np.std(noise_dim, axis=(0, 1))
+    std_glob = np.std(noise_glob, axis=(0, 1))
+
+    # Dimensionwise should assign zero noise, global some noise to zero-variance dimension
+    assert std_dim[0] == 0
+    assert std_glob[0] > 0
+    # Both should assign noise to high-variance dimension
+    assert std_dim[1] > 0
+    assert std_glob[1] > 0
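
The dimensionwise-versus-global part of this test relies on the constructed data having one constant dimension and one high-variance dimension along the last axis, so that the per-dimension slab scale determined from the data is zero for the constant dimension (any spread statistic of a constant array is zero), while a single global scale is not. A quick NumPy-only check of that premise:

import numpy as np

data_shape = (32, 16, 1)
rng = np.random.default_rng(42)
zero = np.ones(shape=data_shape)              # constant values: zero spread in dimension 0
high = rng.normal(0, 100.0, size=data_shape)  # large spread in dimension 1
var_data = np.concatenate([zero, high], axis=-1)

# Per-dimension spread over the leading axes vs. a single pooled value
print(np.std(var_data, axis=(0, 1)))  # roughly [0., 100.]
print(np.std(var_data))               # pooled value, clearly > 0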
