|
| 1 | +import numpy as np |
| 2 | + |
| 3 | +from bayesflow.utils.serialization import serializable, serialize |
| 4 | +from .elementwise_transform import ElementwiseTransform |
| 5 | + |
| 6 | + |
@serializable
class ReplaceNaN(ElementwiseTransform):
    """
    Replace NaNs with a default value, and optionally encode a missing-data mask.

    This is based on "Missing data in amortized simulation-based neural posterior estimation" by Wang et al. (2024).

    Parameters
    ----------
    default_value : float, default=0.0
        Value to substitute wherever data is NaN.
    encode_mask : bool, default=False
        If True, the forward pass will expand the array by one new axis and
        concatenate a binary mask (0 for originally-NaN entries, 1 otherwise).
    axis : int or None, default=None
        Axis along which to add the new dimension for mask encoding.
        If None, defaults to `data.ndim` (i.e., a new trailing axis).

    Notes
    -----
    Without mask encoding the substitution is not invertible: ``inverse``
    returns its input unchanged because the positions of the original NaNs
    are no longer known.

    Examples
    --------
    >>> a = np.array([1.0, np.nan, 3.0])
    >>> r_nan = bf.adapters.transforms.ReplaceNaN(default_value=0.0)
    >>> r_nan.forward(a)
    array([1., 0., 3.])

    >>> # With mask encoding along a new last axis:
    >>> r_nan = bf.adapters.transforms.ReplaceNaN(default_value=-1.0, encode_mask=True, axis=-1)
    >>> enc = r_nan.forward(a)
    >>> enc.shape
    (3, 2)

    It's recommended to precede this with a ToArray transform if your data
    might not already be a NumPy array.
    """

    def __init__(
        self,
        *,
        default_value: float = 0.0,
        encode_mask: bool = False,
        axis: int | None = None,
    ):
        super().__init__()
        self.default_value = default_value
        self.encode_mask = encode_mask
        self.axis = axis

    def get_config(self) -> dict:
        """Return the serialized constructor arguments of this transform."""
        return serialize(
            {
                "default_value": self.default_value,
                "encode_mask": self.encode_mask,
                "axis": self.axis,
            }
        )

    def forward(self, data: np.ndarray, **kwargs) -> np.ndarray:
        """
        Replace NaNs by ``default_value`` and optionally append an observed-mask channel.

        Parameters
        ----------
        data : np.ndarray
            Input array, possibly containing NaNs.
        **kwargs
            Ignored.

        Returns
        -------
        np.ndarray
            The filled array. If ``encode_mask`` is True, the result has one
            additional axis of size 2 holding ``[filled_value, mask]``, where
            the mask is 1 for observed entries and 0 for originally-NaN entries.
        """
        missing = np.isnan(data)
        filled = np.where(missing, self.default_value, data)

        if not self.encode_mask:
            return filled

        # `filled` is always an ndarray, so its ndim is available even when
        # `data` was passed as a plain sequence or scalar.
        ax = self.axis if self.axis is not None else filled.ndim

        filled_exp = np.expand_dims(filled, axis=ax)
        # Mask convention: 1 = observed, 0 = missing.
        observed_exp = np.expand_dims((~missing).astype(np.int8), axis=ax)

        # Concatenate along the new axis: [..., value, mask]
        return np.concatenate([filled_exp, observed_exp], axis=ax)

    def inverse(self, data: np.ndarray, **kwargs) -> np.ndarray:
        """
        Undo the mask encoding and restore NaNs at originally-missing positions.

        Parameters
        ----------
        data : np.ndarray
            Array produced by ``forward``. If ``encode_mask`` is True it must
            contain the ``[value, mask]`` pair along the encoding axis.
        **kwargs
            Ignored.

        Returns
        -------
        np.ndarray
            If ``encode_mask`` is False, ``data`` unchanged. Otherwise the value
            channel with NaN written wherever the mask channel is 0.
        """
        if not self.encode_mask:
            # No mask was encoded, so nothing to undo
            return data

        # With axis=None, forward appended a trailing axis; that is axis -1 here.
        ax = self.axis if self.axis is not None else -1

        # Extract the two "channels"
        values = np.take(data, indices=0, axis=ax)
        observed = np.take(data, indices=1, axis=ax).astype(bool)

        # Restore NaNs where mask == 0 (entries that were originally missing).
        # np.where also covers the case where `values` is a 0-d scalar.
        return np.where(observed, values, np.nan)
0 commit comments