Merge branch 'feat-diffusion-model-adapt' into feat-diffusion-model

vpratz · vpratz · commit efeff85da112 · 2025-04-24T08:16:33.000Z
diff --git a/bayesflow/experimental/diffusion_model.py b/bayesflow/experimental/diffusion_model.py
@@ -2,8 +2,8 @@
 from abc import ABC, abstractmethod
 import keras
 from keras import ops
-from keras.saving import register_keras_serializable as serializable
 
+from bayesflow.utils.serialization import serialize, deserialize, serializable
 from bayesflow.types import Tensor, Shape
 import bayesflow as bf
 from bayesflow.networks import InferenceNetwork
@@ -13,8 +13,7 @@
     expand_right_as,
     find_network,
     jacobian_trace,
-    serialize_value_or_type,
-    deserialize_value_or_type,
+    layer_kwargs,
     weighted_mean,
     integrate,
 )
@@ -132,9 +131,9 @@ class LinearNoiseSchedule(NoiseSchedule):
     """
 
     def __init__(self, min_log_snr: float = -15, max_log_snr: float = 15):
-        super().__init__(name="linear_noise_schedule", variance_type="preserving")
-        self._log_snr_min = ops.convert_to_tensor(min_log_snr)
-        self._log_snr_max = ops.convert_to_tensor(max_log_snr)
+        super().__init__(name="linear_noise_schedule")
+        self._log_snr_min = min_log_snr
+        self._log_snr_max = max_log_snr
 
         self._t_min = self.get_t_from_log_snr(log_snr_t=self._log_snr_max, training=True)
         self._t_max = self.get_t_from_log_snr(log_snr_t=self._log_snr_max, training=True)
@@ -182,9 +181,10 @@ class CosineNoiseSchedule(NoiseSchedule):
 
     def __init__(self, min_log_snr: float = -15, max_log_snr: float = 15, s_shift_cosine: float = 0.0):
         super().__init__(name="cosine_noise_schedule", variance_type="preserving")
-        self._log_snr_min = ops.convert_to_tensor(min_log_snr)
-        self._log_snr_max = ops.convert_to_tensor(max_log_snr)
         self._s_shift_cosine = ops.convert_to_tensor(s_shift_cosine)
+        self._log_snr_min = min_log_snr
+        self._log_snr_max = max_log_snr
+        self._s_shift_cosine = s_shift_cosine
 
         self._t_min = self.get_t_from_log_snr(log_snr_t=self._log_snr_max, training=True)
         self._t_max = self.get_t_from_log_snr(log_snr_t=self._log_snr_max, training=True)
@@ -229,12 +229,13 @@ class EDMNoiseSchedule(NoiseSchedule):
 
     def __init__(self, sigma_data: float = 0.5, sigma_min: float = 0.002, sigma_max: float = 80):
         super().__init__(name="edm_noise_schedule", variance_type="exploding")
-        self.sigma_data = ops.convert_to_tensor(sigma_data)
-        self.sigma_max = ops.convert_to_tensor(sigma_max)
-        self.sigma_min = ops.convert_to_tensor(sigma_min)
-        self.p_mean = ops.convert_to_tensor(-1.2)
-        self.p_std = ops.convert_to_tensor(1.2)
-        self.rho = ops.convert_to_tensor(7)
+        super().__init__(name="edm_noise_schedule")
+        self.sigma_data = sigma_data
+        self.sigma_max = sigma_max
+        self.sigma_min = sigma_min
+        self.p_mean = -1.2
+        self.p_std = 1.2
+        self.rho = 7
 
         # convert EDM parameters to signal-to-noise ratio formulation
         self._log_snr_min = -2 * ops.log(sigma_max)
@@ -306,7 +307,7 @@ def get_weights_for_snr(self, log_snr_t: Tensor) -> Tensor:
         return ops.exp(-log_snr_t) + 0.5**2
 
 
-@serializable(package="bayesflow.networks")
+@serializable
 class DiffusionModel(InferenceNetwork):
     """Diffusion Model as described in this overview paper [1].
 
@@ -401,18 +402,11 @@ def __init__(
         self.subnet = find_network(subnet, **subnet_kwargs)
         self.output_projector = keras.layers.Dense(units=None, bias_initializer="zeros")
 
-        # serialization: store all parameters necessary to call __init__
-        self.config = {
-            "integrate_kwargs": self.integrate_kwargs,
-            "subnet_kwargs": subnet_kwargs,
-            "noise_schedule": self.noise_schedule,
-            "prediction_type": self.prediction_type,
-            **kwargs,
-        }
-        self.config = serialize_value_or_type(self.config, "subnet", subnet)
-
     def build(self, xz_shape: Shape, conditions_shape: Shape = None) -> None:
-        super().build(xz_shape, conditions_shape=conditions_shape)
+        if self.built:
+            return
+
+        self.base_distribution.build(xz_shape)
 
         self.output_projector.units = xz_shape[-1]
         input_shape = list(xz_shape)
@@ -430,12 +424,19 @@ def build(self, xz_shape: Shape, conditions_shape: Shape = None) -> None:
 
     def get_config(self):
         base_config = super().get_config()
-        return base_config | self.config
+        base_config = layer_kwargs(base_config)
+
+        config = {
+            "subnet": self.subnet,
+            "noise_schedule": self.noise_schedule,
+            "integrate_kwargs": self.integrate_kwargs,
+            "prediction_type": self.prediction_type,
+        }
+        return base_config | serialize(config)
 
     @classmethod
-    def from_config(cls, config):
-        config = deserialize_value_or_type(config, "subnet")
-        return cls(**config)
+    def from_config(cls, config, custom_objects=None):
+        return cls(**deserialize(config, custom_objects=custom_objects))
 
     def convert_prediction_to_x(
         self, pred: Tensor, z: Tensor, alpha_t: Tensor, sigma_t: Tensor, log_snr_t: Tensor, clip_x: bool
@@ -515,7 +516,14 @@ def _forward(
         training: bool = False,
         **kwargs,
     ) -> Tensor | tuple[Tensor, Tensor]:
-        integrate_kwargs = self.integrate_kwargs | kwargs
+        integrate_kwargs = (
+            {
+                "start_time": self.noise_schedule._t_min,
+                "stop_time": self.noise_schedule._t_max,
+            }
+            | self.integrate_kwargs
+            | kwargs
+        )
         if density:
 
             def deltas(time, xz):
@@ -557,7 +565,14 @@ def _inverse(
         training: bool = False,
         **kwargs,
     ) -> Tensor | tuple[Tensor, Tensor]:
-        integrate_kwargs = self.integrate_kwargs | kwargs
+        integrate_kwargs = (
+            {
+                "start_time": self.noise_schedule._t_max,
+                "stop_time": self.noise_schedule._t_min,
+            }
+            | self.integrate_kwargs
+            | kwargs
+        )
         if density:
 
             def deltas(time, xz):
diff --git a/bayesflow/links/ordered.py b/bayesflow/links/ordered.py
@@ -2,6 +2,7 @@
 from keras.saving import register_keras_serializable as serializable
 
 from bayesflow.utils import layer_kwargs
+from bayesflow.utils.decorators import sanitize_input_shape
 
 
 @serializable(package="links.ordered")
@@ -49,5 +50,6 @@ def call(self, inputs):
         x = keras.ops.concatenate([below, anchor_input, above], self.axis)
         return x
 
+    @sanitize_input_shape
     def compute_output_shape(self, input_shape):
         return input_shape
diff --git a/bayesflow/networks/summary_network.py b/bayesflow/networks/summary_network.py
@@ -21,6 +21,7 @@ def build(self, input_shape):
         if self.base_distribution is not None:
             self.base_distribution.build(keras.ops.shape(z))
 
+    @sanitize_input_shape
     def compute_output_shape(self, input_shape):
         return keras.ops.shape(self.call(keras.ops.zeros(input_shape)))
 
diff --git a/bayesflow/networks/transformers/mab.py b/bayesflow/networks/transformers/mab.py
@@ -4,6 +4,7 @@
 from bayesflow.networks import MLP
 from bayesflow.types import Tensor
 from bayesflow.utils import layer_kwargs
+from bayesflow.utils.decorators import sanitize_input_shape
 from bayesflow.utils.serialization import serializable
 
 
@@ -122,8 +123,10 @@ def call(self, seq_x: Tensor, seq_y: Tensor, training: bool = False, **kwargs) -
         return out
 
     # noinspection PyMethodOverriding
+    @sanitize_input_shape
     def build(self, seq_x_shape, seq_y_shape):
         self.call(keras.ops.zeros(seq_x_shape), keras.ops.zeros(seq_y_shape))
 
+    @sanitize_input_shape
     def compute_output_shape(self, seq_x_shape, seq_y_shape):
         return keras.ops.shape(self.call(keras.ops.zeros(seq_x_shape), keras.ops.zeros(seq_y_shape)))
diff --git a/bayesflow/networks/transformers/pma.py b/bayesflow/networks/transformers/pma.py
@@ -4,6 +4,7 @@
 from bayesflow.networks import MLP
 from bayesflow.types import Tensor
 from bayesflow.utils import layer_kwargs
+from bayesflow.utils.decorators import sanitize_input_shape
 from bayesflow.utils.serialization import serializable
 
 from .mab import MultiHeadAttentionBlock
@@ -125,5 +126,6 @@ def call(self, input_set: Tensor, training: bool = False, **kwargs) -> Tensor:
         summaries = self.mab(seed_tiled, set_x_transformed, training=training, **kwargs)
         return ops.reshape(summaries, (ops.shape(summaries)[0], -1))
 
+    @sanitize_input_shape
     def compute_output_shape(self, input_shape):
         return keras.ops.shape(self.call(keras.ops.zeros(input_shape)))
diff --git a/bayesflow/networks/transformers/sab.py b/bayesflow/networks/transformers/sab.py
@@ -1,6 +1,7 @@
 import keras
 
 from bayesflow.types import Tensor
+from bayesflow.utils.decorators import sanitize_input_shape
 from bayesflow.utils.serialization import serializable
 
 from .mab import MultiHeadAttentionBlock
@@ -16,6 +17,7 @@ class SetAttentionBlock(MultiHeadAttentionBlock):
     """
 
     # noinspection PyMethodOverriding
+    @sanitize_input_shape
     def build(self, input_set_shape):
         self.call(keras.ops.zeros(input_set_shape))
 
@@ -42,5 +44,6 @@ def call(self, input_set: Tensor, training: bool = False, **kwargs) -> Tensor:
         return super().call(input_set, input_set, training=training, **kwargs)
 
     # noinspection PyMethodOverriding
+    @sanitize_input_shape
     def compute_output_shape(self, input_set_shape):
         return keras.ops.shape(self.call(keras.ops.zeros(input_set_shape)))
diff --git a/bayesflow/utils/decorators.py b/bayesflow/utils/decorators.py
@@ -114,7 +114,7 @@ def callback(x):
 
 
 def sanitize_input_shape(fn: Callable):
-    """Decorator to replace the first dimension in input_shape with a dummy batch size if it is None"""
+    """Decorator to replace the first dimension in ..._shape arguments with a dummy batch size if it is None"""
 
     # The Keras functional API passes input_shape = (None, second_dim, third_dim, ...), which
     # causes problems when constructions like self.call(keras.ops.zeros(input_shape)) are used
@@ -126,5 +126,8 @@ def callback(input_shape: Shape) -> Shape:
             return tuple(input_shape)
         return input_shape
 
-    fn = argument_callback("input_shape", callback)(fn)
+    args = inspect.getfullargspec(fn).args
+    for arg in args:
+        if arg.endswith("_shape"):
+            fn = argument_callback(arg, callback)(fn)
     return fn
diff --git a/examples/From_ABC_to_BayesFlow.ipynb b/examples/From_ABC_to_BayesFlow.ipynb
@@ -38,7 +38,10 @@
    "outputs": [],
    "source": [
     "import numpy as np\n",
-    "import matplotlib.pyplot as plt"
+    "import matplotlib.pyplot as plt\n",
+    "import tempfile\n",
+    "from pathlib import Path\n",
+    "import platform"
    ]
   },
   {
@@ -322,7 +325,9 @@
     ")\n",
     "\n",
     "# generate a temporary SQLite DB\n",
-    "abc_id = abc.new(\"sqlite:////tmp/mjp.db\", observations)"
+    "prefix = \"sqlite:///\" if platform.system() == \"Windows\" else \"sqlite:////\"\n",
+    "db_path = (Path(tempfile.gettempdir()).absolute() / \"mjp.db\").as_uri().replace(\"file:///\", prefix)\n",
+    "abc_id = abc.new(db_path, observations)"
    ]
   },
   {
diff --git a/examples/SIR_Posterior_Estimation.ipynb b/examples/SIR_Posterior_Estimation.ipynb
@@ -19,7 +19,11 @@
    "source": [
     "import os\n",
     "# Set to your favorite backend\n",
-    "os.environ[\"KERAS_BACKEND\"] = \"jax\""
+    "if \"KERAS_BACKEND\" not in os.environ:\n",
+    "    # set this to \"torch\", \"tensorflow\", or \"jax\"\n",
+    "    os.environ[\"KERAS_BACKEND\"] = \"tensorflow\"\n",
+    "else:\n",
+    "    print(f\"Using '{os.environ['KERAS_BACKEND']}' backend\")"
    ]
   },
   {
diff --git a/examples/Two_Moons_Starter.ipynb b/examples/Two_Moons_Starter.ipynb
@@ -24,7 +24,11 @@
    "source": [
     "import os\n",
     "# Set to your favorite backend\n",
-    "os.environ[\"KERAS_BACKEND\"] = \"jax\""
+    "if \"KERAS_BACKEND\" not in os.environ:\n",
+    "    # set this to \"torch\", \"tensorflow\", or \"jax\"\n",
+    "    os.environ[\"KERAS_BACKEND\"] = \"tensorflow\"\n",
+    "else:\n",
+    "    print(f\"Using '{os.environ['KERAS_BACKEND']}' backend\")"
    ]
   },
   {
diff --git a/tests/test_networks/test_summary_networks.py b/tests/test_networks/test_summary_networks.py
@@ -25,6 +25,28 @@ def test_build(automatic, summary_network, random_set):
     assert summary_network.variables, "Model has no variables."
 
 
+@pytest.mark.parametrize("automatic", [True, False])
+def test_build_functional_api(automatic, summary_network, random_set):
+    if summary_network is None:
+        pytest.skip(reason="Nothing to do, because there is no summary network.")
+
+    assert summary_network.built is False
+
+    inputs = keras.layers.Input(shape=keras.ops.shape(random_set)[1:])
+    outputs = summary_network(inputs)
+    model = keras.Model(inputs=inputs, outputs=outputs)
+
+    if automatic:
+        model(random_set)
+    else:
+        model.build(keras.ops.shape(random_set))
+
+    assert model.built is True
+
+    # check the model has variables
+    assert summary_network.variables, "Model has no variables."
+
+
 def test_variable_batch_size(summary_network, random_set):
     if summary_network is None:
         pytest.skip(reason="Nothing to do, because there is no summary network.")
diff --git a/tests/utils/jupyter.py b/tests/utils/jupyter.py
@@ -10,10 +10,10 @@ def run_notebook(path):
     checkpoint_path = path.parent / "checkpoints"
     # only clean up if the directory did not exist before the test
     cleanup_checkpoints = not checkpoint_path.exists()
-    with open(str(path)) as f:
+    with open(str(path), encoding="utf-8") as f:
         nb = nbformat.read(f, nbformat.NO_CONVERT)
 
-    kernel = ExecutePreprocessor(timeout=600, kernel_name="python3", resources={"metadata": {"path": path.parent}})
+    kernel = ExecutePreprocessor(timeout=3600, kernel_name="python3", resources={"metadata": {"path": path.parent}})
 
     try:
         result = kernel.preprocess(nb)

examples/From_ABC_to_BayesFlow.ipynb
Original file line number	Diff line number	Diff line change
`@@ -38,7 +38,10 @@`
`38`	`38`	`"outputs": [],`
`39`	`39`	`"source": [`
`40`	`40`	`"import numpy as np\n",`
`41`		`- "import matplotlib.pyplot as plt"`
	`41`	`+ "import matplotlib.pyplot as plt\n",`
	`42`	`+ "import tempfile\n",`
	`43`	`+ "from pathlib import Path\n",`
	`44`	`+ "import platform"`
`42`	`45`	`]`
`43`	`46`	`},`
`44`	`47`	`{`
`@@ -322,7 +325,9 @@`
`322`	`325`	`")\n",`
`323`	`326`	`"\n",`
`324`	`327`	`"# generate a temporary SQLite DB\n",`
`325`		`- "abc_id = abc.new(\"sqlite:////tmp/mjp.db\", observations)"`
	`328`	`+ "prefix = \"sqlite:///\" if platform.system() == \"Windows\" else \"sqlite:////\"\n",`
	`329`	`+ "db_path = (Path(tempfile.gettempdir()).absolute() / \"mjp.db\").as_uri().replace(\"file:///\", prefix)\n",`
	`330`	`+ "abc_id = abc.new(db_path, observations)"`
`326`	`331`	`]`
`327`	`332`	`},`
`328`	`333`	`{`
examples/SIR_Posterior_Estimation.ipynb
Original file line number	Diff line number	Diff line change
`@@ -19,7 +19,11 @@`
`19`	`19`	`"source": [`
`20`	`20`	`"import os\n",`
`21`	`21`	`"# Set to your favorite backend\n",`
`22`		`- "os.environ[\"KERAS_BACKEND\"] = \"jax\""`
	`22`	`+ "if \"KERAS_BACKEND\" not in os.environ:\n",`
	`23`	`+ "    # set this to \"torch\", \"tensorflow\", or \"jax\"\n",`
	`24`	`+ "    os.environ[\"KERAS_BACKEND\"] = \"tensorflow\"\n",`
	`25`	`+ "else:\n",`
	`26`	`+ "    print(f\"Using '{os.environ['KERAS_BACKEND']}' backend\")"`
`23`	`27`	`]`
`24`	`28`	`},`
`25`	`29`	`{`
examples/Two_Moons_Starter.ipynb
Original file line number	Diff line number	Diff line change
`@@ -24,7 +24,11 @@`
`24`	`24`	`"source": [`
`25`	`25`	`"import os\n",`
`26`	`26`	`"# Set to your favorite backend\n",`
`27`		`- "os.environ[\"KERAS_BACKEND\"] = \"jax\""`
	`27`	`+ "if \"KERAS_BACKEND\" not in os.environ:\n",`
	`28`	`+ "    # set this to \"torch\", \"tensorflow\", or \"jax\"\n",`
	`29`	`+ "    os.environ[\"KERAS_BACKEND\"] = \"tensorflow\"\n",`
	`30`	`+ "else:\n",`
	`31`	`+ "    print(f\"Using '{os.environ['KERAS_BACKEND']}' backend\")"`
`28`	`32`	`]`
`29`	`33`	`},`
`30`	`34`	`{`