Skip to content

Commit 0d64899

Browse files
committed
Optimize time series networks
1 parent b034b21 commit 0d64899

File tree

2 files changed

+33
-10
lines changed

2 files changed

+33
-10
lines changed

bayesflow/networks/time_series_network/skip_recurrent.py

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,12 @@
99
@serializable(package="bayesflow.networks")
1010
class SkipRecurrentNet(keras.Model):
1111
"""
12-
Implements a Skip recurrent layer as described in [1], but allowing a more flexible
13-
recurrent backbone and a more flexible implementation.
12+
Implements a Skip recurrent layer as described in [1], allowing a more flexible recurrent backbone
13+
and a more efficient implementation.
1414
1515
[1] Y. Zhang and L. Mikelsons, Solving Stochastic Inverse Problems with Stochastic BayesFlow,
1616
2023 IEEE/ASME International Conference on Advanced Intelligent Mechatronics (AIM),
1717
Seattle, WA, USA, 2023, pp. 966-972, doi: 10.1109/AIM46323.2023.10196190.
18-
19-
TODO: Add proper docstring
20-
2118
"""
2219

2320
def __init__(
@@ -30,6 +27,32 @@ def __init__(
3027
dropout: float = 0.05,
3128
**kwargs,
3229
):
30+
"""
31+
Creates a skip recurrent neural network layer that extends a traditional recurrent backbone with
32+
skip connections implemented via convolution and an additional recurrent path. This allows
33+
more efficient modeling of long-term dependencies by combining local and non-local temporal
34+
features.
35+
36+
Parameters
37+
----------
38+
hidden_dim : int, optional
39+
Dimensionality of the hidden state in the recurrent layers. Default is 256.
40+
recurrent_type : str, optional
41+
Type of recurrent unit to use. Should correspond to a supported type in `find_recurrent_net`,
42+
such as "gru" or "lstm". Default is "gru".
43+
bidirectional : bool, optional
44+
If True, uses bidirectional wrappers for both recurrent and skip recurrent layers. Default is True.
45+
input_channels : int, optional
46+
Number of input channels for the 1D convolution used in skip connections. Default is 64.
47+
skip_steps : int, optional
48+
Step size and kernel size used in the skip convolution. Determines how many steps are skipped.
49+
Also determines the multiplier for the number of filters. Default is 4.
50+
dropout : float, optional
51+
Dropout rate applied within the recurrent layers. Default is 0.05.
52+
**kwargs
53+
Additional keyword arguments passed to the parent class constructor.
54+
"""
55+
3356
super().__init__(**keras_kwargs(kwargs))
3457

3558
self.skip_conv = keras.layers.Conv1D(
@@ -64,4 +87,4 @@ def call(self, time_series: Tensor, training: bool = False, **kwargs) -> Tensor:
6487

6588
@sanitize_input_shape
6689
def build(self, input_shape):
67-
self.call(keras.ops.zeros(input_shape))
90+
super().build(input_shape)

bayesflow/networks/time_series_network/time_series_network.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def __init__(
2525
strides: int | list | tuple = 1,
2626
activation: str = "mish",
2727
kernel_initializer: str = "glorot_uniform",
28-
groups: int = 8,
28+
groups: int = None,
2929
recurrent_type: str = "gru",
3030
recurrent_dim: int = 128,
3131
bidirectional: bool = True,
@@ -62,7 +62,7 @@ def __init__(
6262
Default is "glorot_uniform".
6363
groups : int, optional
6464
Number of groups for group normalization applied after each convolutional layer.
65-
Default is 8.
65+
Default is None.
6666
recurrent_type : str, optional
6767
Type of recurrent layer used for sequence modeling, such as "gru" or "lstm".
6868
Default is "gru".
@@ -99,7 +99,8 @@ def __init__(
9999
padding="same",
100100
)
101101
)
102-
self.conv_blocks.append(keras.layers.GroupNormalization(groups=groups))
102+
if groups is not None:
103+
self.conv_blocks.append(keras.layers.GroupNormalization(groups=groups))
103104

104105
# Recurrent and feedforward backbones
105106
self.recurrent = SkipRecurrentNet(
@@ -149,4 +150,3 @@ def call(self, x: Tensor, training: bool = False, **kwargs) -> Tensor:
149150
@sanitize_input_shape
150151
def build(self, input_shape):
151152
super().build(input_shape)
152-
self.call(keras.ops.zeros(input_shape))

0 commit comments

Comments (0)