|
5 | 5 | from typing import Union |
6 | 6 |
|
7 | 7 |
|
class DenseLayer(nnx.Module):
    """
    Dense layer with random weight factorization (RWF) for use in MLP architectures.

    Note: This is not a KAN layer, but a standard MLP building block used in advanced
    KAN architectures like KKAN (see jaxkan.models module).

    Attributes:
        g (nnx.Param):
            Scale factor vector of shape (n_out,) from the RWF reparameterization.
        v (nnx.Param):
            Direction matrix of shape (n_in, n_out) from the RWF reparameterization.
        b (nnx.Param or None):
            Bias vector of shape (n_out,), or None if add_bias is False.
        activation (callable or None):
            Activation function applied after the linear transformation, or None.
    """

    def __init__(self, n_in: int, n_out: int, activation=None,
                 RWF: Union[dict, None] = None,
                 add_bias: bool = True, seed: int = 42):
        """
        Initializes a Dense layer with RWF.

        Args:
            n_in (int):
                Number of input features.
            n_out (int):
                Number of output features.
            activation (callable, optional):
                Activation function applied after the linear transformation.
                Defaults to None.
            RWF (dict, optional):
                Dictionary with keys ``'mean'`` and ``'std'`` controlling the
                log-normal scale of the RWF reparameterization.
                Defaults to ``{"mean": 1.0, "std": 0.1}``.
            add_bias (bool, optional):
                Whether to include a learnable bias term. Defaults to True.
            seed (int, optional):
                Random seed for parameter initialization. Defaults to 42.

        Example:
            >>> layer = DenseLayer(n_in=64, n_out=32, add_bias=True, seed=42)
        """
        # Build the default hyperparameters per call instead of using a
        # mutable default argument, which would be shared across all calls.
        if RWF is None:
            RWF = {"mean": 1.0, "std": 0.1}

        mu, sigma = RWF["mean"], RWF["std"]

        # Setup nnx rngs
        rngs = nnx.Rngs(seed)

        # Glorot (Xavier) normal stddev for a kernel of shape (n_in, n_out)
        stddev = jnp.sqrt(2.0 / (n_in + n_out))

        # Unfactorized weight matrix, shape (n_in, n_out)
        w = nnx.initializers.normal(stddev=stddev)(
            rngs.params(), (n_in, n_out), jnp.float32
        )

        # RWF reparameterization: kernel = g * v, where g is log-normally
        # distributed (exp of a Gaussian), hence strictly positive.
        g = nnx.initializers.normal(stddev=sigma)(
            rngs.params(), (n_out,), jnp.float32
        )
        g = jnp.exp(g + mu)  # shape (n_out,)
        v = w / g            # shape (n_in, n_out); g > 0, so safe division

        self.g = nnx.Param(g)
        self.v = nnx.Param(v)

        # Bias, shape (n_out,), or None when disabled
        self.b = nnx.Param(jnp.zeros((n_out,))) if add_bias else None

        self.activation = activation

    def __call__(self, x):
        """
        Applies the dense layer to the input.

        Args:
            x (jnp.ndarray):
                Input array of shape (batch, n_in).

        Returns:
            jnp.ndarray:
                Output array of shape (batch, n_out).

        Example:
            >>> layer = DenseLayer(n_in=4, n_out=2)
            >>> x = jnp.ones((3, 4))
            >>> y = layer(x)  # shape: (3, 2)
        """
        # Reconstruct the effective kernel from the RWF factors
        g, v = self.g[...], self.v[...]
        kernel = g * v

        # Linear transformation
        y = jnp.dot(x, kernel)

        if self.b is not None:
            y = y + self.b[...]

        if self.activation is not None:
            y = self.activation(y)

        return y
0 commit comments