@@ -67,7 +67,8 @@ class REINFORCESynapse(DenseSynapse):
6767 # Define Functions
6868 def __init__ (
6969 self , name , shape , eta = 1e-4 , decay = 0.99 , weight_init = None , resist_scale = 1. , act_fx = None ,
70- p_conn = 1. , w_bound = 1. , batch_size = 1 , seed = None , mu_act_fx = None , mu_out_min = - jnp .inf , mu_out_max = jnp .inf , ** kwargs
70+ p_conn = 1. , w_bound = 1. , batch_size = 1 , seed = None , mu_act_fx = None , mu_out_min = - jnp .inf , mu_out_max = jnp .inf ,
71+ scalar_stddev = - 1.0 , ** kwargs
7172 ) -> None :
7273 # The weight matrix is doubled along the output axis: it stores both the mean (mu) weights and the log-sigma weights
7374 input_dim , output_dim = shape
@@ -84,6 +85,7 @@ def __init__(
8485 self .mu_act_fx , self .dmu_act_fx = create_function (mu_act_fx if mu_act_fx is not None else "identity" )
8586 self .mu_out_min = mu_out_min
8687 self .mu_out_max = mu_out_max
88+ self .scalar_stddev = scalar_stddev
8789
8890 ## Compartment setup
8991 self .dWeights = Compartment (self .weights .value * 0 )
@@ -99,7 +101,8 @@ def __init__(
99101 self .seed = Compartment (jax .random .PRNGKey (seed if seed is not None else 42 ))
100102
101103 @staticmethod
102- def _compute_update (dt , inputs , rewards , act_fx , weights , seed , mu_act_fx , dmu_act_fx , mu_out_min , mu_out_max ):
104+ def _compute_update (dt , inputs , rewards , act_fx , weights , seed , mu_act_fx , dmu_act_fx , mu_out_min , mu_out_max , scalar_stddev ):
105+ learning_stddev_mask = jnp .asarray (scalar_stddev <= 0.0 , dtype = jnp .float32 )
103106 # (input_dim, output_dim * 2) => (input_dim, output_dim), (input_dim, output_dim)
104107 W_mu , W_logstd = jnp .split (weights , 2 , axis = - 1 )
105108 # Forward pass
@@ -109,6 +112,7 @@ def _compute_update(dt, inputs, rewards, act_fx, weights, seed, mu_act_fx, dmu_a
109112 logstd = activation @ W_logstd
110113 clip_logstd = jnp .clip (logstd , - 10.0 , 2.0 )
111114 std = jnp .exp (clip_logstd )
115+ std = learning_stddev_mask * std + (1.0 - learning_stddev_mask ) * scalar_stddev # use the learned std when scalar_stddev <= 0, otherwise the fixed scalar value
112116 # Sample using reparameterization trick
113117 epsilon = jax .random .normal (seed , fx_mean .shape )
114118 sample = epsilon * std + fx_mean
@@ -139,6 +143,7 @@ def _compute_update(dt, inputs, rewards, act_fx, weights, seed, mu_act_fx, dmu_a
139143 dL_dstd * std
140144 )
141145 dL_dWlogstd = activation .T @ dL_dlogstd # (I, B) @ (B, A) = (I, A)
146+ dL_dWlogstd = dL_dWlogstd * learning_stddev_mask # zero the log-std gradient when a fixed scalar stddev is in use
142147
143148 # Negate the gradients because ngc-learn applies updates via gradient ascent
144149 dW = jnp .concatenate ([- dL_dWmu , - dL_dWlogstd ], axis = - 1 )
@@ -147,10 +152,10 @@ def _compute_update(dt, inputs, rewards, act_fx, weights, seed, mu_act_fx, dmu_a
147152
148153 @transition (output_compartments = ["weights" , "dWeights" , "objective" , "outputs" , "accumulated_gradients" , "step_count" , "seed" ])
149154 @staticmethod
150- def evolve (dt , w_bound , inputs , rewards , act_fx , weights , eta , learning_mask , decay , accumulated_gradients , step_count , seed , mu_act_fx , dmu_act_fx , mu_out_min , mu_out_max ):
155+ def evolve (dt , w_bound , inputs , rewards , act_fx , weights , eta , learning_mask , decay , accumulated_gradients , step_count , seed , mu_act_fx , dmu_act_fx , mu_out_min , mu_out_max , scalar_stddev ):
151156 main_seed , sub_seed = jax .random .split (seed )
152157 dWeights , objective , outputs = REINFORCESynapse ._compute_update (
153- dt , inputs , rewards , act_fx , weights , sub_seed , mu_act_fx , dmu_act_fx , mu_out_min , mu_out_max
158+ dt , inputs , rewards , act_fx , weights , sub_seed , mu_act_fx , dmu_act_fx , mu_out_min , mu_out_max , scalar_stddev
154159 )
155160 ## do a gradient ascent update/shift
156161 weights = (weights + dWeights * eta ) * learning_mask + weights * (1.0 - learning_mask ) # update the weights only where learning_mask is 1.0
0 commit comments