update reinforce synapse and testing

rxng8 · rxng8 · commit b154e4e8d4f3 · 2025-04-04T23:58:49.000-04:00
diff --git a/ngclearn/components/synapses/modulated/REINFORCESynapse.py b/ngclearn/components/synapses/modulated/REINFORCESynapse.py
@@ -15,7 +15,7 @@ class REINFORCESynapse(DenseSynapse):
 
     # Define Functions
     def __init__(
-            self, name, shape, eta=1e-4, weight_init=None, resist_scale=1., act_fx=None,
+            self, name, shape, eta=1e-4, decay=0.99, weight_init=None, resist_scale=1., act_fx=None,
             p_conn=1., w_bound=1., batch_size=1, **kwargs
     ):
         # This is because we have weights mu and weight log sigma
@@ -37,7 +37,8 @@ def __init__(
         self.rewards = Compartment(jnp.zeros((batch_size,))) # the normalized reward (r - r_hat), input compartment
         self.act_fx, self.dact_fx = create_function(act_fx if act_fx is not None else "identity")
         # self.seed = Component(seed)
-
+        self.accumulated_gradients = Compartment(jnp.zeros((input_dim, output_dim * 2)))
+        self.decay = decay
 
     @staticmethod
     def _compute_update(dt, inputs, rewards, act_fx, weights):
@@ -72,9 +73,9 @@ def _compute_update(dt, inputs, rewards, act_fx, weights):
         # Finally, return metrics if needed
         return dW, objective, outputs
 
-    @transition(output_compartments=["weights", "dWeights", "objective", "outputs"])
+    @transition(output_compartments=["weights", "dWeights", "objective", "outputs", "accumulated_gradients"])
     @staticmethod
-    def evolve(dt, w_bound, inputs, rewards, act_fx, weights, eta):
+    def evolve(dt, w_bound, inputs, rewards, act_fx, weights, eta, decay, accumulated_gradients):
         dWeights, objective, outputs = REINFORCESynapse._compute_update(
             dt, inputs, rewards, act_fx, weights
         )
@@ -83,9 +84,10 @@ def evolve(dt, w_bound, inputs, rewards, act_fx, weights, eta):
         ## enforce non-negativity
         eps = 0.01 # 0.001
         weights = jnp.clip(weights, eps, w_bound - eps)  # jnp.abs(w_bound))
-        return weights, dWeights, objective, outputs
+        accumulated_gradients = accumulated_gradients * decay + dWeights
+        return weights, dWeights, objective, outputs, accumulated_gradients
 
-    @transition(output_compartments=["inputs", "outputs", "objective", "rewards", "dWeights"])
+    @transition(output_compartments=["inputs", "outputs", "objective", "rewards", "dWeights", "accumulated_gradients"])
     @staticmethod
     def reset(batch_size, shape):
         preVals = jnp.zeros((batch_size, shape[0]))
@@ -95,7 +97,8 @@ def reset(batch_size, shape):
         objective = jnp.zeros(())
         rewards = jnp.zeros((batch_size,))
         dWeights = jnp.zeros(shape)
-        return inputs, outputs, objective, rewards, dWeights
+        accumulated_gradients = jnp.zeros((shape[0], shape[1] * 2))
+        return inputs, outputs, objective, rewards, dWeights, accumulated_gradients
 
     @classmethod
     def help(cls): ## component help function
@@ -110,8 +113,8 @@ def help(cls): ## component help function
         }
         info = {cls.__name__: properties,
                 "compartments": compartment_props,
-                "dynamics": "outputs = [(W * Rscale) * inputs] ;"
-                            "dW_{ij}/dt = A_plus * (z_j - x_tar) * s_i - A_minus * s_j * z_i",
+                # "dynamics": "outputs = [(W * Rscale) * inputs] ;"
+                #             "dW_{ij}/dt = A_plus * (z_j - x_tar) * s_i - A_minus * s_j * z_i",
                 "hyperparameters": hyperparams}
         return info
 
diff --git a/tests/components/synapses/modulated/test_REINFORCESynapse.py b/tests/components/synapses/modulated/test_REINFORCESynapse.py
@@ -1,5 +1,6 @@
 # %%
 
+import jax
 from jax import numpy as jnp, random, jit
 from ngcsimlib.context import Context
 import numpy as np
@@ -13,6 +14,9 @@
 from ngcsimlib.compartment import Compartment
 from ngcsimlib.context import Context
 
+import jax
+import jax.numpy as jnp
+
 def test_REINFORCESynapse1():
     name = "reinforce_ctx"
     ## create seeding keys
@@ -40,16 +44,50 @@ def clamp_inputs(x):
         def clamp_rewards(x):
             a.rewards.set(x)
 
+        @Context.dynamicCommand
+        def clamp_weights(x):
+            a.weights.set(x)
+
     # a.weights.set(jnp.ones((1, 1)) * 0.1)
 
     ## check pre-synaptic STDP only
     # truth = jnp.array([[1.25]])
     ctx.reset()
+    clamp_weights(jnp.ones((1, 2)))
     clamp_rewards(jnp.ones((1, 1)))
     clamp_inputs(jnp.ones((1, 1)))
     ctx.adapt(t=1., dt=dt)
     # assert_array_equal(a.dWeights.value, truth)
     print(a.dWeights.value)
 
-#test_REINFORCESynapse1()
 
+    # JAX Grad output
+    _act = jax.nn.tanh
+    def fn(params: dict, inputs: jax.Array, outputs: jax.Array, seed):
+        W_mu, W_logstd = params
+        mean = _act(inputs) @ W_mu
+        logstd = _act(inputs) @ W_logstd
+        std = jnp.exp(logstd.clip(-10.0, 2.0))
+        sample = jax.random.normal(seed, mean.shape) * std + mean
+        # logp = gaussian_logpdf(sample, mean, std).sum(-1)
+        logp = jax.scipy.stats.norm.logpdf(sample, mean, std).sum(-1)
+        return (-logp * outputs).mean() * 1e-2
+    grad_fn = jax.value_and_grad(fn)
+
+    weights_mu = jnp.ones((1, 1))
+    weights_logstd = jnp.ones((1, 1))
+    inputs = jnp.ones((1, 1))
+    outputs = jnp.ones((1, 1))
+    objective, grads = grad_fn(
+        (weights_mu, weights_logstd),
+        inputs,
+        outputs,
+        jax.random.key(42)
+    )
+    np.testing.assert_allclose(
+        a.dWeights.value[0],
+        jnp.concatenate([grads[0], grads[1]], axis=-1),
+        atol=1e-2
+    ) # NOTE: gradient is not exact due to different gradient computation, we need to inspect more closely
+
+# test_REINFORCESynapse1()