@@ -67,7 +67,7 @@ class REINFORCESynapse(DenseSynapse):
     # Define Functions
     def __init__(
         self, name, shape, eta=1e-4, decay=0.99, weight_init=None, resist_scale=1., act_fx=None,
-        p_conn=1., w_bound=1., batch_size=1, seed=None, mu_act_fx=None, **kwargs
+        p_conn=1., w_bound=1., batch_size=1, seed=None, mu_act_fx=None, mu_out_min=-jnp.inf, mu_out_max=jnp.inf, **kwargs
     ) -> None:
         # This is because we have weights mu and weight log sigma
         input_dim, output_dim = shape
@@ -82,6 +82,8 @@ def __init__(
         # self.out_min = out_min
         # self.out_max = out_max
         self.mu_act_fx, self.dmu_act_fx = create_function(mu_act_fx if mu_act_fx is not None else "identity")
+        self.mu_out_min = mu_out_min
+        self.mu_out_max = mu_out_max
 
         ## Compartment setup
         self.dWeights = Compartment(self.weights.value * 0)
@@ -97,7 +99,7 @@ def __init__(
         self.seed = Compartment(jax.random.PRNGKey(seed if seed is not None else 42))
 
     @staticmethod
-    def _compute_update(dt, inputs, rewards, act_fx, weights, seed, mu_act_fx, dmu_act_fx):
+    def _compute_update(dt, inputs, rewards, act_fx, weights, seed, mu_act_fx, dmu_act_fx, mu_out_min, mu_out_max):
         # (input_dim, output_dim * 2) => (input_dim, output_dim), (input_dim, output_dim)
         W_mu, W_logstd = jnp.split(weights, 2, axis=-1)
         # Forward pass
@@ -110,6 +112,7 @@ def _compute_update(dt, inputs, rewards, act_fx, weights, seed, mu_act_fx, dmu_act_fx):
         # Sample using reparameterization trick
         epsilon = jax.random.normal(seed, fx_mean.shape)
         sample = epsilon * std + fx_mean
+        sample = jnp.clip(sample, mu_out_min, mu_out_max)
         outputs = sample  # the actual action that we take
         # Compute log probability density of the Gaussian
         log_prob = gaussian_logpdf(sample, fx_mean, std).sum(-1)
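The added jnp.clip line bounds the reparameterized sample before it is emitted as the action. A minimal standalone sketch of that sampling-and-clipping step (plain JAX; the mean, standard deviation, and bound values below are illustrative placeholders, not taken from the component):

import jax
import jax.numpy as jnp

key = jax.random.PRNGKey(0)
fx_mean = jnp.zeros((1, 4))          # post-activation mean of the Gaussian policy
std = jnp.ones((1, 4)) * 0.5         # standard deviation (exp of the log-std weights)
mu_out_min, mu_out_max = -1.0, 1.0   # assumed action bounds

# Reparameterization trick: sample = mean + std * eps, with eps ~ N(0, I)
epsilon = jax.random.normal(key, fx_mean.shape)
sample = epsilon * std + fx_mean
# Clamp the action into [mu_out_min, mu_out_max], as the new diff line does
sample = jnp.clip(sample, mu_out_min, mu_out_max)

Note that in the hunk above the log-probability is evaluated on the clipped sample under the unclipped Gaussian, so at the bounds the REINFORCE gradient is taken with respect to the clamped action.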
@@ -144,10 +147,10 @@ def _compute_update(dt, inputs, rewards, act_fx, weights, seed, mu_act_fx, dmu_act_fx):
 
     @transition(output_compartments=["weights", "dWeights", "objective", "outputs", "accumulated_gradients", "step_count", "seed"])
     @staticmethod
-    def evolve(dt, w_bound, inputs, rewards, act_fx, weights, eta, learning_mask, decay, accumulated_gradients, step_count, seed, mu_act_fx, dmu_act_fx):
+    def evolve(dt, w_bound, inputs, rewards, act_fx, weights, eta, learning_mask, decay, accumulated_gradients, step_count, seed, mu_act_fx, dmu_act_fx, mu_out_min, mu_out_max):
         main_seed, sub_seed = jax.random.split(seed)
         dWeights, objective, outputs = REINFORCESynapse._compute_update(
-            dt, inputs, rewards, act_fx, weights, sub_seed, mu_act_fx, dmu_act_fx
+            dt, inputs, rewards, act_fx, weights, sub_seed, mu_act_fx, dmu_act_fx, mu_out_min, mu_out_max
         )
         ## do a gradient ascent update/shift
         weights = (weights + dWeights * eta) * learning_mask + weights * (1.0 - learning_mask)  # update the weights only where learning_mask is 1.0
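With these changes, action bounds can be set at construction time. A hedged usage sketch based only on the signature shown above (the import path, shape, and bound values are assumptions, not part of this patch):

import jax.numpy as jnp
# Assumed import path; adjust to wherever REINFORCESynapse lives in your codebase.
from reinforce_synapse import REINFORCESynapse

syn = REINFORCESynapse(
    name="policy",      # component name
    shape=(8, 2),       # (input_dim, output_dim); weights hold both mu and log-std
    eta=1e-4,           # learning rate for the gradient-ascent update
    mu_out_min=-1.0,    # lower bound applied to sampled actions
    mu_out_max=1.0,     # upper bound applied to sampled actions
)

Because the new arguments default to -jnp.inf and jnp.inf, existing code that omits them keeps the previous unbounded behavior.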