from jax import random, numpy as jnp
import numpy as np

from ngcsimlib.compilers.process import transition
from ngcsimlib.compartment import Compartment

from ngclearn.components.synapses import DenseSynapse
from ngclearn.utils import tensorstats
from ngclearn.utils.model_utils import create_function


class REINFORCESynapse(DenseSynapse):
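    """
    A dense synaptic cable whose efficacies are adapted with the REINFORCE
    (score-function / policy-gradient) estimator. The stored weight matrix is
    split column-wise into two halves, W_mu and W_logstd, which parameterize a
    diagonal Gaussian over this synapse's output: with a = act_fx(inputs),

    | mu = a @ W_mu,  sigma = exp(clip(a @ W_logstd)),  outputs = mu + sigma * eps,  eps ~ N(0, I)

    and `evolve` adjusts both halves using the analytic gradient of the scaled
    negative REINFORCE objective L = -mean[(r - r_hat) * log N(outputs; mu, sigma^2)]:

    | dL/dW_mu = a^T [ -(r - r_hat) (outputs - mu) / sigma^2 ]
    | dL/dW_logstd = a^T [ -(r - r_hat) (((outputs - mu) / sigma)^2 - 1) ]

    (both gradient halves carry the same 1e-2 scaling applied to the objective).

    Args:
        name: the string name of this synapse

        shape: tuple of (number of input features, number of output features);
            internally the weight matrix holds output_dim * 2 columns

        eta: global learning rate (default: 1e-4)

        weight_init: a kernel to drive initialization of this synaptic cable's values

        resist_scale: a fixed (resistance) scaling factor applied to the synaptic transform

        act_fx: activation applied to inputs before the linear transform (default: identity)

        p_conn: probability of a connection existing

        w_bound: soft weight constraint used when clipping evolved weights

        batch_size: batch size dimension of this component
    """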

    # Define Functions
    def __init__(
        self, name, shape, eta=1e-4, weight_init=None, resist_scale=1., act_fx=None,
        p_conn=1., w_bound=1., batch_size=1, **kwargs
    ):
        ## Double the output columns: the stored matrix stacks W_mu and W_logstd
        input_dim, output_dim = shape
        super().__init__(name, (input_dim, output_dim * 2), weight_init, None, resist_scale,
                         p_conn, batch_size=batch_size, **kwargs)

        ## Synaptic hyper-parameters
        self.shape = shape ## shape of synaptic efficacy matrix (input_dim, output_dim)
        self.Rscale = resist_scale ## post-transformation scale factor
        self.w_bound = w_bound ## soft weight constraint
        self.eta = eta ## learning rate

        ## Compartment setup
        self.dWeights = Compartment(self.weights.value * 0)
        # self.eta = Compartment(jnp.ones((1, 1)) * eta) ## global learning rate (for eligibility traces later)
        self.objective = Compartment(jnp.zeros(())) ## scalar (negative) REINFORCE objective
        self.outputs = Compartment(jnp.zeros((batch_size, output_dim))) ## sampled outputs/actions
        self.rewards = Compartment(jnp.zeros((batch_size,))) ## normalized reward (r - r_hat), input compartment
        self.act_fx, self.dact_fx = create_function(act_fx if act_fx is not None else "identity")

    @staticmethod
    def _compute_update(dt, inputs, rewards, act_fx, weights):
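        """
        Compute the REINFORCE update for one batch: run the forward pass, sample
        outputs from the induced Gaussian (via the reparameterization trick), and
        return the loss gradient w.r.t. the stacked [W_mu | W_logstd] matrix along
        with the scalar objective and the sampled outputs.
        """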
        W_mu, W_logstd = jnp.split(weights, 2, axis=-1)  # (input_dim, output_dim * 2) => 2 x (input_dim, output_dim)
        # Forward pass
        activation = act_fx(inputs)
        mean = activation @ W_mu
        logstd = activation @ W_logstd
        std = jnp.exp(logstd.clip(-10.0, 2.0))
        # Sample using the reparameterization trick (note: this draw uses host-side
        # NumPy RNG, so it is not traced by jit and is not tied to a JAX PRNG key)
        epsilon = jnp.asarray(np.random.normal(0, 1, mean.shape))
        sample = epsilon * std + mean
        outputs = sample  # the actual action that is taken
        # Log probability density of the diagonal Gaussian, summed over output dims
        log_prob = -0.5 * jnp.log(2 * jnp.pi) - logstd - 0.5 * ((sample - mean) / std) ** 2
        log_prob = log_prob.sum(-1)
        # Scalar loss: negative REINFORCE objective (scaled by 1e-2)
        objective = (-log_prob * rewards).mean() * 1e-2
        # Backward pass: gradients computed analytically from the derivation
        # dL/dmu = -(r - r_hat) * dlog_prob/dmu, with dlog_prob/dmu = (sample - mu) / sigma^2
        dlog_prob_dmean = (sample - mean) / (std ** 2)
        # dL/dlog(sigma) = -(r - r_hat) * dlog_prob/dlog(sigma),
        # with dlog_prob/dlog(sigma) = ((sample - mu) / sigma)^2 - 1
        dlog_prob_dlogstd = ((sample - mean) / std) ** 2 - 1.0
        # Chain rule through the linear map: dL/dW_mu = activation^T @ dL/dmu
        # (and likewise for W_logstd), using the same 1e-2 scaling as the objective
        dL_dWmu = activation.T @ (-rewards[:, None] * dlog_prob_dmean) * 1e-2
        dL_dWlstd = activation.T @ (-rewards[:, None] * dlog_prob_dlogstd) * 1e-2
        # Stack the two gradient halves to match the stored weight layout
        dW = jnp.concatenate([dL_dWmu, dL_dWlstd], axis=-1)
        return dW, objective, outputs

    @transition(output_compartments=["weights", "dWeights", "objective", "outputs"])
    @staticmethod
    def evolve(dt, w_bound, inputs, rewards, act_fx, weights, eta):
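        """
        Evolve (adjust) this synapse's efficacies: compute the REINFORCE update,
        step against the loss gradient, and clip the result to [eps, w_bound - eps].
        """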
        dWeights, objective, outputs = REINFORCESynapse._compute_update(
            dt, inputs, rewards, act_fx, weights
        )
        ## step against the loss gradient (i.e., ascend the REINFORCE objective)
        weights = weights - dWeights * eta
        ## clip weights to the soft bounds [eps, w_bound - eps]
        eps = 0.01
        weights = jnp.clip(weights, eps, w_bound - eps)
        return weights, dWeights, objective, outputs

    @transition(output_compartments=["inputs", "outputs", "objective", "rewards", "dWeights"])
    @staticmethod
    def reset(batch_size, shape):
        preVals = jnp.zeros((batch_size, shape[0]))
        postVals = jnp.zeros((batch_size, shape[1]))
        inputs = preVals
        outputs = postVals
        objective = jnp.zeros(())
        rewards = jnp.zeros((batch_size,))
        ## the stored weight matrix stacks W_mu and W_logstd, so dWeights has 2x columns
        dWeights = jnp.zeros((shape[0], shape[1] * 2))
        return inputs, outputs, objective, rewards, dWeights

    @classmethod
    def help(cls): ## component help function
        properties = {
            "synapse_type": "REINFORCESynapse - a dense synaptic cable that adapts its "
                            "efficacies with the REINFORCE (policy-gradient) estimator"
        }
        compartment_props = {
            "inputs": {"inputs": "Takes in external input signal values",
                       "rewards": "Takes in the normalized reward signal (r - r_hat)"},
            "states": {"weights": "Synapse efficacy/strength matrix (stacked [W_mu | W_logstd])"},
            "analytics": {"dWeights": "Synaptic weight adjustment matrix produced at each evolve step",
                          "objective": "Scalar value of the (negative) REINFORCE objective"},
            "outputs": {"outputs": "Sampled output/action values"},
        }
        hyperparams = {
            "shape": "Shape of synaptic weight matrix; number of inputs x number of outputs",
            "eta": "Global learning rate",
            "weight_init": "Initialization conditions for synaptic weight (W) values",
            "resist_scale": "Resistance level scaling factor applied to the synaptic transform",
            "act_fx": "Activation function applied to inputs before the linear transform",
            "p_conn": "Probability of a connection existing",
            "w_bound": "Soft weight bound used when clipping evolved weights",
            "batch_size": "Batch size dimension of this component",
        }
        info = {cls.__name__: properties,
                "compartments": compartment_props,
                "dynamics": "outputs ~ N(mu, sigma^2), mu = act_fx(inputs) @ W_mu, "
                            "sigma = exp(act_fx(inputs) @ W_logstd); "
                            "dW = grad_W[ -(r - r_hat) * log N(outputs; mu, sigma^2) ]",
                "hyperparameters": hyperparams}
        return info

    def __repr__(self):
        comps = [varname for varname in dir(self) if Compartment.is_compartment(getattr(self, varname))]
        maxlen = max(len(c) for c in comps) + 5
        lines = f"[{self.__class__.__name__}] PATH: {self.name}\n"
        for c in comps:
            stats = tensorstats(getattr(self, c).value)
            if stats is not None:
                line = [f"{k}: {v}" for k, v in stats.items()]
                line = ", ".join(line)
            else:
                line = "None"
            lines += f"  {f'({c})'.ljust(maxlen)}{line}\n"
        return lines
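

if __name__ == "__main__":
    ## Minimal standalone sketch (an illustrative assumption, not part of ngc-learn's
    ## Context/Process workflow): exercise the static update math with dummy data.
    input_dim, output_dim, batch_size = 4, 2, 3
    key = random.PRNGKey(0)
    ## stacked [W_mu | W_logstd] matrix, initialized inside the (0, 1) soft bounds
    weights = random.uniform(key, (input_dim, output_dim * 2), minval=0.05, maxval=0.95)
    inputs = jnp.ones((batch_size, input_dim))
    rewards = jnp.array([1.0, -0.5, 0.2])  ## baseline-subtracted rewards (r - r_hat)
    dW, objective, outputs = REINFORCESynapse._compute_update(
        1., inputs, rewards, lambda x: x, weights
    )
    print("dW:", dW.shape, "objective:", float(objective), "outputs:", outputs.shape)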