Skip to content

Commit b22a209

Browse files
committed
Fix chain rule in HopeNetwork backward pass
The backward pass was incorrectly breaking the chain rule by:
- Iterating over learning levels instead of actual CMS blocks
- Using modulo indexing (level % _numCMSLevels), which broke gradient flow
- Reusing the same gradient for all blocks instead of chaining them
- Accumulating gradients incorrectly

Fixed by:
- Processing context flow gradients in reverse, accumulating them into the upstream gradient
- Iterating CMS blocks in reverse order (last to first) without modulo
- Properly chaining gradients: each block receives the accumulated gradient from the previous block
- Returning the final chained gradient as the true derivative w.r.t. the HOPE input

This ensures proper backpropagation through the entire HOPE architecture.
1 parent 94398a6 commit b22a209

File tree

1 file changed

+15
-15
lines changed

1 file changed

+15
-15
lines changed

src/NeuralNetworks/HopeNetwork.cs

Lines changed: 15 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -174,28 +174,28 @@ public Tensor<T> Backward(Tensor<T> outputGradient)
174174
gradient = _recurrentLayers[i].Backward(gradient);
175175
}
176176

177-
// Backprop through CMS blocks and context flow
178-
Tensor<T>? totalGradient = null;
179-
177+
// Backprop through context flow levels (applied after CMS blocks in forward pass)
178+
// Context flow blended with the output of CMS blocks, so we propagate gradients through
180179
for (int level = _inContextLearningLevels - 1; level >= 0; level--)
181180
{
182-
// Compute context flow gradients
181+
// Compute and accumulate context flow gradients for this level
183182
var contextGrad = _contextFlow.ComputeContextGradients(gradient.ToVector(), level);
183+
var contextTensor = new Tensor<T>(new[] { _hiddenDim }, contextGrad);
184184

185-
int cmsIndex = level % _numCMSLevels;
186-
var cmsGrad = _cmsBlocks[cmsIndex].Backward(gradient);
185+
// Add context gradient to current upstream gradient (blending was additive in forward)
186+
gradient = AddTensors(gradient, contextTensor);
187+
}
187188

188-
if (totalGradient == null)
189-
{
190-
totalGradient = cmsGrad;
191-
}
192-
else
193-
{
194-
totalGradient = AddTensors(totalGradient, cmsGrad);
195-
}
189+
// Backprop through CMS blocks in reverse order (no modulo - proper chain rule)
190+
// Each block receives the accumulated gradient from the previous block
191+
for (int i = _numCMSLevels - 1; i >= 0; i--)
192+
{
193+
// Pass combined gradient to this CMS block's backward
194+
gradient = _cmsBlocks[i].Backward(gradient);
195+
// gradient now contains the downstream gradient for the next (previous) block
196196
}
197197

198-
return totalGradient!;
198+
return gradient;
199199
}
200200

201201
/// <summary>

0 commit comments

Comments
 (0)