fix: ShapeKey zero-alloc lookup + revert state buffer Rent calls in 3 more layers

ooples · claude · ooples · commit 3d19820a17d2 · 2026-04-05T01:08:25.000-04:00
- ShapeKey: defensive-copy the shape array only in EnsureCapacity (storage),
  not in Rent (hot-path lookup). This eliminates the per-Rent allocation from Clone().
- GatedDeltaNetLayer, GatedDeltaProductLayer, KimiLinearAttentionLayer:
  reverted state/allStates buffer allocations back to new Tensor<T>()
  for guaranteed zero initialization of recurrent state.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/src/Memory/ForwardArena.cs b/src/Memory/ForwardArena.cs
@@ -86,7 +86,7 @@ public void Reset()
     /// </summary>
     public void EnsureCapacity(int[] shape, int count)
     {
-        var key = new ShapeKey(shape);
+        var key = new ShapeKey(shape, defensiveCopy: true);
         if (_slabs.TryGetValue(key, out var existing) && existing.Length >= count)
             return;
 
@@ -138,9 +138,9 @@ private Tensor<T> GrowAndRent(ShapeKey key, int[] shape, bool clear = true)
     private readonly int _hash;
     private readonly int[] _dims;
 
-    public ShapeKey(int[] shape)
+    public ShapeKey(int[] shape, bool defensiveCopy = false)
     {
-        _dims = (int[])shape.Clone();
+        _dims = defensiveCopy ? (int[])shape.Clone() : shape;
         unchecked
         {
             int hash = (int)2166136261;
diff --git a/src/NeuralNetworks/Layers/SSM/GatedDeltaNetLayer.cs b/src/NeuralNetworks/Layers/SSM/GatedDeltaNetLayer.cs
@@ -343,8 +343,8 @@ private Tensor<T> DeltaRuleForward(
         var output = TensorAllocator.Rent<T>(new[] { batchSize, seqLen, _modelDimension });
 
         // State matrix per head: [batch, numHeads, headDim, headDim]
-        var state = TensorAllocator.Rent<T>(new[] { batchSize, _numHeads, _headDimension, _headDimension });
-        var allStates = TensorAllocator.Rent<T>(new[] { batchSize, seqLen + 1, _numHeads, _headDimension, _headDimension });
+        var state = new Tensor<T>(new[] { batchSize, _numHeads, _headDimension, _headDimension });
+        var allStates = new Tensor<T>(new[] { batchSize, seqLen + 1, _numHeads, _headDimension, _headDimension });
         T keyScale = NumOps.FromDouble(1.0 / Math.Sqrt(_headDimension));
 
         for (int t = 0; t < seqLen; t++)
diff --git a/src/NeuralNetworks/Layers/SSM/GatedDeltaProductLayer.cs b/src/NeuralNetworks/Layers/SSM/GatedDeltaProductLayer.cs
@@ -434,8 +434,8 @@ private Tensor<T> GatedDeltaProductRecurrence(
         Tensor<T> hVecs, int batchSize, int seqLen)
     {
         var output = TensorAllocator.Rent<T>(new[] { batchSize, seqLen, _modelDimension });
-        var state = TensorAllocator.Rent<T>(new[] { batchSize, _numHeads, _headDimension, _headDimension });
-        var allStates = TensorAllocator.Rent<T>(new[] { batchSize, seqLen + 1, _numHeads, _headDimension, _headDimension });
+        var state = new Tensor<T>(new[] { batchSize, _numHeads, _headDimension, _headDimension });
+        var allStates = new Tensor<T>(new[] { batchSize, seqLen + 1, _numHeads, _headDimension, _headDimension });
         T keyScale = NumOps.FromDouble(1.0 / Math.Sqrt(_headDimension));
 
         for (int t = 0; t < seqLen; t++)
diff --git a/src/NeuralNetworks/Layers/SSM/KimiLinearAttentionLayer.cs b/src/NeuralNetworks/Layers/SSM/KimiLinearAttentionLayer.cs
@@ -280,8 +280,8 @@ private Tensor<T> KVGatedRecurrence(
         var output = TensorAllocator.Rent<T>(new[] { batchSize, seqLen, _modelDimension });
 
         // State: [batch, numHeads, headDim, headDim]
-        var state = TensorAllocator.Rent<T>(new[] { batchSize, _numHeads, _headDimension, _headDimension });
-        var allStates = TensorAllocator.Rent<T>(new[] { batchSize, seqLen + 1, _numHeads, _headDimension, _headDimension });
+        var state = new Tensor<T>(new[] { batchSize, _numHeads, _headDimension, _headDimension });
+        var allStates = new Tensor<T>(new[] { batchSize, seqLen + 1, _numHeads, _headDimension, _headDimension });
 
         // Store KV gate values
         _lastKVGate = TensorAllocator.Rent<T>(new[] { batchSize, seqLen, _numHeads });

Original file line number	Diff line number	Diff line change
`@@ -86,7 +86,7 @@ public void Reset()`
`86`	`86`	`/// </summary>`
`87`	`87`	`public void EnsureCapacity(int[] shape, int count)`
`88`	`88`	`{`
`89`		`- var key = new ShapeKey(shape);`
	`89`	`+ var key = new ShapeKey(shape, defensiveCopy: true);`
`90`	`90`	`if (_slabs.TryGetValue(key, out var existing) && existing.Length >= count)`
`91`	`91`	`return;`
`92`	`92`
`@@ -138,9 +138,9 @@ private Tensor<T> GrowAndRent(ShapeKey key, int[] shape, bool clear = true)`
`138`	`138`	`private readonly int _hash;`
`139`	`139`	`private readonly int[] _dims;`
`140`	`140`
`141`		`- public ShapeKey(int[] shape)`
	`141`	`+ public ShapeKey(int[] shape, bool defensiveCopy = false)`
`142`	`142`	`{`
`143`		`- _dims = (int[])shape.Clone();`
	`143`	`+ _dims = defensiveCopy ? (int[])shape.Clone() : shape;`
`144`	`144`	`unchecked`
`145`	`145`	`{`
`146`	`146`	`int hash = (int)2166136261;`