Skip to content

Commit 3d19820

Browse files
ooples and claude committed
fix: ShapeKey zero-alloc lookup + revert 3 more state buffer Rent calls
- ShapeKey: only defensive-copy in EnsureCapacity (storage), not in Rent (hot-path lookup). Eliminates per-Rent allocation from Clone().
- GatedDeltaNetLayer, GatedDeltaProductLayer, KimiLinearAttentionLayer: reverted state/allStates buffer allocations back to new Tensor<T>() for guaranteed zero initialization of recurrent state.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent d8673da commit 3d19820

File tree

4 files changed

+9
-9
lines changed

4 files changed

+9
-9
lines changed

src/Memory/ForwardArena.cs

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -86,7 +86,7 @@ public void Reset()
8686
/// </summary>
8787
public void EnsureCapacity(int[] shape, int count)
8888
{
89-
var key = new ShapeKey(shape);
89+
var key = new ShapeKey(shape, defensiveCopy: true);
9090
if (_slabs.TryGetValue(key, out var existing) && existing.Length >= count)
9191
return;
9292

@@ -138,9 +138,9 @@ private Tensor<T> GrowAndRent(ShapeKey key, int[] shape, bool clear = true)
138138
private readonly int _hash;
139139
private readonly int[] _dims;
140140

141-
public ShapeKey(int[] shape)
141+
public ShapeKey(int[] shape, bool defensiveCopy = false)
142142
{
143-
_dims = (int[])shape.Clone();
143+
_dims = defensiveCopy ? (int[])shape.Clone() : shape;
144144
unchecked
145145
{
146146
int hash = (int)2166136261;

src/NeuralNetworks/Layers/SSM/GatedDeltaNetLayer.cs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -343,8 +343,8 @@ private Tensor<T> DeltaRuleForward(
343343
var output = TensorAllocator.Rent<T>(new[] { batchSize, seqLen, _modelDimension });
344344

345345
// State matrix per head: [batch, numHeads, headDim, headDim]
346-
var state = TensorAllocator.Rent<T>(new[] { batchSize, _numHeads, _headDimension, _headDimension });
347-
var allStates = TensorAllocator.Rent<T>(new[] { batchSize, seqLen + 1, _numHeads, _headDimension, _headDimension });
346+
var state = new Tensor<T>(new[] { batchSize, _numHeads, _headDimension, _headDimension });
347+
var allStates = new Tensor<T>(new[] { batchSize, seqLen + 1, _numHeads, _headDimension, _headDimension });
348348
T keyScale = NumOps.FromDouble(1.0 / Math.Sqrt(_headDimension));
349349

350350
for (int t = 0; t < seqLen; t++)

src/NeuralNetworks/Layers/SSM/GatedDeltaProductLayer.cs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -434,8 +434,8 @@ private Tensor<T> GatedDeltaProductRecurrence(
434434
Tensor<T> hVecs, int batchSize, int seqLen)
435435
{
436436
var output = TensorAllocator.Rent<T>(new[] { batchSize, seqLen, _modelDimension });
437-
var state = TensorAllocator.Rent<T>(new[] { batchSize, _numHeads, _headDimension, _headDimension });
438-
var allStates = TensorAllocator.Rent<T>(new[] { batchSize, seqLen + 1, _numHeads, _headDimension, _headDimension });
437+
var state = new Tensor<T>(new[] { batchSize, _numHeads, _headDimension, _headDimension });
438+
var allStates = new Tensor<T>(new[] { batchSize, seqLen + 1, _numHeads, _headDimension, _headDimension });
439439
T keyScale = NumOps.FromDouble(1.0 / Math.Sqrt(_headDimension));
440440

441441
for (int t = 0; t < seqLen; t++)

src/NeuralNetworks/Layers/SSM/KimiLinearAttentionLayer.cs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -280,8 +280,8 @@ private Tensor<T> KVGatedRecurrence(
280280
var output = TensorAllocator.Rent<T>(new[] { batchSize, seqLen, _modelDimension });
281281

282282
// State: [batch, numHeads, headDim, headDim]
283-
var state = TensorAllocator.Rent<T>(new[] { batchSize, _numHeads, _headDimension, _headDimension });
284-
var allStates = TensorAllocator.Rent<T>(new[] { batchSize, seqLen + 1, _numHeads, _headDimension, _headDimension });
283+
var state = new Tensor<T>(new[] { batchSize, _numHeads, _headDimension, _headDimension });
284+
var allStates = new Tensor<T>(new[] { batchSize, seqLen + 1, _numHeads, _headDimension, _headDimension });
285285

286286
// Store KV gate values
287287
_lastKVGate = TensorAllocator.Rent<T>(new[] { batchSize, seqLen, _numHeads });

0 commit comments

Comments
 (0)